[Qemu-devel] [PATCH v3] qmp: access the local QemuOptsLists for drive option

2013-11-08 Thread Amos Kong
Currently we have three QemuOptsList (qemu_common_drive_opts,
qemu_legacy_drive_opts, and qemu_drive_opts), only qemu_drive_opts
is added to vm_config_groups[].

This patch changes query-command-line-options to access three local
QemuOptsLists for drive option, and merge the description items
together.

Signed-off-by: Amos Kong 
---
V2: access local QemuOptsLists for drive (kevin)
V3: fix indent (fam)

Signed-off-by: Amos Kong 
---
 blockdev.c |  1 -
 include/qemu/config-file.h |  1 +
 include/sysemu/sysemu.h|  2 ++
 util/qemu-config.c | 77 +-
 vl.c   |  3 ++
 5 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index b260477..f09f991 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -47,7 +47,6 @@
 #include "sysemu/arch_init.h"
 
 static QTAILQ_HEAD(drivelist, DriveInfo) drives = 
QTAILQ_HEAD_INITIALIZER(drives);
-extern QemuOptsList qemu_common_drive_opts;
 
 static const char *const if_name[IF_COUNT] = {
 [IF_NONE] = "none",
diff --git a/include/qemu/config-file.h b/include/qemu/config-file.h
index ad4a9e5..508428f 100644
--- a/include/qemu/config-file.h
+++ b/include/qemu/config-file.h
@@ -8,6 +8,7 @@
 QemuOptsList *qemu_find_opts(const char *group);
 QemuOptsList *qemu_find_opts_err(const char *group, Error **errp);
 void qemu_add_opts(QemuOptsList *list);
+void qemu_add_drive_opts(QemuOptsList *list);
 int qemu_set_option(const char *str);
 int qemu_global_option(const char *str);
 void qemu_add_globals(void);
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index cd5791e..495dae8 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -193,6 +193,8 @@ QemuOpts *qemu_get_machine_opts(void);
 
 bool usb_enabled(bool default_usb);
 
+extern QemuOptsList qemu_legacy_drive_opts;
+extern QemuOptsList qemu_common_drive_opts;
 extern QemuOptsList qemu_drive_opts;
 extern QemuOptsList qemu_chardev_opts;
 extern QemuOptsList qemu_device_opts;
diff --git a/util/qemu-config.c b/util/qemu-config.c
index a59568d..04da942 100644
--- a/util/qemu-config.c
+++ b/util/qemu-config.c
@@ -8,6 +8,7 @@
 #include "qmp-commands.h"
 
 static QemuOptsList *vm_config_groups[32];
+static QemuOptsList *drive_config_groups[4];
 
 static QemuOptsList *find_list(QemuOptsList **lists, const char *group,
Error **errp)
@@ -77,6 +78,59 @@ static CommandLineParameterInfoList 
*query_option_descs(const QemuOptDesc *desc)
 return param_list;
 }
 
+/* remove repeated entry from the info list */
+static void cleanup_infolist(CommandLineParameterInfoList *head)
+{
+CommandLineParameterInfoList *pre_entry, *cur, *del_entry;
+
+cur = head;
+while (cur->next) {
+pre_entry = head;
+while (pre_entry != cur->next) {
+if (!strcmp(pre_entry->value->name, cur->next->value->name)) {
+del_entry = cur->next;
+cur->next = cur->next->next;
+g_free(del_entry);
+break;
+}
+pre_entry = pre_entry->next;
+}
+cur = cur->next;
+}
+}
+
+/* merge the description items of two parameter infolists */
+static void connect_infolist(CommandLineParameterInfoList *head,
+ CommandLineParameterInfoList *new)
+{
+CommandLineParameterInfoList *cur;
+
+cur = head;
+while (cur->next) {
+cur = cur->next;
+}
+cur->next = new;
+}
+
+/* access all the local QemuOptsLists for drive option */
+static CommandLineParameterInfoList *get_drive_infolist(void)
+{
+CommandLineParameterInfoList *head = NULL, *cur;
+int i;
+
+for (i = 0; drive_config_groups[i] != NULL; i++) {
+if (!head) {
+head = query_option_descs(drive_config_groups[i]->desc);
+} else {
+cur = query_option_descs(drive_config_groups[i]->desc);
+connect_infolist(head, cur);
+}
+}
+cleanup_infolist(head);
+
+return head;
+}
+
 CommandLineOptionInfoList *qmp_query_command_line_options(bool has_option,
   const char *option,
   Error **errp)
@@ -89,7 +143,12 @@ CommandLineOptionInfoList 
*qmp_query_command_line_options(bool has_option,
 if (!has_option || !strcmp(option, vm_config_groups[i]->name)) {
 info = g_malloc0(sizeof(*info));
 info->option = g_strdup(vm_config_groups[i]->name);
-info->parameters = query_option_descs(vm_config_groups[i]->desc);
+if (!strcmp("drive", vm_config_groups[i]->name)) {
+info->parameters = get_drive_infolist();
+} else {
+info->parameters =
+query_option_descs(vm_config_groups[i]->desc);
+}
 entry = g_malloc0(sizeof(*entry));
 entry->value = info;
 entry

Re: [Qemu-devel] [PATCH v2] qmp: access the local QemuOptsLists for drive option

2013-11-08 Thread Fam Zheng

On 11/09/2013 11:19 AM, Amos Kong wrote:

On Fri, Nov 08, 2013 at 05:18:02PM +0800, Fam Zheng wrote:

Looks good to me. Two small comments below.

On Wed, 11/06 13:16, Amos Kong wrote:

Currently we have three QemuOptsList (qemu_common_drive_opts,
qemu_legacy_drive_opts, and qemu_drive_opts), only qemu_drive_opts
is added to vm_config_groups[].

This patch changes query-command-line-options to access three local
QemuOptsLists for drive option, and merge the description items
together.

Signed-off-by: Amos Kong 
---
V2: access local QemuOptsLists for drive (kevin)
---
  blockdev.c |  1 -
  include/qemu/config-file.h |  1 +
  include/sysemu/sysemu.h|  2 ++
  util/qemu-config.c | 77 +-
  vl.c   |  3 ++
  5 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index b260477..f09f991 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -47,7 +47,6 @@
  #include "sysemu/arch_init.h"

  static QTAILQ_HEAD(drivelist, DriveInfo) drives = 
QTAILQ_HEAD_INITIALIZER(drives);
-extern QemuOptsList qemu_common_drive_opts;

  static const char *const if_name[IF_COUNT] = {
  [IF_NONE] = "none",
diff --git a/include/qemu/config-file.h b/include/qemu/config-file.h
index ad4a9e5..508428f 100644
--- a/include/qemu/config-file.h
+++ b/include/qemu/config-file.h
@@ -8,6 +8,7 @@
  QemuOptsList *qemu_find_opts(const char *group);
  QemuOptsList *qemu_find_opts_err(const char *group, Error **errp);
  void qemu_add_opts(QemuOptsList *list);
+void qemu_add_drive_opts(QemuOptsList *list);


This can be static. Do you plan to use it outside qemu-config.c?


It's used in vl.c



Ah yes, I missed it.

Fam


  int qemu_set_option(const char *str);
  int qemu_global_option(const char *str);
  void qemu_add_globals(void);
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index cd5791e..495dae8 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -193,6 +193,8 @@ QemuOpts *qemu_get_machine_opts(void);

  bool usb_enabled(bool default_usb);

+extern QemuOptsList qemu_legacy_drive_opts;
+extern QemuOptsList qemu_common_drive_opts;
  extern QemuOptsList qemu_drive_opts;
  extern QemuOptsList qemu_chardev_opts;
  extern QemuOptsList qemu_device_opts;
diff --git a/util/qemu-config.c b/util/qemu-config.c
index a59568d..867fb4b 100644
--- a/util/qemu-config.c
+++ b/util/qemu-config.c
@@ -8,6 +8,7 @@
  #include "qmp-commands.h"

  static QemuOptsList *vm_config_groups[32];
+static QemuOptsList *drive_config_groups[4];

  static QemuOptsList *find_list(QemuOptsList **lists, const char *group,
 Error **errp)
@@ -77,6 +78,59 @@ static CommandLineParameterInfoList 
*query_option_descs(const QemuOptDesc *desc)
  return param_list;
  }

+/* remove repeated entry from the info list */
+static void cleanup_infolist(CommandLineParameterInfoList *head)
+{
+CommandLineParameterInfoList *pre_entry, *cur, *del_entry;
+
+cur = head;
+while (cur->next) {
+pre_entry = head;
+while (pre_entry != cur->next) {
+if (!strcmp(pre_entry->value->name, cur->next->value->name)) {
+del_entry = cur->next;
+cur->next = cur->next->next;
+g_free(del_entry);
+break;
+}
+pre_entry = pre_entry->next;
+}
+cur = cur->next;
+}
+}
+
+/* merge the description items of two parameter infolists */
+static void connect_infolist(CommandLineParameterInfoList *head,
+ CommandLineParameterInfoList *new)
+{
+CommandLineParameterInfoList *cur;
+
+cur = head;
+while (cur->next) {
+cur = cur->next;
+}
+cur->next = new;
+}
+
+/* access all the local QemuOptsLists for drive option */
+static CommandLineParameterInfoList *get_drive_infolist(void)
+{
+CommandLineParameterInfoList *head = NULL, *cur;
+int i;
+
+for (i = 0; drive_config_groups[i] != NULL; i++) {
+if (!head) {
+head = query_option_descs(drive_config_groups[i]->desc);
+} else {
+cur = query_option_descs(drive_config_groups[i]->desc);
+connect_infolist(head, cur);
+}
+}
+cleanup_infolist(head);
+
+return head;
+}
+
  CommandLineOptionInfoList *qmp_query_command_line_options(bool has_option,
const char *option,
Error **errp)
@@ -89,7 +143,12 @@ CommandLineOptionInfoList 
*qmp_query_command_line_options(bool has_option,
  if (!has_option || !strcmp(option, vm_config_groups[i]->name)) {
  info = g_malloc0(sizeof(*info));
  info->option = g_strdup(vm_config_groups[i]->name);
-info->parameters = query_option_descs(vm_config_groups[i]->desc);
+if (!strcmp("drive", vm_config_groups[i]->name)) {
+

Re: [Qemu-devel] [PATCH] e1000/rtl8139: update HMP NIC when every bit is written

2013-11-08 Thread Amos Kong
On Fri, Nov 08, 2013 at 02:42:27PM -0500, Vlad Yasevich wrote:
> What about this approach?  This only updates the monitory when all the
> bits have been written to.
 
Hi Vlad,

Looks good to me.

Using this patch, we don't need to care the writing order.
If we add event notify in future, it can reduce the noise
event.

BTW, can we use a tmp buffer to record the new mac (changing is unfinished),
and write the new mac to registers until all bits is written?

Amos

> -vlad
> 
> -- >8 --
> Subject: [PATCH] e1000/rtl8139: update HMP NIC when every bit is written

Subject: [PATCH] e1000/rtl8139: update HMP NIC when all bits are written
 
> We currently just update the HMP NIC info when the last bit of macaddr
> is written. This assumes that guest driver will write all the macaddr
> from bit 0 to bit 5 when it changes the macaddr, this is the current
> behavior of linux driver (e1000/rtl8139cp), but we can't do this
> assumption.
> 
> The macaddr that is used for rx-filter will be updated when every bit
> is changed. This patch updates the e1000/rtl8139 nic to update HMP NIC
> info when every bit has been changed. It will be same as virtio-net.
> 
> Signed-off-by: Vlad Yasevich 
> ---
>  hw/net/e1000.c   | 17 -
>  hw/net/rtl8139.c | 14 +-
>  2 files changed, 25 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/net/e1000.c b/hw/net/e1000.c
> index 8387443..a5967ed 100644
> --- a/hw/net/e1000.c
> +++ b/hw/net/e1000.c
> @@ -149,6 +149,10 @@ typedef struct E1000State_st {
>  #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
>  #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
>  uint32_t compat_flags;
> +uint32_t mac_changed;
> +#define E1000_RA0_CHANGED 0
> +#define E1000_RA1_CHANGED 1
> +#define E1000_RA_ALL_CHANGED (E1000_RA0_CHANGED|E1000_RA1_CHANGED)
>  } E1000State;
>  
>  #define TYPE_E1000 "e1000"
> @@ -402,6 +406,7 @@ static void e1000_reset(void *opaque)
>  d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
>  }
>  qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
> +d->mac_changed = 0;
>  }
>  
>  static void
> @@ -1106,10 +,20 @@ mac_writereg(E1000State *s, int index, uint32_t val)
>  
>  s->mac_reg[index] = val;
>  
> -if (index == RA + 1) {
> +switch (index) {
> +case RA:
> +s->mac_changed |= E1000_RA0_CHANGED;
> +break;
> +case (RA + 1):
> +s->mac_changed |= E1000_RA1_CHANGED;
> +break;
> +}
> +
> +if (s->mac_changed ==  E1000_RA_ALL_CHANGED) {
>  macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
>  macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
>  qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
> + s->mac_changed = 0;
>  }
>  }
>  
> diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
> index 5329f44..6dac10c 100644
> --- a/hw/net/rtl8139.c
> +++ b/hw/net/rtl8139.c
> @@ -476,6 +476,8 @@ typedef struct RTL8139State {
>  
>  uint16_t CpCmd;
>  uint8_t  TxThresh;
> +uint8_t  mac_changed;
> +#define RTL8139_MAC_CHANGED_ALL 0x3F
>  
>  NICState *nic;
>  NICConf conf;
> @@ -1215,6 +1217,7 @@ static void rtl8139_reset(DeviceState *d)
>  /* restore MAC address */
>  memcpy(s->phys, s->conf.macaddr.a, 6);
>  qemu_format_nic_info_str(qemu_get_queue(s->nic), s->phys);
> +s->mac_changed = 0;
>  
>  /* reset interrupt mask */
>  s->IntrStatus = 0;
> @@ -2741,12 +2744,13 @@ static void rtl8139_io_writeb(void *opaque, uint8_t 
> addr, uint32_t val)
>  
>  switch (addr)
>  {
> -case MAC0 ... MAC0+4:
> -s->phys[addr - MAC0] = val;
> -break;
> -case MAC0+5:
> +case MAC0 ... MAC0+5:
>  s->phys[addr - MAC0] = val;
> -qemu_format_nic_info_str(qemu_get_queue(s->nic), s->phys);
> +s->mac_changed |= (1 << (addr - MAC0));
> +if (s->mac_changed == RTL8139_MAC_CHANGED_ALL) {
> +qemu_format_nic_info_str(qemu_get_queue(s->nic), s->phys);
> +s->mac_changed = 0;
> +}
>  break;
>  case MAC0+6 ... MAC0+7:
>  /* reserved */
> -- 
> 1.8.4.2



Re: [Qemu-devel] [PATCH v2] qmp: access the local QemuOptsLists for drive option

2013-11-08 Thread Amos Kong
On Fri, Nov 08, 2013 at 05:18:02PM +0800, Fam Zheng wrote:
> Looks good to me. Two small comments below.
> 
> On Wed, 11/06 13:16, Amos Kong wrote:
> > Currently we have three QemuOptsList (qemu_common_drive_opts,
> > qemu_legacy_drive_opts, and qemu_drive_opts), only qemu_drive_opts
> > is added to vm_config_groups[].
> > 
> > This patch changes query-command-line-options to access three local
> > QemuOptsLists for drive option, and merge the description items
> > together.
> > 
> > Signed-off-by: Amos Kong 
> > ---
> > V2: access local QemuOptsLists for drive (kevin)
> > ---
> >  blockdev.c |  1 -
> >  include/qemu/config-file.h |  1 +
> >  include/sysemu/sysemu.h|  2 ++
> >  util/qemu-config.c | 77 
> > +-
> >  vl.c   |  3 ++
> >  5 files changed, 82 insertions(+), 2 deletions(-)
> > 
> > diff --git a/blockdev.c b/blockdev.c
> > index b260477..f09f991 100644
> > --- a/blockdev.c
> > +++ b/blockdev.c
> > @@ -47,7 +47,6 @@
> >  #include "sysemu/arch_init.h"
> >  
> >  static QTAILQ_HEAD(drivelist, DriveInfo) drives = 
> > QTAILQ_HEAD_INITIALIZER(drives);
> > -extern QemuOptsList qemu_common_drive_opts;
> >  
> >  static const char *const if_name[IF_COUNT] = {
> >  [IF_NONE] = "none",
> > diff --git a/include/qemu/config-file.h b/include/qemu/config-file.h
> > index ad4a9e5..508428f 100644
> > --- a/include/qemu/config-file.h
> > +++ b/include/qemu/config-file.h
> > @@ -8,6 +8,7 @@
> >  QemuOptsList *qemu_find_opts(const char *group);
> >  QemuOptsList *qemu_find_opts_err(const char *group, Error **errp);
> >  void qemu_add_opts(QemuOptsList *list);
> > +void qemu_add_drive_opts(QemuOptsList *list);
> 
> This can be static. Do you plan to use it outside qemu-config.c?

It's used in vl.c
 
> >  int qemu_set_option(const char *str);
> >  int qemu_global_option(const char *str);
> >  void qemu_add_globals(void);
> > diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> > index cd5791e..495dae8 100644
> > --- a/include/sysemu/sysemu.h
> > +++ b/include/sysemu/sysemu.h
> > @@ -193,6 +193,8 @@ QemuOpts *qemu_get_machine_opts(void);
> >  
> >  bool usb_enabled(bool default_usb);
> >  
> > +extern QemuOptsList qemu_legacy_drive_opts;
> > +extern QemuOptsList qemu_common_drive_opts;
> >  extern QemuOptsList qemu_drive_opts;
> >  extern QemuOptsList qemu_chardev_opts;
> >  extern QemuOptsList qemu_device_opts;
> > diff --git a/util/qemu-config.c b/util/qemu-config.c
> > index a59568d..867fb4b 100644
> > --- a/util/qemu-config.c
> > +++ b/util/qemu-config.c
> > @@ -8,6 +8,7 @@
> >  #include "qmp-commands.h"
> >  
> >  static QemuOptsList *vm_config_groups[32];
> > +static QemuOptsList *drive_config_groups[4];
> >  
> >  static QemuOptsList *find_list(QemuOptsList **lists, const char *group,
> > Error **errp)
> > @@ -77,6 +78,59 @@ static CommandLineParameterInfoList 
> > *query_option_descs(const QemuOptDesc *desc)
> >  return param_list;
> >  }
> >  
> > +/* remove repeated entry from the info list */
> > +static void cleanup_infolist(CommandLineParameterInfoList *head)
> > +{
> > +CommandLineParameterInfoList *pre_entry, *cur, *del_entry;
> > +
> > +cur = head;
> > +while (cur->next) {
> > +pre_entry = head;
> > +while (pre_entry != cur->next) {
> > +if (!strcmp(pre_entry->value->name, cur->next->value->name)) {
> > +del_entry = cur->next;
> > +cur->next = cur->next->next;
> > +g_free(del_entry);
> > +break;
> > +}
> > +pre_entry = pre_entry->next;
> > +}
> > +cur = cur->next;
> > +}
> > +}
> > +
> > +/* merge the description items of two parameter infolists */
> > +static void connect_infolist(CommandLineParameterInfoList *head,
> > + CommandLineParameterInfoList *new)
> > +{
> > +CommandLineParameterInfoList *cur;
> > +
> > +cur = head;
> > +while (cur->next) {
> > +cur = cur->next;
> > +}
> > +cur->next = new;
> > +}
> > +
> > +/* access all the local QemuOptsLists for drive option */
> > +static CommandLineParameterInfoList *get_drive_infolist(void)
> > +{
> > +CommandLineParameterInfoList *head = NULL, *cur;
> > +int i;
> > +
> > +for (i = 0; drive_config_groups[i] != NULL; i++) {
> > +if (!head) {
> > +head = query_option_descs(drive_config_groups[i]->desc);
> > +} else {
> > +cur = query_option_descs(drive_config_groups[i]->desc);
> > +connect_infolist(head, cur);
> > +}
> > +}
> > +cleanup_infolist(head);
> > +
> > +return head;
> > +}
> > +
> >  CommandLineOptionInfoList *qmp_query_command_line_options(bool has_option,
> >const char 
> > *option,
> >Error **errp)
> 

Re: [Qemu-devel] [PATCH 0/4] i440fx-test: check firmware visibility

2013-11-08 Thread Laszlo Ersek
On 11/09/13 01:04, Laszlo Ersek wrote:

> Laszlo Ersek (4):
>   i440fx-test: qtest_start() should be paired with qtest_end()
>   i440fx-test: give each GTest case its own qtest
>   i440fx-test: generate temporary firmware blob
>   i440fx-test: verify firmware under 4G and 1M, both -bios and -pflash
> 
>  tests/i440fx-test.c | 169 
> ++--
>  1 file changed, 152 insertions(+), 17 deletions(-)

Self-NAK

I'll have to send a new version, for at least two reasons:

- The original code (before patch #1) doesn't bother to free "data.bus"
(the retval of qpci_init_pc()). Therefore I assumed this function only
grabbed a (non-counted) reference. This is not the case: the original
code leaks it (although it exits soon after), and my patch #2 doubles
the leak.

- There's something fishy with g_assert() firing in the new test case. I
end up with a hung qemu process, reparented to init:

  x86_64-softmmu/qemu-system-x86_64 \
-qtest unix:/tmp/qtest-26926.sock,nowait -qtest-log /dev/null \
-qmp unix:/tmp/qtest-26926.qmp,nowait \
-pidfile /tmp/qtest-26926.pid -machine accel=qtest \
-S -display none -pflash /tmp/fw_blob_553Y5W

I have no idea if this has to do with my unorthodox use of "-S" in
qtest, or if it's a general shortcoming of qtest (ie. aborting before
qtest_end()).

Anyway the series should be reviewable as-is, so I'll wait a bit for
comments.

Thanks!
Laszlo



[Qemu-devel] [PATCH 0/4] i440fx-test: check firmware visibility

2013-11-08 Thread Laszlo Ersek
Marcel's commit

  commit a53ae8e934cd54686875b5bcfc2f434244ee55d6
  Author: Marcel Apfelbaum 
  Date:   Mon Sep 16 11:21:16 2013 +0300

  hw/pci: partially handle pci master abort

has exposed a conflict (an unintended, unordered overlap) between the
memory ranges "pci-hole" and "system.flash". When the boot firmware is
passed with -pflash, the "pci-hole" region hides it, and the guest
cannot execute the firmware.

The test cases added by this series should help avoid regressions in
this area.

On top of v1.7.0-rc0, the "/i440fx/firmware/bios" test passes even
without fixing the memory region conflict (consistently with the fact
that we never noticed the conflict in practice due to using -bios
exclusively).

The "/i440fx/firmware/pflash" test catches the problem, unless one of
the discussed fixes are applied (ie. shrinking "pci-hole", or explicitly
ordering it under "system.flash"):

  $ tests/i440fx-test --verbose
  [...]
  GTest: run: /i440fx/firmware/bios
  (MSG: qemu cmdline: -S -display none -bios /tmp/fw_blob_MA3Y5W)
  GTest: result: OK
  GTest: run: /i440fx/firmware/pflash
  (MSG: qemu cmdline: -S -display none -pflash /tmp/fw_blob_ELLU5W)
  **
  ERROR:tests/i440fx-test.c:368:test_i440fx_firmware: assertion failed
  (buf[i] == (char unsigned)i): (0x00ff == 0x)
  Aborted

Laszlo Ersek (4):
  i440fx-test: qtest_start() should be paired with qtest_end()
  i440fx-test: give each GTest case its own qtest
  i440fx-test: generate temporary firmware blob
  i440fx-test: verify firmware under 4G and 1M, both -bios and -pflash

 tests/i440fx-test.c | 169 ++--
 1 file changed, 152 insertions(+), 17 deletions(-)

-- 
1.8.3.1




[Qemu-devel] [PATCH 1/4] i440fx-test: qtest_start() should be paired with qtest_end()

2013-11-08 Thread Laszlo Ersek
Similarly to commit 1d9358e6
("libqtest: New qtest_end() to go with qtest_start()").

Signed-off-by: Laszlo Ersek 
---
 tests/i440fx-test.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c
index 08ce820..422f7be 100644
--- a/tests/i440fx-test.c
+++ b/tests/i440fx-test.c
@@ -2,9 +2,11 @@
  * qtest I440FX test case
  *
  * Copyright IBM, Corp. 2012-2013
+ * Copyright Red Hat, Inc. 2013
  *
  * Authors:
  *  Anthony Liguori   
+ *  Laszlo Ersek  
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -256,7 +258,6 @@ static void test_i440fx_pam(gconstpointer opaque)
 
 int main(int argc, char **argv)
 {
-QTestState *s;
 TestData data;
 char *cmdline;
 int ret;
@@ -266,20 +267,17 @@ int main(int argc, char **argv)
 data.num_cpus = 1;
 
 cmdline = g_strdup_printf("-display none -smp %d", data.num_cpus);
-s = qtest_start(cmdline);
+qtest_start(cmdline);
 g_free(cmdline);
 
 data.bus = qpci_init_pc();
 
 g_test_add_data_func("/i440fx/defaults", &data, test_i440fx_defaults);
 g_test_add_data_func("/i440fx/pam", &data, test_i440fx_pam);
-
 
 ret = g_test_run();
 
-if (s) {
-qtest_quit(s);
-}
+qtest_end();
 
 return ret;
 }
-- 
1.8.3.1





Re: [Qemu-devel] [PATCH v2] target-ppc: move POWER7+ to a separate family

2013-11-08 Thread Alexey Kardashevskiy
On 11/09/2013 03:59 AM, Andreas Färber wrote:
> Am 08.11.2013 15:54, schrieb Alexey Kardashevskiy:
>> On 11/09/2013 12:44 AM, Andreas Färber wrote:
>>> Am 08.11.2013 03:37, schrieb Alexey Kardashevskiy:
 So far POWER7+ was a part of POWER7 family. However it has a different
 PVR base value so in order to support PVR masks, it needs a separate
 family class.

>>>
>>> Alexey,
>>>
 Another reason to make a POWER7+ family is that its name in the device
 tree (/proc/device-tree/cpus/cpu*) should be "Power7+" but not "Power7"
 and this cannot be easily fixed without a new family class.

 This adds a new family class, PVR base and mask values and moves
 Power7+ v2.1 CPU to a new family. The class init function is copied
 from the POWER7 family.

 Signed-off-by: Alexey Kardashevskiy 
 ---
 Changes:
 v2:
 * added VSX enable bit
 ---
  target-ppc/cpu-models.c |  2 +-
  target-ppc/cpu-models.h |  2 ++
  target-ppc/translate_init.c | 38 ++
  3 files changed, 41 insertions(+), 1 deletion(-)

 diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
 index 04d88c5..7c9466f 100644
 --- a/target-ppc/cpu-models.c
 +++ b/target-ppc/cpu-models.c
 @@ -1140,7 +1140,7 @@
  "POWER7 v2.1")
  POWERPC_DEF("POWER7_v2.3",   CPU_POWERPC_POWER7_v23, 
 POWER7,
  "POWER7 v2.3")
 -POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,
 POWER7,
 +POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,
 POWER7P,
  "POWER7+ v2.1")
  POWERPC_DEF("POWER8_v1.0",   CPU_POWERPC_POWER8_v10, 
 POWER8,
  "POWER8 v1.0")
 diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
 index 731ec4a..49ba4a4 100644
 --- a/target-ppc/cpu-models.h
 +++ b/target-ppc/cpu-models.h
 @@ -558,6 +558,8 @@ enum {
  CPU_POWERPC_POWER7_v20 = 0x003F0200,
  CPU_POWERPC_POWER7_v21 = 0x003F0201,
  CPU_POWERPC_POWER7_v23 = 0x003F0203,
 +CPU_POWERPC_POWER7P_BASE   = 0x004A,
 +CPU_POWERPC_POWER7P_MASK   = 0x,
  CPU_POWERPC_POWER7P_v21= 0x004A0201,
  CPU_POWERPC_POWER8_BASE= 0x004B,
  CPU_POWERPC_POWER8_MASK= 0x,
 diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
 index 35d1389..c030a20 100644
 --- a/target-ppc/translate_init.c
 +++ b/target-ppc/translate_init.c
 @@ -7253,6 +7253,44 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
  pcc->l1_icache_size = 0x8000;
  }
  
 +POWERPC_FAMILY(POWER7P)(ObjectClass *oc, void *data)
 +{
 +DeviceClass *dc = DEVICE_CLASS(oc);
 +PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
 +
 +dc->fw_name = "PowerPC,POWER7+";
>>>
>>> Apart from the commit message differing from the code...
>>
>>
>> In what part?
> 
> The spelling of POWER7. You write it should be "Power7+" but implement
> it as upper-case "POWER7+" (ignoring the "PowerPC," prefix, that is).


Ah. Sorry.


>>> We've had this discussion before: Jacques reported that on his POWER7+
>>> box only "POWER7" is shown, not "POWER7+", equivalent to my POWER5+ box
>>> showing only "PowerPC,POWER5". Compare my commit, which documents this:
>>>
>>> http://git.qemu.org/?p=qemu.git;a=commit;h=793826cd460828975591f289de78672af4a47ef9
>>>
>>> So, adding a POWER7P family seems correct to me, just the fw_name seems
>>> wrong - or you'll need to investigate further why there are conflicting
>>> reports of how it is shown. Possibly based on revision or pHyp vs. SLOF?
>>
>>
>> Yes we have had this discussion. Paul said it should "POWER7+". The only
>> P7+ machine I have handy shows "+":
>>
>> [aik@vpl4 ~]$ ls -d /proc/device-tree/cpus/PowerPC*
>> /proc/device-tree/cpus/PowerPC,POWER7+@0
>> /proc/device-tree/cpus/PowerPC,POWER7+@2c
>> /proc/device-tree/cpus/PowerPC,POWER7+@10
>> /proc/device-tree/cpus/PowerPC,POWER7+@30
>> /proc/device-tree/cpus/PowerPC,POWER7+@14
>> /proc/device-tree/cpus/PowerPC,POWER7+@34
>> /proc/device-tree/cpus/PowerPC,POWER7+@18
>> /proc/device-tree/cpus/PowerPC,POWER7+@38
>> /proc/device-tree/cpus/PowerPC,POWER7+@1c
>> /proc/device-tree/cpus/PowerPC,POWER7+@3c
>> /proc/device-tree/cpus/PowerPC,POWER7+@20
>> /proc/device-tree/cpus/PowerPC,POWER7+@4
>> /proc/device-tree/cpus/PowerPC,POWER7+@24
>> /proc/device-tree/cpus/PowerPC,POWER7+@8
>> /proc/device-tree/cpus/PowerPC,POWER7+@28
>> /proc/device-tree/cpus/PowerPC,POWER7+@c
>>
>> And this is a host, not a guest. I do not see any good reason to make dt
>> names different.
>>
>> And this does not really matter if there is "+" or not for anybody as far
>> as we concerned, ppc64_cpu works either way.
> 
> Right, it may not matter, but I expect you to

[Qemu-devel] [PATCH 2/4] i440fx-test: give each GTest case its own qtest

2013-11-08 Thread Laszlo Ersek
The current two GTest cases, /i440fx/defaults and /i440fx/pam can share a
qemu process, but the next two cases will need dedicated instances. It is
messy (and order-dependent) to dynamically configure GTest cases one by
one to start, stop, or keep the current qtest (*); let's just have each
GTest work with its own qtest. The performance difference should be
negligible.

(*) As g_test_run() can be invoked at most once per process startup, and
it runs GTest cases in sequence, we'd need clumsy data structures to
control each GTest case to start/stop/keep the qemu instance. Or, we'd
have to code the same information into the test methods themselves, which
would make them even more order-dependent.

Signed-off-by: Laszlo Ersek 
---
 tests/i440fx-test.c | 32 +++-
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c
index 422f7be..dc81fb2 100644
--- a/tests/i440fx-test.c
+++ b/tests/i440fx-test.c
@@ -28,16 +28,27 @@
 typedef struct TestData
 {
 int num_cpus;
-QPCIBus *bus;
 } TestData;
 
+static QPCIBus *test_start_get_bus(const TestData *s)
+{
+char *cmdline;
+
+cmdline = g_strdup_printf("-display none -smp %d", s->num_cpus);
+qtest_start(cmdline);
+g_free(cmdline);
+return qpci_init_pc();
+}
+
 static void test_i440fx_defaults(gconstpointer opaque)
 {
 const TestData *s = opaque;
+QPCIBus *bus;
 QPCIDevice *dev;
 uint32_t value;
 
-dev = qpci_device_find(s->bus, QPCI_DEVFN(0, 0));
+bus = test_start_get_bus(s);
+dev = qpci_device_find(bus, QPCI_DEVFN(0, 0));
 g_assert(dev != NULL);
 
 /* 3.2.2 */
@@ -121,6 +132,8 @@ static void test_i440fx_defaults(gconstpointer opaque)
 g_assert_cmpint(qpci_config_readb(dev, 0x91), ==, 0x00); /* ERRSTS */
 /* 3.2.26 */
 g_assert_cmpint(qpci_config_readb(dev, 0x93), ==, 0x00); /* TRC */
+
+qtest_end();
 }
 
 #define PAM_RE 1
@@ -179,6 +192,7 @@ static void write_area(uint32_t start, uint32_t end, 
uint8_t value)
 static void test_i440fx_pam(gconstpointer opaque)
 {
 const TestData *s = opaque;
+QPCIBus *bus;
 QPCIDevice *dev;
 int i;
 static struct {
@@ -201,7 +215,8 @@ static void test_i440fx_pam(gconstpointer opaque)
 { 0xEC000, 0xE }, /* BIOS Extension */
 };
 
-dev = qpci_device_find(s->bus, QPCI_DEVFN(0, 0));
+bus = test_start_get_bus(s);
+dev = qpci_device_find(bus, QPCI_DEVFN(0, 0));
 g_assert(dev != NULL);
 
 for (i = 0; i < ARRAY_SIZE(pam_area); i++) {
@@ -254,30 +269,21 @@ static void test_i440fx_pam(gconstpointer opaque)
 /* Verify the area is not our new mask */
 g_assert(!verify_area(pam_area[i].start, pam_area[i].end, 0x82));
 }
+qtest_end();
 }
 
 int main(int argc, char **argv)
 {
 TestData data;
-char *cmdline;
 int ret;
 
 g_test_init(&argc, &argv, NULL);
 
 data.num_cpus = 1;
 
-cmdline = g_strdup_printf("-display none -smp %d", data.num_cpus);
-qtest_start(cmdline);
-g_free(cmdline);
-
-data.bus = qpci_init_pc();
-
 g_test_add_data_func("/i440fx/defaults", &data, test_i440fx_defaults);
 g_test_add_data_func("/i440fx/pam", &data, test_i440fx_pam);
 
 ret = g_test_run();
-
-qtest_end();
-
 return ret;
 }
-- 
1.8.3.1





[Qemu-devel] [PATCH 3/4] i440fx-test: generate temporary firmware blob

2013-11-08 Thread Laszlo Ersek
The blob is 64K in size and contains 0x00..0xFF repeatedly.

The client code added to main() wouldn't make much sense in the long term.
It helps with debugging and it silences gcc about create_firmware() being
unused, and we'll replace it in the next patch anyway.

Signed-off-by: Laszlo Ersek 
---
 tests/i440fx-test.c | 62 +
 1 file changed, 62 insertions(+)

diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c
index dc81fb2..1fe98de 100644
--- a/tests/i440fx-test.c
+++ b/tests/i440fx-test.c
@@ -20,6 +20,11 @@
 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
 #define BROKEN 1
 
@@ -272,13 +277,70 @@ static void test_i440fx_pam(gconstpointer opaque)
 qtest_end();
 }
 
+#define FW_SIZE ((size_t)65536)
+
+/* Create a temporary firmware blob, and return its absolute pathname as a
+ * dynamically allocated string.
+ * The file is closed before the function returns; it should be unlinked after
+ * use.
+ * In case of error, NULL is returned. The function prints the error message.
+ */
+static char *create_firmware(void)
+{
+int ret, fd;
+char *pathname;
+GError *error = NULL;
+
+ret = -1;
+fd = g_file_open_tmp("fw_blob_XX", &pathname, &error);
+if (fd == -1) {
+fprintf(stderr, "unable to create temporary firmware blob: %s\n",
+error->message);
+g_error_free(error);
+} else {
+if (ftruncate(fd, FW_SIZE) == -1) {
+fprintf(stderr, "ftruncate(\"%s\", %zu): %s\n", pathname, FW_SIZE,
+strerror(errno));
+} else {
+void *buf;
+
+buf = mmap(NULL, FW_SIZE, PROT_WRITE, MAP_SHARED, fd, 0);
+if (buf == MAP_FAILED) {
+fprintf(stderr, "mmap(\"%s\", %zu): %s\n", pathname, FW_SIZE,
+strerror(errno));
+} else {
+size_t i;
+
+for (i = 0; i < FW_SIZE; ++i) {
+((char unsigned *)buf)[i] = i;
+}
+munmap(buf, FW_SIZE);
+ret = 0;
+}
+}
+close(fd);
+if (ret == -1) {
+unlink(pathname);
+g_free(pathname);
+}
+}
+
+return ret == -1 ? NULL : pathname;
+}
+
 int main(int argc, char **argv)
 {
+char *fw_pathname;
 TestData data;
 int ret;
 
 g_test_init(&argc, &argv, NULL);
 
+fw_pathname = create_firmware();
+g_assert(fw_pathname != NULL);
+unlink(fw_pathname);
+g_free(fw_pathname);
+
 data.num_cpus = 1;
 
 g_test_add_data_func("/i440fx/defaults", &data, test_i440fx_defaults);
-- 
1.8.3.1





[Qemu-devel] [PATCH 4/4] i440fx-test: verify firmware under 4G and 1M, both -bios and -pflash

2013-11-08 Thread Laszlo Ersek
Check whether the firmware is not hidden by other memory regions.

Qemu is started in paused mode: it shouldn't try to interpret generated
garbage.

Signed-off-by: Laszlo Ersek 
---
 tests/i440fx-test.c | 81 +
 1 file changed, 75 insertions(+), 6 deletions(-)

diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c
index 1fe98de..43adcb8 100644
--- a/tests/i440fx-test.c
+++ b/tests/i440fx-test.c
@@ -35,6 +35,11 @@ typedef struct TestData
 int num_cpus;
 } TestData;
 
+typedef struct FirmwareTestFixture {
+/* decides whether we're testing -bios or -pflash */
+bool is_bios;
+} FirmwareTestFixture;
+
 static QPCIBus *test_start_get_bus(const TestData *s)
 {
 char *cmdline;
@@ -278,6 +283,7 @@ static void test_i440fx_pam(gconstpointer opaque)
 }
 
 #define FW_SIZE ((size_t)65536)
+#define ISA_BIOS_MAXSZ ((size_t)(128 * 1024))
 
 /* Create a temporary firmware blob, and return its absolute pathname as a
  * dynamically allocated string.
@@ -328,23 +334,86 @@ static char *create_firmware(void)
 return ret == -1 ? NULL : pathname;
 }
 
-int main(int argc, char **argv)
+static void test_i440fx_firmware(FirmwareTestFixture *fixture,
+ gconstpointer user_data)
 {
-char *fw_pathname;
-TestData data;
-int ret;
-
-g_test_init(&argc, &argv, NULL);
+char *fw_pathname, *cmdline;
+char unsigned *buf;
+size_t i, isa_bios_size;
 
 fw_pathname = create_firmware();
 g_assert(fw_pathname != NULL);
+
+/* Better hope the user didn't put metacharacters in TMPDIR and co. */
+cmdline = g_strdup_printf("-S -display none %s %s",
+  fixture->is_bios ? "-bios" : "-pflash",
+  fw_pathname);
+g_test_message("qemu cmdline: %s", cmdline);
+qtest_start(cmdline);
+g_free(cmdline);
+
+/* Qemu has loaded the firmware (because qtest_start() only returns after
+ * the QMP handshake completes). We must unlink the firmware blob right
+ * here, because any assertion firing below would leak it in the
+ * filesystem. This is also the reason why we recreate the blob every time
+ * this function is invoked.
+ */
 unlink(fw_pathname);
 g_free(fw_pathname);
 
+/* check below 4G */
+buf = g_malloc0(FW_SIZE);
+memread(0x1ULL - FW_SIZE, buf, FW_SIZE);
+for (i = 0; i < FW_SIZE; ++i) {
+g_assert_cmphex(buf[i], ==, (char unsigned)i);
+}
+
+/* check in ISA space too */
+memset(buf, 0, FW_SIZE);
+isa_bios_size = ISA_BIOS_MAXSZ < FW_SIZE ? ISA_BIOS_MAXSZ : FW_SIZE;
+memread(0x10 - isa_bios_size, buf, isa_bios_size);
+for (i = 0; i < isa_bios_size; ++i) {
+g_assert_cmphex(buf[i], ==,
+(char unsigned)((FW_SIZE - isa_bios_size) + i));
+}
+
+g_free(buf);
+qtest_end();
+}
+
+static void add_firmware_test(const char *testpath,
+  void (*setup_fixture)(FirmwareTestFixture *f,
+gconstpointer test_data))
+{
+g_test_add(testpath, FirmwareTestFixture, NULL, setup_fixture,
+   test_i440fx_firmware, NULL);
+}
+
+static void request_bios(FirmwareTestFixture *fixture,
+ gconstpointer user_data)
+{
+fixture->is_bios = true;
+}
+
+static void request_pflash(FirmwareTestFixture *fixture,
+   gconstpointer user_data)
+{
+fixture->is_bios = false;
+}
+
+int main(int argc, char **argv)
+{
+TestData data;
+int ret;
+
+g_test_init(&argc, &argv, NULL);
+
 data.num_cpus = 1;
 
 g_test_add_data_func("/i440fx/defaults", &data, test_i440fx_defaults);
 g_test_add_data_func("/i440fx/pam", &data, test_i440fx_pam);
+add_firmware_test("/i440fx/firmware/bios", request_bios);
+add_firmware_test("/i440fx/firmware/pflash", request_pflash);
 
 ret = g_test_run();
 return ret;
-- 
1.8.3.1




[Qemu-devel] [Bug 1100843] Re: Live Migration Causes Performance Issues

2013-11-08 Thread Chris J Arges
** Changed in: qemu-kvm (Ubuntu Quantal)
 Assignee: Chris J Arges (arges) => (unassigned)

** Changed in: qemu-kvm (Ubuntu Raring)
 Assignee: Chris J Arges (arges) => (unassigned)

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1100843

Title:
  Live Migration Causes Performance Issues

Status in QEMU:
  New
Status in “qemu-kvm” package in Ubuntu:
  Fix Released
Status in “qemu-kvm” source package in Precise:
  Fix Released
Status in “qemu-kvm” source package in Quantal:
  Triaged
Status in “qemu-kvm” source package in Raring:
  Triaged
Status in “qemu-kvm” source package in Saucy:
  Fix Released

Bug description:
  SRU Justification
  [Impact]
   * Users of QEMU that save their memory states using savevm/loadvm or migrate 
experience worse performance after the migration/loadvm. To workaround these 
issues VMs must be completely rebooted. Optimally we should be able to restore 
a VM's memory state an expect no performance issue.

  [Test Case]

   * savevm/loadvm:
     - Create a VM and install a test suite such as lmbench.
     - Get numbers right after boot and record them.
     - Open up the qemu monitor and type the following:
   stop
   savevm 0
   loadvm 0
   c
     - Measure performance and record numbers.
     - Compare if numbers are within margin of error.
   * migrate:
     - Create VM, install lmbench, get numbers.
     - Open up qemu monitor and type the following:
   stop
   migrate "exec:dd of=~/save.vm"
   quit
     - Start a new VM using qemu but add the following argument:
   -incoming "exec:dd if=~/save.vm"
     - Run performance test and compare.

   If performance measured is similar then we pass the test case.

  [Regression Potential]

   * The fix is a backport of two upstream patches:
  ad0b5321f1f797274603ebbe20108b0750baee94
  211ea74022f51164a7729030b28eec90b6c99a08

  One patch allows QEMU to use THP if its enabled.
  The other patch changes logic to not memset pages to zero when loading memory 
for the vm (on an incoming migration).

   * I've also run the qa-regression-testing test-qemu.py script and it
  passes all tests.

  [Additional Information]

  Kernels from 3.2 onwards are affected, and all have the config:
  CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y. Therefore enabling THP is
  applicable.

  --

  I have 2 physical hosts running Ubuntu Precise.  With 1.0+noroms-
  0ubuntu14.7 and qemu-kvm 1.2.0+noroms-0ubuntu7 (source from quantal,
  built for Precise with pbuilder.) I attempted to build qemu-1.3.0 debs
  from source to test, but libvirt seems to have an issue with it that I
  haven't been able to track down yet.

   I'm seeing a performance degradation after live migration on Precise,
  but not Lucid.  These hosts are managed by libvirt (tested both
  0.9.8-2ubuntu17 and 1.0.0-0ubuntu4) in conjunction with OpenNebula.  I
  don't seem to have this problem with lucid guests (running a number of
  standard kernels, 3.2.5 mainline and backported linux-
  image-3.2.0-35-generic as well.)

  I first noticed this problem with phoronix doing compilation tests,
  and then tried lmbench where even simple calls experience performance
  degradation.

  I've attempted to post to the kvm mailing list, but so far the only
  suggestion was it may be related to transparent hugepages not being
  used after migration, but this didn't pan out.  Someone else has a
  similar problem here -
  http://thread.gmane.org/gmane.comp.emulators.kvm.devel/100592

  qemu command line example: /usr/bin/kvm -name one-2 -S -M pc-1.2 -cpu
  Westmere -enable-kvm -m 73728 -smp 16,sockets=2,cores=8,threads=1
  -uuid f89e31a4-4945-c12c-6544-149ba0746c2f -no-user-config -nodefaults
  -chardev
  socket,id=charmonitor,path=/var/lib/libvirt/qemu/one-2.monitor,server,nowait
  -mon chardev=charmonitor,id=monitor,mode=control -rtc
  base=utc,driftfix=slew -no-kvm-pit-reinjection -no-shutdown -device
  piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive
  file=/var/lib/one//datastores/0/2/disk.0,if=none,id=drive-virtio-
  disk0,format=raw,cache=none -device virtio-blk-
  pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-
  disk0,bootindex=1 -drive
  file=/var/lib/one//datastores/0/2/disk.1,if=none,id=drive-
  ide0-0-0,readonly=on,format=raw -device ide-cd,bus=ide.0,unit=0,drive
  =drive-ide0-0-0,id=ide0-0-0 -netdev
  tap,fd=23,id=hostnet0,vhost=on,vhostfd=25 -device virtio-net-
  pci,netdev=hostnet0,id=net0,mac=02:00:0a:64:02:fe,bus=pci.0,addr=0x3
  -vnc 0.0.0.0:2,password -vga cirrus -incoming tcp:0.0.0.0:49155
  -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5

  Disk backend is LVM running on SAN via FC connection (using symlink
  from /var/lib/one/datastores/0/2/disk.0 above)

  ubuntu-12.04 - first boot
  ==
  Simple syscall: 0.0527 microseconds
  Simple read: 0.1143 microseconds
  Simple write: 0.0953 microsec

Re: [Qemu-devel] [PATCH] PPC: fix PCI configuration space MemoryRegions for grackle/uninorth

2013-11-08 Thread Mark Cave-Ayland

On 08/11/13 03:20, Alexander Graf wrote:


On 11.10.2013, at 12:53, Mark Cave-Ayland  wrote:


OpenBIOS prior to SVN r1225 had a horrible bug when accessing PCI
configuration space for PPC Mac architectures - instead of writing the PCI
configuration data value to the data register address, it would instead write
it to the data register address plus the PCI configuration address.

For this reason, the MemoryRegions for the PCI data register for
grackle/uninorth were extremely large in order to accomodate the entire PCI
configuration space being accessed during OpenBIOS PCI bus enumeration. Now
that the OpenBIOS images have been updated, reduce the MemoryRegion sizes down
to a single 32-bit register as intended.

Signed-off-by: Mark Cave-Ayland
CC: Hervé Poussineau
CC: Andreas Färber
CC: Alexander Graf


With this patch applied, mac99 emulation seems to break:

   
http://award.ath.cx/results/288-alex/x86/kvm.qemu-git-tcg.ppc-debian.mac99-G4.etch.e1000.reboot/debug/serial-vm1.log


Alex


Hi Alex,

Thanks for the heads-up - with the information above I was able to 
reproduce this fairly easily. I had look at some of the uninorth 
drivers, and while it's not particularly apparent from Linux that the 
PCI configuration data is accessed via MMIO rather than ioport access, 
FreeBSD seems to suggest that this is the case: 
http://code.google.com/p/freebsd-head/source/browse/sys/powerpc/powermac/uninorth.c?spec=svnc6989e24706228678e454517dad4ad465a36e556&r=c6989e24706228678e454517dad4ad465a36e556#274.


The key is that the QEMU uninorth host bridge contains a hack to allow 
PCI configuration mechanism #1 as used by OpenBIOS to work at all (see 
unin_get_config_reg() in hw/pci-host/uninorth.c) which is why I didn't 
notice it in my OpenBIOS boot tests; and in fact, the name of the 
uninorth PCI configuration data MemoryRegions have a "-data" rather than 
a "-idx" suffix is also a big clue.


Hence please find a revised version of the patch which is unaltered for 
grackle, and only changes the MemoryRegion size for the PCI 
configuration address register for uninorth so that the PCI 
configuration data space is still accessible using MMIO. This resolves 
the issue for me, so if you're satisifed that it works for you then I'll 
post a revised version to the list.



ATB,

Mark.
diff --git a/hw/pci-host/grackle.c b/hw/pci-host/grackle.c
index 4991ec4..d70c519 100644
--- a/hw/pci-host/grackle.c
+++ b/hw/pci-host/grackle.c
@@ -105,9 +105,9 @@ static int pci_grackle_init_device(SysBusDevice *dev)
 phb = PCI_HOST_BRIDGE(dev);
 
 memory_region_init_io(&phb->conf_mem, OBJECT(dev), &pci_host_conf_le_ops,
-  dev, "pci-conf-idx", 0x1000);
+  dev, "pci-conf-idx", 4);
 memory_region_init_io(&phb->data_mem, OBJECT(dev), &pci_host_data_le_ops,
-  dev, "pci-data-idx", 0x1000);
+  dev, "pci-data-idx", 4);
 sysbus_init_mmio(dev, &phb->conf_mem);
 sysbus_init_mmio(dev, &phb->data_mem);
 
diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c
index 91530cd..2238646 100644
--- a/hw/pci-host/uninorth.c
+++ b/hw/pci-host/uninorth.c
@@ -153,7 +153,7 @@ static int pci_unin_main_init_device(SysBusDevice *dev)
 h = PCI_HOST_BRIDGE(dev);
 
 memory_region_init_io(&h->conf_mem, OBJECT(h), &pci_host_conf_le_ops,
-  dev, "pci-conf-idx", 0x1000);
+  dev, "pci-conf-idx", 4);
 memory_region_init_io(&h->data_mem, OBJECT(h), &unin_data_ops, dev,
   "pci-conf-data", 0x1000);
 sysbus_init_mmio(dev, &h->conf_mem);
@@ -171,7 +171,7 @@ static int pci_u3_agp_init_device(SysBusDevice *dev)
 h = PCI_HOST_BRIDGE(dev);
 
 memory_region_init_io(&h->conf_mem, OBJECT(h), &pci_host_conf_le_ops,
-  dev, "pci-conf-idx", 0x1000);
+  dev, "pci-conf-idx", 4);
 memory_region_init_io(&h->data_mem, OBJECT(h), &unin_data_ops, dev,
   "pci-conf-data", 0x1000);
 sysbus_init_mmio(dev, &h->conf_mem);
@@ -188,7 +188,7 @@ static int pci_unin_agp_init_device(SysBusDevice *dev)
 h = PCI_HOST_BRIDGE(dev);
 
 memory_region_init_io(&h->conf_mem, OBJECT(h), &pci_host_conf_le_ops,
-  dev, "pci-conf-idx", 0x1000);
+  dev, "pci-conf-idx", 4);
 memory_region_init_io(&h->data_mem, OBJECT(h), &pci_host_data_le_ops,
   dev, "pci-conf-data", 0x1000);
 sysbus_init_mmio(dev, &h->conf_mem);
@@ -204,7 +204,7 @@ static int pci_unin_internal_init_device(SysBusDevice *dev)
 h = PCI_HOST_BRIDGE(dev);
 
 memory_region_init_io(&h->conf_mem, OBJECT(h), &pci_host_conf_le_ops,
-  dev, "pci-conf-idx", 0x1000);
+  dev, "pci-conf-idx", 4);
 memory_region_init_io(&h->data_mem, OBJECT(h), &pci_host_data_le_ops,
   dev, "pci-conf-data", 0x1000);
 

Re: [Qemu-devel] [PATCH V5 6/6] qemu-iotests: add test for qcow2 snapshot

2013-11-08 Thread Jeff Cody
On Tue, Nov 05, 2013 at 08:01:29AM +0800, Wenchao Xia wrote:
> This test will focus on the low level procedure of qcow2 snapshot
> operations, now it covers only the create operation. Overlap error
> paths are not checked since no good way to trigger those errors.
> 
> Signed-off-by: Wenchao Xia 
> ---
>  tests/qemu-iotests/070   |  214 
> ++
>  tests/qemu-iotests/070.out   |   35 ++
>  tests/qemu-iotests/common.filter |7 ++
>  tests/qemu-iotests/group |1 +
>  4 files changed, 257 insertions(+), 0 deletions(-)
>  create mode 100755 tests/qemu-iotests/070
>  create mode 100644 tests/qemu-iotests/070.out
> 
> diff --git a/tests/qemu-iotests/070 b/tests/qemu-iotests/070
> new file mode 100755
> index 000..37ada84
> --- /dev/null
> +++ b/tests/qemu-iotests/070
> @@ -0,0 +1,214 @@
> +#!/bin/bash
> +#
> +# qcow2 internal snapshot test
> +#
> +# Copyright (C) 2013 IBM, Inc.
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see .
> +#
> +owner=xiaw...@linux.vnet.ibm.com
> +
> +seq=`basename $0`
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1 # failure is the default!
> +
> +_cleanup()
> +{
> +_cleanup_test_img
> +rm $TEST_DIR/blkdebug.conf

$TEST_DIR needs quoting (also in later uses in this file as well)

> +}
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +# get standard environment, filters and checks
> +. ./common.rc
> +. ./common.filter
> +. ./common.pattern
> +
> +# only test qcow2
> +_supported_fmt qcow2
> +_supported_proto generic
> +# bind the errno correctly and filter the output of image check and qemu-img,
> +# if you want to run it on other OS
> +_supported_os Linux
> +
> +
> +IMGOPTS="compat=1.1"
> +
> +CLUSTER_SIZE=65536
> +
> +SIZE=1G
> +
> +BLKDBG_TEST_IMG="blkdebug:$TEST_DIR/blkdebug.conf:$TEST_IMG"
> +
> +errno=5
> +
> +once=on
> +
> +imm=off
> +
> +
> +# Start test, note that the injected errors are related to qcow2's snapshot
> +# logic closely, see qcow2-snapshot.c for more details.
> +
> +# path 1: fail in L1 table allocation for snapshot
> +echo
> +echo "Path 1: fail in allocation of L1 table"
> +
> +_make_test_img $SIZE
> +
> +cat > $TEST_DIR/blkdebug.conf < +[inject-error]
> +event = "cluster_alloc"
> +errno = "$errno"
> +immediately = "$imm"
> +once = "$once"
> +EOF
> +
> +$QEMU_IMG snapshot -c snap1 $BLKDBG_TEST_IMG
> +$QEMU_IMG snapshot -l $TEST_IMG

Both $BLKDB_TEST_IMG and $TEST_IMG need quoting (also in later uses in
this file)

> +_check_test_img 2>&1
> +
> +
> +# path 2: fail in update new L1 table
> +echo
> +echo "Path 2: fail in update new L1 table for snapshot"
> +
> +_make_test_img $SIZE
> +
> +cat > $TEST_DIR/blkdebug.conf < +[inject-error]
> +event = "snapshot_l1_update"
> +errno = "$errno"
> +immediately = "$imm"
> +once = "$once"
> +EOF
> +
> +$QEMU_IMG snapshot -c snap1 $BLKDBG_TEST_IMG 2>&1 | _filter_number
> +$QEMU_IMG snapshot -l $TEST_IMG
> +_check_test_img 2>&1
> +
> +# path 3: fail in update refcount block before write snapshot list
> +echo
> +echo "Path 3: fail in update refcount block before write snapshot list"
> +
> +_make_test_img $SIZE
> +
> +cat > $TEST_DIR/blkdebug.conf < +[set-state]
> +state = "1"
> +event = "snapshot_l1_update"
> +new_state = "2"
> +
> +[inject-error]
> +state = "2"
> +event = "refblock_update_part"
> +errno = "$errno"
> +immediately = "$imm"
> +once = "$once"
> +
> +[set-state]
> +state = "2"
> +event = "refblock_alloc"
> +new_state = "3"
> +EOF
> +
> +$QEMU_IMG snapshot -c snap1 $BLKDBG_TEST_IMG 2>&1 | _filter_number
> +$QEMU_IMG snapshot -l $TEST_IMG
> +_check_test_img 2>&1
> +
> +# path 4: fail in snapshot list allocation or its flush it is possible
> +# qcow2_alloc_clusters() not fail immediately since cache hit, but in any
> +# case, no error should be found in image check.
> +echo
> +echo "Path 4: fail in snapshot list allocation or its flush"
> +
> +_make_test_img $SIZE
> +
> +cat > $TEST_DIR/blkdebug.conf < +[set-state]
> +state = "1"
> +event = "snapshot_l1_update"
> +new_state = "2"
> +
> +[inject-error]
> +state = "2"
> +event = "cluster_alloc"
> +errno = "$errno"
> +immediately = "$imm"
> +once = "$once"
> +EOF
> +
> +# fail directly or in flush, are both OK.
> +err=`$QEMU_IMG snapshot -c snap1 $BLKDBG_TEST_IMG 2>&1 | grep "Failed" | 
> grep "allocation" | grep "list"`
> +if ! test -z "$err"
> +then

[Qemu-devel] [Bug 1243287] Re: [KVM/QEMU][ARM][SAUCY] fails to boot cloud-image due to host kvm fail

2013-11-08 Thread Andreas Färber
Manoj, dmidecode seems highly x86-specific according to 
http://www.nongnu.org/dmidecode/, so not running it on arm does seem a correct 
solution for closing this bug. Suggest you assign it back to cloud-init to have 
that fixed.
An alternative to checking for the command's presence would be checking uname 
or whatever Python offers as equivalent.

What do you expect to read from 0xf to 0xf on your arm mach-virt
guest? I suggest you file a separate bug report more understandable to
QEMU/KVM developers if there's a valid use case failing, along with
versions, command line and anything else needed for someone to actually
investigate this beyond speculation.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1243287

Title:
  [KVM/QEMU][ARM][SAUCY] fails to boot cloud-image due to host kvm fail

Status in QEMU:
  Confirmed

Bug description:
  On booting the cloud image using qemu/kvm fails with the following
  error:

  Cloud-init v. 0.7.3 running 'init' at Thu, 03 Oct 2013 16:45:21 +. Up 
360.78 seconds.
  ci-info: +Net device info+
  ci-info: ++--+---+---+---+
  ci-info: | Device | Up | Address | Mask | Hw-Address |
  ci-info: ++--+---+---+---+
  ci-info: | lo | True | 127.0.0.1 | 255.0.0.0 | . |
  ci-info: | eth0 | True | 10.0.2.15 | 255.255.255.0 | 52:54:00:12:34:56 |
  ci-info: ++--+---+---+---+
  ci-info: ++Route 
info++
  ci-info: 
+---+-+--+---+---+---+
  ci-info: | Route | Destination | Gateway | Genmask | Interface | Flags |
  ci-info: 
+---+-+--+---+---+---+
  ci-info: | 0 | 0.0.0.0 | 10.0.2.2 | 0.0.0.0 | eth0 | UG |
  ci-info: | 1 | 10.0.2.0 | 0.0.0.0 | 255.255.255.0 | eth0 | U |
  ci-info: 
+---+-+--+---+---+---+
  error: kvm run failed Function not implemented

  /usr/lib/python2.7/dist-
  packages/cloudinit/sources/DataSourceAltCloud.py assumes that
  dmidecode command is availabe (ie it assumes that system is x86) on
  arm systems there is no dmidecode command so host kvm fails with the
  message "error: kvm run failed Function not implemented"

  The patch makes get_cloud_type() function return with UNKNOWN for ARM
  systems.

  I was able to boot the cloud-image on ARM after applying this change.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1243287/+subscriptions



[Qemu-devel] [PATCH] e1000/rtl8139: update HMP NIC when every bit is written

2013-11-08 Thread Vlad Yasevich
What about this approach?  This only updates the monitory when all the
bits have been written to.

-vlad

-- >8 --
Subject: [PATCH] e1000/rtl8139: update HMP NIC when every bit is written

We currently just update the HMP NIC info when the last bit of macaddr
is written. This assumes that guest driver will write all the macaddr
from bit 0 to bit 5 when it changes the macaddr, this is the current
behavior of linux driver (e1000/rtl8139cp), but we can't do this
assumption.

The macaddr that is used for rx-filter will be updated when every bit
is changed. This patch updates the e1000/rtl8139 nic to update HMP NIC
info when every bit has been changed. It will be same as virtio-net.

Signed-off-by: Vlad Yasevich 
---
 hw/net/e1000.c   | 17 -
 hw/net/rtl8139.c | 14 +-
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 8387443..a5967ed 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -149,6 +149,10 @@ typedef struct E1000State_st {
 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
 uint32_t compat_flags;
+uint32_t mac_changed;
+#define E1000_RA0_CHANGED 0
+#define E1000_RA1_CHANGED 1
+#define E1000_RA_ALL_CHANGED (E1000_RA0_CHANGED|E1000_RA1_CHANGED)
 } E1000State;
 
 #define TYPE_E1000 "e1000"
@@ -402,6 +406,7 @@ static void e1000_reset(void *opaque)
 d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
 }
 qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
+d->mac_changed = 0;
 }
 
 static void
@@ -1106,10 +,20 @@ mac_writereg(E1000State *s, int index, uint32_t val)
 
 s->mac_reg[index] = val;
 
-if (index == RA + 1) {
+switch (index) {
+case RA:
+s->mac_changed |= E1000_RA0_CHANGED;
+break;
+case (RA + 1):
+s->mac_changed |= E1000_RA1_CHANGED;
+break;
+}
+
+if (s->mac_changed ==  E1000_RA_ALL_CHANGED) {
 macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
 macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
 qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
+   s->mac_changed = 0;
 }
 }
 
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index 5329f44..6dac10c 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -476,6 +476,8 @@ typedef struct RTL8139State {
 
 uint16_t CpCmd;
 uint8_t  TxThresh;
+uint8_t  mac_changed;
+#define RTL8139_MAC_CHANGED_ALL 0x3F
 
 NICState *nic;
 NICConf conf;
@@ -1215,6 +1217,7 @@ static void rtl8139_reset(DeviceState *d)
 /* restore MAC address */
 memcpy(s->phys, s->conf.macaddr.a, 6);
 qemu_format_nic_info_str(qemu_get_queue(s->nic), s->phys);
+s->mac_changed = 0;
 
 /* reset interrupt mask */
 s->IntrStatus = 0;
@@ -2741,12 +2744,13 @@ static void rtl8139_io_writeb(void *opaque, uint8_t 
addr, uint32_t val)
 
 switch (addr)
 {
-case MAC0 ... MAC0+4:
-s->phys[addr - MAC0] = val;
-break;
-case MAC0+5:
+case MAC0 ... MAC0+5:
 s->phys[addr - MAC0] = val;
-qemu_format_nic_info_str(qemu_get_queue(s->nic), s->phys);
+s->mac_changed |= (1 << (addr - MAC0));
+if (s->mac_changed == RTL8139_MAC_CHANGED_ALL) {
+qemu_format_nic_info_str(qemu_get_queue(s->nic), s->phys);
+s->mac_changed = 0;
+}
 break;
 case MAC0+6 ... MAC0+7:
 /* reserved */
-- 
1.8.4.2




Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements

2013-11-08 Thread Peter Lieven

Am 08.11.2013 um 19:03 schrieb ronnie sahlberg :

> It would mean that any version of libiscsi from 1.1 or later would
> work   and there would not be the issues such as
> UNMAP is only available from 1.2 and forward,   WRITESAME* is only
> availabel from 1.3.

I think 1.3.0 is not a big requirement when the recent version is 1.8.0

> SANITIZE only being available from 1.9 …

Sanitize we do not use at all currently.

Peter

> 
> 
> On Fri, Nov 8, 2013 at 9:52 AM, ronnie sahlberg
>  wrote:
>> For better support for older versions of libiscsi
>> I think it would be good to convert all the
>> iscsi_unmap_task/iscsi_writesame*_task/iscsi_*_task   functions with
>> calls to the much more genric iscsi_scsi_command_sync().
>> 
>> iscsi_scsi_command_sync() and iscsi_scsi_command_async() have been
>> available since prior to version 1.1 and can be used to send arbitrary
>> scsi opcodes to the target.
>> 
>> 
>> iscsi_scsi_command_async() is already used instead of
>> iscs_read16/write16_async() since the read16/write16 helpers were
>> added to libiscsi at a much later stage and there are examples of its
>> use.
>> 
>> 
>> Using iscsi_scsi_command_[a]sync() instead of for example
>> iscsi_unmap_task() would mean that you can use the UNMAP opcode
>> always, regardless version of libiscsi.
>> It would mean that you need to build the CDB directly inside
>> block/iscsi.c but that is not hard.
>> 
>> 
>> I.e.  change block/iscsi.c to only use iscsi_scsi_command_[a]sync()
>> for all opcodes it wants to send to the target.
>> 
>> 
>> 
>> 
>> On Wed, Nov 6, 2013 at 1:16 PM, Paolo Bonzini  wrote:
>>> Il 06/11/2013 20:38, Peter Lieven ha scritto:
 
 Am 06.11.2013 um 14:09 schrieb Paolo Bonzini :
 
> Il 06/11/2013 14:08, Stefan Hajnoczi ha scritto:
>> Thanks, applied to my block-next tree:
>> https://github.com/stefanha/qemu/commits/block-next
>> 
>> This will go into QEMU 1.8.  Since it's a new feature that touches core
>> block layer code and several block drivers, I can't merge it into
>> 1.7-rc.
> 
> Yay!  With these patches I can properly emulate WRITE SAME in the SCSI
> layer.
 
 Yes, you can, but only when write same is used to write zeroes. The other
 case is still unhandled or needs emulation.
>>> 
>>> WRITE SAME with UNMAP is incorrect in the current QEMU code.  Your patch
>>> let me fix it so that the payload can be read back with no change, *and*
>>> sectors are unmapped when possible.
>>> 
>>> Paolo
>>> 




Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements

2013-11-08 Thread Peter Lieven

Am 08.11.2013 um 18:52 schrieb ronnie sahlberg :

> For better support for older versions of libiscsi
> I think it would be good to convert all the
> iscsi_unmap_task/iscsi_writesame*_task/iscsi_*_task   functions with
> calls to the much more genric iscsi_scsi_command_sync().
> 
> iscsi_scsi_command_sync() and iscsi_scsi_command_async() have been
> available since prior to version 1.1 and can be used to send arbitrary
> scsi opcodes to the target.
> 
> 
> iscsi_scsi_command_async() is already used instead of
> iscs_read16/write16_async() since the read16/write16 helpers were
> added to libiscsi at a much later stage and there are examples of its
> use.
> 
> 
> Using iscsi_scsi_command_[a]sync() instead of for example
> iscsi_unmap_task() would mean that you can use the UNMAP opcode
> always, regardless version of libiscsi.
> It would mean that you need to build the CDB directly inside
> block/iscsi.c but that is not hard.
> 
> 
> I.e.  change block/iscsi.c to only use iscsi_scsi_command_[a]sync()
> for all opcodes it wants to send to the target.
> 
> 
> 


Hi Ronnie,

i would prefer to use the helper functions. It makes the code much, much
easier to read.

In general you are talking about the case that we have a totally
outdated libiscsi version, but a absolutely up to date qemu version. 

If someone wants to compile a recent qemu on an old distro, this
is working, but simply not with all features enabled.

We keep basic support for read, write, flush. This works with any
libiscsi version. If someone wants to have support for write_zeroes,
discard or get_block_status he simply must use a recent libiscsi
version.

Peter


> 
> On Wed, Nov 6, 2013 at 1:16 PM, Paolo Bonzini  wrote:
>> Il 06/11/2013 20:38, Peter Lieven ha scritto:
>>> 
>>> Am 06.11.2013 um 14:09 schrieb Paolo Bonzini :
>>> 
 Il 06/11/2013 14:08, Stefan Hajnoczi ha scritto:
> Thanks, applied to my block-next tree:
> https://github.com/stefanha/qemu/commits/block-next
> 
> This will go into QEMU 1.8.  Since it's a new feature that touches core
> block layer code and several block drivers, I can't merge it into
> 1.7-rc.
 
 Yay!  With these patches I can properly emulate WRITE SAME in the SCSI
 layer.
>>> 
>>> Yes, you can, but only when write same is used to write zeroes. The other
>>> case is still unhandled or needs emulation.
>> 
>> WRITE SAME with UNMAP is incorrect in the current QEMU code.  Your patch
>> let me fix it so that the payload can be read back with no change, *and*
>> sectors are unmapped when possible.
>> 
>> Paolo
>> 




[Qemu-devel] [Bug 1243287] Re: [KVM/QEMU][ARM][SAUCY] fails to boot cloud-image due to host kvm fail

2013-11-08 Thread Peter Maydell
(1) If you are trying to mmap /dev/mem then you are going to be
accessing physical addresses. This is obviously totally board specific.
In particular the memory layout between a Midway system and a KVM
virtual machine is likely to be different (you don't say what QEMU
configuration you're using) so it is unsurprising that looking at
/dev/mem address zero gives different effects between the host and the
VM. If dmidecode wants to access physical address space it needs to be
aware of the physical memory layout of every board it might run on (and
that is an enormous range of systems for ARM).

(2) The guest kernel has sufficient privilege to cause kvm to exit (the
easiest way it can do this is to do something that KVM doesn't support,
such as trying to access emulated devices via LDM/STM or some other
"complex" instruction). If you let your guest userspace open /dev/mem it
has equivalent privilege and can do the same thing (again, by trying LDM
on device MMIO space)

(3) My kernel sources don't have any references to CONFIG_STRICT_MEM,
and the only two google hits are references back to this bug report, so
I'm not sure what this is.

My best guess is that there are two underlying bugs here:
a. CONFIG_STRICT_MEM (whatever that is) either doesn't work with ARM or doesn't 
work with KVM or doesn't work with KVM/ARM
b. dmidecode is making bad assumptions about the physical memory layout of the 
machine it is running on and needs to be fixed somehow (what does it think is 
the actual data it is copying out of physical memory??)

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1243287

Title:
  [KVM/QEMU][ARM][SAUCY] fails to boot cloud-image due to host kvm fail

Status in QEMU:
  Confirmed

Bug description:
  On booting the cloud image using qemu/kvm fails with the following
  error:

  Cloud-init v. 0.7.3 running 'init' at Thu, 03 Oct 2013 16:45:21 +. Up 
360.78 seconds.
  ci-info: +Net device info+
  ci-info: ++--+---+---+---+
  ci-info: | Device | Up | Address | Mask | Hw-Address |
  ci-info: ++--+---+---+---+
  ci-info: | lo | True | 127.0.0.1 | 255.0.0.0 | . |
  ci-info: | eth0 | True | 10.0.2.15 | 255.255.255.0 | 52:54:00:12:34:56 |
  ci-info: ++--+---+---+---+
  ci-info: ++Route 
info++
  ci-info: 
+---+-+--+---+---+---+
  ci-info: | Route | Destination | Gateway | Genmask | Interface | Flags |
  ci-info: 
+---+-+--+---+---+---+
  ci-info: | 0 | 0.0.0.0 | 10.0.2.2 | 0.0.0.0 | eth0 | UG |
  ci-info: | 1 | 10.0.2.0 | 0.0.0.0 | 255.255.255.0 | eth0 | U |
  ci-info: 
+---+-+--+---+---+---+
  error: kvm run failed Function not implemented

  /usr/lib/python2.7/dist-
  packages/cloudinit/sources/DataSourceAltCloud.py assumes that
  dmidecode command is availabe (ie it assumes that system is x86) on
  arm systems there is no dmidecode command so host kvm fails with the
  message "error: kvm run failed Function not implemented"

  The patch makes get_cloud_type() function return with UNKNOWN for ARM
  systems.

  I was able to boot the cloud-image on ARM after applying this change.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1243287/+subscriptions



Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements

2013-11-08 Thread ronnie sahlberg
It would mean that any version of libiscsi from 1.1 or later would
work   and there would not be the issues such as
UNMAP is only available from 1.2 and forward,   WRITESAME* is only
availabel from 1.3.
SANITIZE only being available from 1.9 ...


On Fri, Nov 8, 2013 at 9:52 AM, ronnie sahlberg
 wrote:
> For better support for older versions of libiscsi
> I think it would be good to convert all the
> iscsi_unmap_task/iscsi_writesame*_task/iscsi_*_task   functions with
> calls to the much more genric iscsi_scsi_command_sync().
>
> iscsi_scsi_command_sync() and iscsi_scsi_command_async() have been
> available since prior to version 1.1 and can be used to send arbitrary
> scsi opcodes to the target.
>
>
> iscsi_scsi_command_async() is already used instead of
> iscs_read16/write16_async() since the read16/write16 helpers were
> added to libiscsi at a much later stage and there are examples of its
> use.
>
>
> Using iscsi_scsi_command_[a]sync() instead of for example
> iscsi_unmap_task() would mean that you can use the UNMAP opcode
> always, regardless version of libiscsi.
> It would mean that you need to build the CDB directly inside
> block/iscsi.c but that is not hard.
>
>
> I.e.  change block/iscsi.c to only use iscsi_scsi_command_[a]sync()
> for all opcodes it wants to send to the target.
>
>
>
>
> On Wed, Nov 6, 2013 at 1:16 PM, Paolo Bonzini  wrote:
>> Il 06/11/2013 20:38, Peter Lieven ha scritto:
>>>
>>> Am 06.11.2013 um 14:09 schrieb Paolo Bonzini :
>>>
 Il 06/11/2013 14:08, Stefan Hajnoczi ha scritto:
> Thanks, applied to my block-next tree:
> https://github.com/stefanha/qemu/commits/block-next
>
> This will go into QEMU 1.8.  Since it's a new feature that touches core
> block layer code and several block drivers, I can't merge it into
> 1.7-rc.

 Yay!  With these patches I can properly emulate WRITE SAME in the SCSI
 layer.
>>>
>>> Yes, you can, but only when write same is used to write zeroes. The other
>>> case is still unhandled or needs emulation.
>>
>> WRITE SAME with UNMAP is incorrect in the current QEMU code.  Your patch
>> let me fix it so that the payload can be read back with no change, *and*
>> sectors are unmapped when possible.
>>
>> Paolo
>>



Re: [Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements

2013-11-08 Thread ronnie sahlberg
For better support for older versions of libiscsi
I think it would be good to convert all the
iscsi_unmap_task/iscsi_writesame*_task/iscsi_*_task   functions with
calls to the much more genric iscsi_scsi_command_sync().

iscsi_scsi_command_sync() and iscsi_scsi_command_async() have been
available since prior to version 1.1 and can be used to send arbitrary
scsi opcodes to the target.


iscsi_scsi_command_async() is already used instead of
iscs_read16/write16_async() since the read16/write16 helpers were
added to libiscsi at a much later stage and there are examples of its
use.


Using iscsi_scsi_command_[a]sync() instead of for example
iscsi_unmap_task() would mean that you can use the UNMAP opcode
always, regardless version of libiscsi.
It would mean that you need to build the CDB directly inside
block/iscsi.c but that is not hard.


I.e.  change block/iscsi.c to only use iscsi_scsi_command_[a]sync()
for all opcodes it wants to send to the target.




On Wed, Nov 6, 2013 at 1:16 PM, Paolo Bonzini  wrote:
> Il 06/11/2013 20:38, Peter Lieven ha scritto:
>>
>> Am 06.11.2013 um 14:09 schrieb Paolo Bonzini :
>>
>>> Il 06/11/2013 14:08, Stefan Hajnoczi ha scritto:
 Thanks, applied to my block-next tree:
 https://github.com/stefanha/qemu/commits/block-next

 This will go into QEMU 1.8.  Since it's a new feature that touches core
 block layer code and several block drivers, I can't merge it into
 1.7-rc.
>>>
>>> Yay!  With these patches I can properly emulate WRITE SAME in the SCSI
>>> layer.
>>
>> Yes, you can, but only when write same is used to write zeroes. The other
>> case is still unhandled or needs emulation.
>
> WRITE SAME with UNMAP is incorrect in the current QEMU code.  Your patch
> let me fix it so that the payload can be read back with no change, *and*
> sectors are unmapped when possible.
>
> Paolo
>



[Qemu-devel] [Bug 1243287] Re: [KVM/QEMU][ARM][SAUCY] fails to boot cloud-image due to host kvm fail

2013-11-08 Thread Manoj Iyer
** Changed in: qemu
   Status: New => Confirmed

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1243287

Title:
  [KVM/QEMU][ARM][SAUCY] fails to boot cloud-image due to host kvm fail

Status in QEMU:
  Confirmed

Bug description:
  On booting the cloud image using qemu/kvm fails with the following
  error:

  Cloud-init v. 0.7.3 running 'init' at Thu, 03 Oct 2013 16:45:21 +. Up 
360.78 seconds.
  ci-info: +Net device info+
  ci-info: ++--+---+---+---+
  ci-info: | Device | Up | Address | Mask | Hw-Address |
  ci-info: ++--+---+---+---+
  ci-info: | lo | True | 127.0.0.1 | 255.0.0.0 | . |
  ci-info: | eth0 | True | 10.0.2.15 | 255.255.255.0 | 52:54:00:12:34:56 |
  ci-info: ++--+---+---+---+
  ci-info: ++Route 
info++
  ci-info: 
+---+-+--+---+---+---+
  ci-info: | Route | Destination | Gateway | Genmask | Interface | Flags |
  ci-info: 
+---+-+--+---+---+---+
  ci-info: | 0 | 0.0.0.0 | 10.0.2.2 | 0.0.0.0 | eth0 | UG |
  ci-info: | 1 | 10.0.2.0 | 0.0.0.0 | 255.255.255.0 | eth0 | U |
  ci-info: 
+---+-+--+---+---+---+
  error: kvm run failed Function not implemented

  /usr/lib/python2.7/dist-
  packages/cloudinit/sources/DataSourceAltCloud.py assumes that
  dmidecode command is availabe (ie it assumes that system is x86) on
  arm systems there is no dmidecode command so host kvm fails with the
  message "error: kvm run failed Function not implemented"

  The patch makes get_cloud_type() function return with UNKNOWN for ARM
  systems.

  I was able to boot the cloud-image on ARM after applying this change.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1243287/+subscriptions



Re: [Qemu-devel] [PATCH] virtio-net: Correctly store multicast filter entries

2013-11-08 Thread Vlad Yasevich

On 11/08/2013 11:07 AM, Vlad Yasevich wrote:

Commit 921ac5d0f3a0df869db5ce4edf752f51d8b1596a
 virtio-net: remove layout assumptions for ctrl vq
introduced a regression where the multicast address filter
entries are written to the beginning of the mac table array,
thus overwriting any unicast addresses that may have been
programmed in the filter.
The multicast addresses should be written after all the
unicast addresses.

Signed-off-by: Vlad Yasevich 


Please ignore.  Just saw pull pull request with similar patch.

-vlad


---
  hw/net/virtio-net.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 22dbd05..94e8b68 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -657,7 +657,8 @@ static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
  }

  if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
-s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
+s = iov_to_buf(iov, iov_cnt, 0,
+   n->mac_table.macs + (n->mac_table.in_use * ETH_ALEN),
 mac_data.entries * ETH_ALEN);
  if (s != mac_data.entries * ETH_ALEN) {
  goto error;






Re: [Qemu-devel] [RFC PATCH] i386: Add _PXM method to ACPI CPU objects

2013-11-08 Thread Igor Mammedov
On Fri, 8 Nov 2013 12:22:12 +0200
Vasilis Liaskovitis  wrote:

> Hi,
> 
> On Thu, Nov 07, 2013 at 03:03:42PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Nov 07, 2013 at 01:41:59PM +0100, Vasilis Liaskovitis wrote:
> > > This patch adds a _PXM method to ACPI CPU objects for the pc machine. The 
> > > _PXM
> > > value is derived from the passed in guest info, same way as CPU SRAT 
> > > entries.
> > > 
> > > The motivation for this patch is a CPU hot-unplug/hot-plug bug observed 
> > > when
> > > using a 3.11 linux guest kernel on a multi-NUMA node qemu/kvm VM. The 
> > > linux
> > > guest kernel parses the SRAT CPU entries at boot time and stores them in 
> > > the
> > > array __apicid_to_node. When a CPU is hot-removed, the linux guest kernel
> > > resets the removed CPU's __apicid_to_node entry to NO_NUMA_NODE (kernel 
> > > commit
> > > c4c60524). When the removed cpu is hot-added again, the linux kernel 
> > > looks up
> > > the hot-added cpu object's _PXM method instead of somehow re-discovering 
> > > the
> > > SRAT entry info. With current qemu/seabios, the _PXM method is not found, 
> > > and
> > > the CPU is thus hot-plugged in the default NUMA node 0. (The problem does 
> > > not
> > > show up on initial hotplug of a cpu; the PXM method is still not found in 
> > > this
> > > case, but the kernel still has the correct proximity value from the CPU's 
> > > SRAT
> > > entry stored in __apicid_to_node)
> > > 
> > > ACPI spec mentions that the _PXM method is the correct way to determine
> > > proximity information at hot-add time.
> > 
> > Where does it say this?
> > I found this:
> > If the Local APIC ID / Local SAPIC ID / Local x2APIC ID of a dynamically
> > added processor is not present in the System Resource Affinity Table
> > (SRAT), a _PXM object must exist for the processor’s device or one of
> > its ancestors in the ACPI Namespace.
> > 
> > Does this mean that linux is buggy, and should be fixed up to look up
> > the apic ID in SRAT?
> 
> The quote above suggests that if SRAT is absent, _PXM should be present.
> Seabios/qemu provide SRAT entries, and  no _PXM. The fact that the kernel
> resets the parse SRAT info on hot-remove time looks like a kernel problem.
> 
> But As Toshi Kani mentioned in the original thread, here is a quote from ACPI
> 5.0, stating _PXM and only _PXM should be used at hot-plug time:
> 
> ===
> 17.2.1 System Resource Affinity Table Definition
> 
> This optional System Resource Affinity Table (SRAT) provides the boot
> time description of the processor and memory ranges belonging to a
> system locality. OSPM will consume the SRAT only at boot time. OSPM
> should use _PXM for any devices that are hot-added into the system after
> boot up.
> 
> 
> So in this sense, the kernel is correct (kernel only uses _PXM at hot-plug 
> time)
> , and qemu/Seabios should have _PXM methods for hot operations.

in terms of RFC SHOULD doesn't mean MUST, and in my interpretation of above is
that SRAT parsed once but it doesn't mean that OS should forget data from it.
Anyway we surely can have both in QEMU.

> 
> > 
> > > So far, qemu/seabios do not provide this
> > > method for CPUs. So regardless of kernel behaviour, it is a good idea to 
> > > add
> > > this _PXM method. Since ACPI table generation has recently been moved from
> > > seabios to qemu, we do this in qemu.
> > > 
> > > Note that the above hot-remove/hot-add scenario has been tested on an 
> > > older
> > > qemu + non-upstreamed patches for cpu hot-removal support, and not on qemu
> > > master (since cpu-del support is still not on master). The only testing 
> > > done
> > > with qemu/seabios master and this patch, are successful boots of 
> > > multi-node
> > > linux and windows8 guests.
> > > 
> > > For the initial discussion on seabios and linux-acpi lists see
> > > http://www.spinics.net/lists/linux-acpi/msg47058.html
> > > 
> > > Signed-off-by: Vasilis Liaskovitis 
> > > Reviewed-by: Thilo Fromm 
> > 
> > Even if this is a linux bug, I have no issue with working around
> > it in qemu.
> > 
> > But I think proper testing needs to be done with rebased upport for cpu-del.
> 
> Ok, I can try to rebase cpu-del support for testing. If there are cpu-del bits
> already somewhere (Igor?) and not merged yet, please point me to them.
> 
> > 
> > > ---
> > >  hw/i386/acpi-build.c  |2 ++
> > >  hw/i386/ssdt-proc.dsl |2 ++
> > >  2 files changed, 4 insertions(+)
> > > 
> > > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> > > index 6cfa044..9373f5e 100644
> > > --- a/hw/i386/acpi-build.c
> > > +++ b/hw/i386/acpi-build.c
> > > @@ -603,6 +603,7 @@ static inline char acpi_get_hex(uint32_t val)
> > >  #define ACPI_PROC_OFFSET_CPUHEX (*ssdt_proc_name - *ssdt_proc_start + 2)
> > >  #define ACPI_PROC_OFFSET_CPUID1 (*ssdt_proc_name - *ssdt_proc_start + 4)
> > >  #define ACPI_PROC_OFFSET_CPUID2 (*ssdt_proc_id - *ssdt_proc_start)
> > > +#define ACPI_PROC_OFFSET_CPUPXM (*ssdt_proc_pxm - *ssdt_proc_start)
> > >  #define AC

Re: [Qemu-devel] [PATCH] exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Laszlo Ersek
On 11/08/13 18:15, Paolo Bonzini wrote:
> Il 08/11/2013 18:09, Andreas Färber ha scritto:
>> I don't have personal experience with using external files there yet,
>> but I was hoping that using -pflash pc-bios/bios.bin would just work
>> since that'll be symlinked for execution from build directory iiuc.
>>
>> My thinking was the test could then verify that the BIOS does not read
>> as all 0xff, whereas Paolo's suggestion sounds more elaborate, ruling
>> out actual 0xff within SeaBIOS by having a positive pattern to check for.
> 
> Yeah, that's also a good test and easier!

Believe it or not, I did think of both 0x00..0xFF (from a small static
file) and using an actual bios image :) I think I'd prefer 0x00..0xFF
(with a temporary file as Paolo suggested) because "what is it" looks
more attractive than "what is it not" to me.

Also, opening the packaged bios binary from the build dir would need
extra symlinking again; mkstemp() seems cleaner.

Thanks!
Laszlo



Re: [Qemu-devel] [PATCH V4 5/5] qemu-iotests: add test for snapshot in qemu-img convert

2013-11-08 Thread Jeff Cody
On Fri, Oct 11, 2013 at 10:33:31AM +0800, Wenchao Xia wrote:
> Signed-off-by: Wenchao Xia 
> ---
>  tests/qemu-iotests/058 |   19 ++-
>  tests/qemu-iotests/058.out |   12 
>  2 files changed, 30 insertions(+), 1 deletions(-)
> 
> diff --git a/tests/qemu-iotests/058 b/tests/qemu-iotests/058
> index 5b821cf..d4987ae 100755
> --- a/tests/qemu-iotests/058
> +++ b/tests/qemu-iotests/058
> @@ -1,6 +1,6 @@
>  #!/bin/bash
>  #
> -# Test export internal snapshot by qemu-nbd.
> +# Test export internal snapshot by qemu-nbd, convert it by qemu-img.
>  #
>  # Copyright (C) 2013 IBM, Inc.
>  #
> @@ -33,6 +33,8 @@ status=1# failure is the default!
>  nbd_snapshot_port=10850
>  nbd_snapshot_img="nbd:127.0.0.1:$nbd_snapshot_port"
>  
> +converted_image=$TEST_IMG.converted
> +
>  _export_nbd_snapshot()
>  {
>  $QEMU_NBD -v -t -b 127.0.0.1 -p $nbd_snapshot_port "$TEST_IMG" -l $1 &
> @@ -53,6 +55,7 @@ _cleanup()
>  kill $NBD_SNAPSHOT_PID
>  fi
>  _cleanup_test_img
> +rm -f $converted_image

Please quote $converted_image (especially with rm -f) - it is also
used unquoted later on in this file, as well.

>  }
>  trap "_cleanup; exit \$status" 0 1 2 3 15
>  
> @@ -96,6 +99,20 @@ echo "== verifying the exported snapshot with patterns =="
>  $QEMU_IO -c 'read -P 0xa 0x1000 0x1000' $nbd_snapshot_img | _filter_qemu_io
>  $QEMU_IO -c 'read -P 0xb 0x2000 0x1000' $nbd_snapshot_img | _filter_qemu_io
>  
> +$QEMU_IMG convert $TEST_IMG -l sn1 -O qcow2 $converted_image

$TEST_IMG needs quoting here, and again below

> +
> +echo
> +echo "== verifying the converted snapshot with patterns =="
> +$QEMU_IO -c 'read -P 0xa 0x1000 0x1000' $converted_image | _filter_qemu_io
> +$QEMU_IO -c 'read -P 0xb 0x2000 0x1000' $converted_image | _filter_qemu_io
> +
> +$QEMU_IMG convert $TEST_IMG -l snapshot.name=sn1 -O qcow2 $converted_image
> +
> +echo
> +echo "== verifying the converted snapshot with patterns =="
> +$QEMU_IO -c 'read -P 0xa 0x1000 0x1000' $converted_image | _filter_qemu_io
> +$QEMU_IO -c 'read -P 0xb 0x2000 0x1000' $converted_image | _filter_qemu_io
> +
>  # success, all done
>  echo "*** done"
>  rm -f $seq.full
> diff --git a/tests/qemu-iotests/058.out b/tests/qemu-iotests/058.out
> index cc4b8ca..a8381b9 100644
> --- a/tests/qemu-iotests/058.out
> +++ b/tests/qemu-iotests/058.out
> @@ -29,4 +29,16 @@ read 4096/4096 bytes at offset 4096
>  4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
>  read 4096/4096 bytes at offset 8192
>  4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +
> +== verifying the converted snapshot with patterns ==
> +read 4096/4096 bytes at offset 4096
> +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +read 4096/4096 bytes at offset 8192
> +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +
> +== verifying the converted snapshot with patterns ==
> +read 4096/4096 bytes at offset 4096
> +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +read 4096/4096 bytes at offset 8192
> +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
>  *** done
> -- 
> 1.7.1
> 
> 



Re: [Qemu-devel] [PATCH] exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Paolo Bonzini
Il 08/11/2013 18:09, Andreas Färber ha scritto:
> I don't have personal experience with using external files there yet,
> but I was hoping that using -pflash pc-bios/bios.bin would just work
> since that'll be symlinked for execution from build directory iiuc.
> 
> My thinking was the test could then verify that the BIOS does not read
> as all 0xff, whereas Paolo's suggestion sounds more elaborate, ruling
> out actual 0xff within SeaBIOS by having a positive pattern to check for.

Yeah, that's also a good test and easier!

Paolo



Re: [Qemu-devel] [PATCH V4 3/5] qemu-iotests: add 058 internal snapshot export with qemu-nbd case

2013-11-08 Thread Jeff Cody
On Fri, Oct 11, 2013 at 10:33:29AM +0800, Wenchao Xia wrote:
> Signed-off-by: Wenchao Xia 
> ---
>  tests/qemu-iotests/058 |  102 
> 
>  tests/qemu-iotests/058.out |   32 ++
>  tests/qemu-iotests/check   |1 +
>  tests/qemu-iotests/group   |1 +
>  4 files changed, 136 insertions(+), 0 deletions(-)
>  create mode 100755 tests/qemu-iotests/058
>  create mode 100644 tests/qemu-iotests/058.out
> 
> diff --git a/tests/qemu-iotests/058 b/tests/qemu-iotests/058
> new file mode 100755
> index 000..5b821cf
> --- /dev/null
> +++ b/tests/qemu-iotests/058
> @@ -0,0 +1,102 @@
> +#!/bin/bash
> +#
> +# Test export internal snapshot by qemu-nbd.
> +#
> +# Copyright (C) 2013 IBM, Inc.
> +#
> +# Based on 029.
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see .
> +#
> +
> +# creator
> +owner=xiaw...@linux.vnet.ibm.com
> +
> +seq=`basename $0`
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1 # failure is the default!
> +
> +nbd_snapshot_port=10850
> +nbd_snapshot_img="nbd:127.0.0.1:$nbd_snapshot_port"
> +
> +_export_nbd_snapshot()
> +{
> +$QEMU_NBD -v -t -b 127.0.0.1 -p $nbd_snapshot_port "$TEST_IMG" -l $1 &
> +NBD_SNAPSHOT_PID=$!
> +sleep 1
> +}
> +
> +_export_nbd_snapshot1()
> +{
> +$QEMU_NBD -v -t -b 127.0.0.1 -p $nbd_snapshot_port $TEST_IMG -l 
> snapshot.name=$1 &

Please quote $TEST_IMG, so that it works with spaced pathnames.

> +NBD_SNAPSHOT_PID=$!
> +sleep 1
> +}
> +
> +_cleanup()
> +{
> +if [ -n "$NBD_SNAPSHOT_PID" ]; then
> +kill $NBD_SNAPSHOT_PID
> +fi
> +_cleanup_test_img
> +}
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +# get standard environment, filters and checks
> +. ./common.rc
> +. ./common.filter
> +. ./common.pattern
> +
> +_supported_fmt qcow2
> +_supported_proto generic
> +
> +echo
> +echo "== preparing image =="
> +_make_test_img 64M
> +$QEMU_IO -c 'write -P 0xa 0x1000 0x1000' $TEST_IMG | _filter_qemu_io
> +$QEMU_IO -c 'write -P 0xb 0x2000 0x1000' $TEST_IMG | _filter_qemu_io
> +$QEMU_IMG snapshot -c sn1 $TEST_IMG
> +$QEMU_IO -c 'write -P 0xc 0x1000 0x1000' $TEST_IMG | _filter_qemu_io
> +$QEMU_IO -c 'write -P 0xd 0x2000 0x1000' $TEST_IMG | _filter_qemu_io
> +_check_test_img
> +
> +echo
> +echo "== verifying the image file with patterns =="
> +$QEMU_IO -c 'read -P 0xc 0x1000 0x1000' $TEST_IMG | _filter_qemu_io
> +$QEMU_IO -c 'read -P 0xd 0x2000 0x1000' $TEST_IMG | _filter_qemu_io

More quoting needing ($TEST_IMG again)

> +
> +_export_nbd_snapshot sn1
> +
> +echo
> +echo "== verifying the exported snapshot with patterns =="
> +$QEMU_IO -c 'read -P 0xa 0x1000 0x1000' $nbd_snapshot_img | _filter_qemu_io
> +$QEMU_IO -c 'read -P 0xb 0x2000 0x1000' $nbd_snapshot_img | _filter_qemu_io
> +
> +kill $NBD_SNAPSHOT_PID
> +sleep 1
> +
> +_export_nbd_snapshot1 sn1
> +
> +echo
> +echo "== verifying the exported snapshot with patterns =="
> +$QEMU_IO -c 'read -P 0xa 0x1000 0x1000' $nbd_snapshot_img | _filter_qemu_io
> +$QEMU_IO -c 'read -P 0xb 0x2000 0x1000' $nbd_snapshot_img | _filter_qemu_io
> +
> +# success, all done
> +echo "*** done"
> +rm -f $seq.full
> +status=0
> diff --git a/tests/qemu-iotests/058.out b/tests/qemu-iotests/058.out
> new file mode 100644
> index 000..cc4b8ca
> --- /dev/null
> +++ b/tests/qemu-iotests/058.out
> @@ -0,0 +1,32 @@
> +QA output created by 058
> +
> +== preparing image ==
> +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
> +wrote 4096/4096 bytes at offset 4096
> +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +wrote 4096/4096 bytes at offset 8192
> +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +wrote 4096/4096 bytes at offset 4096
> +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +wrote 4096/4096 bytes at offset 8192
> +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +No errors were found on the image.
> +
> +== verifying the image file with patterns ==
> +read 4096/4096 bytes at offset 4096
> +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +read 4096/4096 bytes at offset 8192
> +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +
> +== verifying the exported snapshot with patterns ==
> +read 4096/4096 bytes at offset 4096
> +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +read 4096/4096 bytes at offset 8192
> +4 KiB, X ops; XX:XX:XX.X (

Re: [Qemu-devel] [PATCH] exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Andreas Färber
Am 08.11.2013 17:19, schrieb Laszlo Ersek:
> On 11/08/13 16:42, Andreas Färber wrote:
>> Jordan or Laszlo,
>>
>> Can either of you please add a small test case to i440fx-test using
>> -pflash and doing a read in the PCI hole (or wherever exactly) so that
>> we can avoid regressing yet again? :)
> 
> For -pflash we need a small test file. I'm thinking about creating a 512
> byte (1 sector) big file, and modifying the qemu command line in
> "tests/i440fx-test.c".
> 
> I'm not very familiar with external files in tests though. Can I model
> it on "qdict-test-data.txt"?
> 
> "qdict-test-data.txt" is located in the root source directory. When
> configure runs outside the root source directory (= separate build dir),
> it symlinks it. And, the "check-qdict.c" test program opens it (with
> fopen()) simply by basename (no path prefix). Can I follow that?

I don't have personal experience with using external files there yet,
but I was hoping that using -pflash pc-bios/bios.bin would just work
since that'll be symlinked for execution from build directory iiuc.

My thinking was the test could then verify that the BIOS does not read
as all 0xff, whereas Paolo's suggestion sounds more elaborate, ruling
out actual 0xff within SeaBIOS by having a positive pattern to check for.

Regards,
Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg



Re: [Qemu-devel] [PATCH v2] target-ppc: move POWER7+ to a separate family

2013-11-08 Thread Andreas Färber
Am 08.11.2013 15:54, schrieb Alexey Kardashevskiy:
> On 11/09/2013 12:44 AM, Andreas Färber wrote:
>> Am 08.11.2013 03:37, schrieb Alexey Kardashevskiy:
>>> So far POWER7+ was a part of POWER7 family. However it has a different
>>> PVR base value so in order to support PVR masks, it needs a separate
>>> family class.
>>>
>>
>> Alexey,
>>
>>> Another reason to make a POWER7+ family is that its name in the device
>>> tree (/proc/device-tree/cpus/cpu*) should be "Power7+" but not "Power7"
>>> and this cannot be easily fixed without a new family class.
>>>
>>> This adds a new family class, PVR base and mask values and moves
>>> Power7+ v2.1 CPU to a new family. The class init function is copied
>>> from the POWER7 family.
>>>
>>> Signed-off-by: Alexey Kardashevskiy 
>>> ---
>>> Changes:
>>> v2:
>>> * added VSX enable bit
>>> ---
>>>  target-ppc/cpu-models.c |  2 +-
>>>  target-ppc/cpu-models.h |  2 ++
>>>  target-ppc/translate_init.c | 38 ++
>>>  3 files changed, 41 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
>>> index 04d88c5..7c9466f 100644
>>> --- a/target-ppc/cpu-models.c
>>> +++ b/target-ppc/cpu-models.c
>>> @@ -1140,7 +1140,7 @@
>>>  "POWER7 v2.1")
>>>  POWERPC_DEF("POWER7_v2.3",   CPU_POWERPC_POWER7_v23, 
>>> POWER7,
>>>  "POWER7 v2.3")
>>> -POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,
>>> POWER7,
>>> +POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,
>>> POWER7P,
>>>  "POWER7+ v2.1")
>>>  POWERPC_DEF("POWER8_v1.0",   CPU_POWERPC_POWER8_v10, 
>>> POWER8,
>>>  "POWER8 v1.0")
>>> diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
>>> index 731ec4a..49ba4a4 100644
>>> --- a/target-ppc/cpu-models.h
>>> +++ b/target-ppc/cpu-models.h
>>> @@ -558,6 +558,8 @@ enum {
>>>  CPU_POWERPC_POWER7_v20 = 0x003F0200,
>>>  CPU_POWERPC_POWER7_v21 = 0x003F0201,
>>>  CPU_POWERPC_POWER7_v23 = 0x003F0203,
>>> +CPU_POWERPC_POWER7P_BASE   = 0x004A,
>>> +CPU_POWERPC_POWER7P_MASK   = 0x,
>>>  CPU_POWERPC_POWER7P_v21= 0x004A0201,
>>>  CPU_POWERPC_POWER8_BASE= 0x004B,
>>>  CPU_POWERPC_POWER8_MASK= 0x,
>>> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
>>> index 35d1389..c030a20 100644
>>> --- a/target-ppc/translate_init.c
>>> +++ b/target-ppc/translate_init.c
>>> @@ -7253,6 +7253,44 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
>>>  pcc->l1_icache_size = 0x8000;
>>>  }
>>>  
>>> +POWERPC_FAMILY(POWER7P)(ObjectClass *oc, void *data)
>>> +{
>>> +DeviceClass *dc = DEVICE_CLASS(oc);
>>> +PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
>>> +
>>> +dc->fw_name = "PowerPC,POWER7+";
>>
>> Apart from the commit message differing from the code...
> 
> 
> In what part?

The spelling of POWER7. You write it should be "Power7+" but implement
it as upper-case "POWER7+" (ignoring the "PowerPC," prefix, that is).

>> We've had this discussion before: Jacques reported that on his POWER7+
>> box only "POWER7" is shown, not "POWER7+", equivalent to my POWER5+ box
>> showing only "PowerPC,POWER5". Compare my commit, which documents this:
>>
>> http://git.qemu.org/?p=qemu.git;a=commit;h=793826cd460828975591f289de78672af4a47ef9
>>
>> So, adding a POWER7P family seems correct to me, just the fw_name seems
>> wrong - or you'll need to investigate further why there are conflicting
>> reports of how it is shown. Possibly based on revision or pHyp vs. SLOF?
> 
> 
> Yes we have had this discussion. Paul said it should "POWER7+". The only
> P7+ machine I have handy shows "+":
> 
> [aik@vpl4 ~]$ ls -d /proc/device-tree/cpus/PowerPC*
> /proc/device-tree/cpus/PowerPC,POWER7+@0
> /proc/device-tree/cpus/PowerPC,POWER7+@2c
> /proc/device-tree/cpus/PowerPC,POWER7+@10
> /proc/device-tree/cpus/PowerPC,POWER7+@30
> /proc/device-tree/cpus/PowerPC,POWER7+@14
> /proc/device-tree/cpus/PowerPC,POWER7+@34
> /proc/device-tree/cpus/PowerPC,POWER7+@18
> /proc/device-tree/cpus/PowerPC,POWER7+@38
> /proc/device-tree/cpus/PowerPC,POWER7+@1c
> /proc/device-tree/cpus/PowerPC,POWER7+@3c
> /proc/device-tree/cpus/PowerPC,POWER7+@20
> /proc/device-tree/cpus/PowerPC,POWER7+@4
> /proc/device-tree/cpus/PowerPC,POWER7+@24
> /proc/device-tree/cpus/PowerPC,POWER7+@8
> /proc/device-tree/cpus/PowerPC,POWER7+@28
> /proc/device-tree/cpus/PowerPC,POWER7+@c
> 
> And this is a host, not a guest. I do not see any good reason to make dt
> names different.
> 
> And this does not really matter if there is "+" or not for anybody as far
> as we concerned, ppc64_cpu works either way.

Right, it may not matter, but I expect you to reference the above commit
id and explain why it should be POWER7+ after all. You failed to come up
with that answer before that patch got applied, so we need to correct
me/it no

Re: [Qemu-devel] [PATCH 1/7] usb: remove old usb-host code

2013-11-08 Thread Jan Kiszka
On 2013-11-08 16:39, Gerd Hoffmann wrote:
>   Hi,
> 
>> OK, then here is the first issue I ran into while trying libusbx (git
>> head, i.e. 1.0.17+: The new stack causes significant latency issues that
>> makes it almost unusable for pass-through of USB audio devices (some
>> headset in my case). Reverting usb-linux and disabling libusb over QEMU
>> git head makes things work again. I'll have to stick with this for now
>> as it is affecting my work environment.
>>
>> Any spontaneous ideas how to analyse or even resolve this?
> 
> Try setting isobsize property to something smaller than 32 (which is the
> default).

OK, isobsize=2 and isobufs=32 helped, possibly other combinations as
well - but not just reducing isobsize or increasing isobufs. Any theory
about this? How can we find better defaults?

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux



[Qemu-devel] [PULL 1/3] net: disallow to specify multicast MAC address

2013-11-08 Thread Stefan Hajnoczi
From: Dmitry Krivenok 

[Assigning a multicast MAC address to a NIC leads to confusing behavior.
Reject multicast MAC addresses so users are alerted to their error
straight away.

The "net/eth.h" in6_addr rename prevents a name collision with
 on Linux.
-- Stefan]

Signed-off-by: Dmitry V. Krivenok 
Reviewed-by: Amos Kong 
Signed-off-by: Stefan Hajnoczi 
---
 include/net/eth.h | 6 +++---
 net/net.c | 6 ++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/net/eth.h b/include/net/eth.h
index 1d48e06..b3273b8 100644
--- a/include/net/eth.h
+++ b/include/net/eth.h
@@ -84,7 +84,7 @@ typedef struct ip_pseudo_header {
 } ip_pseudo_header;
 
 /* IPv6 address */
-struct in6_addr {
+struct in6_address {
 union {
 uint8_t __u6_addr8[16];
 } __in6_u;
@@ -105,8 +105,8 @@ struct ip6_header {
 uint8_t  ip6_un3_ecn;  /* 2 bits ECN, top 6 bits payload length */
 } ip6_un3;
 } ip6_ctlun;
-struct in6_addr ip6_src; /* source address */
-struct in6_addr ip6_dst; /* destination address */
+struct in6_address ip6_src;/* source address */
+struct in6_address ip6_dst;/* destination address */
 };
 
 struct ip6_ext_hdr {
diff --git a/net/net.c b/net/net.c
index c330c9a..870d3bb 100644
--- a/net/net.c
+++ b/net/net.c
@@ -27,6 +27,7 @@
 #include "clients.h"
 #include "hub.h"
 #include "net/slirp.h"
+#include "net/eth.h"
 #include "util.h"
 
 #include "monitor/monitor.h"
@@ -689,6 +690,11 @@ static int net_init_nic(const NetClientOptions *opts, 
const char *name,
 error_report("invalid syntax for ethernet address");
 return -1;
 }
+if (nic->has_macaddr &&
+is_multicast_ether_addr(nd->macaddr.a)) {
+error_report("NIC cannot have multicast MAC address (odd 1st byte)");
+return -1;
+}
 qemu_macaddr_default_if_unset(&nd->macaddr);
 
 if (nic->has_vectors) {
-- 
1.8.3.1




[Qemu-devel] [PULL 3/3] virtio-net: broken RX filtering logic fixed

2013-11-08 Thread Stefan Hajnoczi
From: Dmitry Fleytman 

Upon processing of VIRTIO_NET_CTRL_MAC_TABLE_SET command
multicast list overwrites unicast list in mac_table.
This leads to broken logic for both unicast and multicast RX filtering.

Signed-off-by: Dmitry Fleytman 
Signed-off-by: Stefan Hajnoczi 
---
 hw/net/virtio-net.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index ae51d96..613f144 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -657,7 +657,8 @@ static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
 }
 
 if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
-s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
+s = iov_to_buf(iov, iov_cnt, 0,
+   &n->mac_table.macs[n->mac_table.in_use * ETH_ALEN],
mac_data.entries * ETH_ALEN);
 if (s != mac_data.entries * ETH_ALEN) {
 goto error;
-- 
1.8.3.1




[Qemu-devel] [PULL for-1.7 0/3] Net patches

2013-11-08 Thread Stefan Hajnoczi
Bug fixes for QEMU 1.7.

The following changes since commit 964668b03d26f0b5baa5e5aff0c966f4fcb76e9e:

  Update version for 1.7.0-rc0 release (2013-11-06 21:49:39 -0800)

are available in the git repository at:

  git://github.com/stefanha/qemu.git net

for you to fetch changes up to cc386e96727442f5b67052d4e0a602f6f652ffe6:

  virtio-net: broken RX filtering logic fixed (2013-11-08 17:32:34 +0100)


Dmitry Fleytman (1):
  virtio-net: broken RX filtering logic fixed

Dmitry Krivenok (1):
  net: disallow to specify multicast MAC address

Sergey Fedorov (1):
  net: fix qemu_flush_queued_packets() in presence of a hub

 hw/net/virtio-net.c | 3 ++-
 include/net/eth.h   | 6 +++---
 net/net.c   | 7 ++-
 3 files changed, 11 insertions(+), 5 deletions(-)

-- 
1.8.3.1




[Qemu-devel] [PULL 2/3] net: fix qemu_flush_queued_packets() in presence of a hub

2013-11-08 Thread Stefan Hajnoczi
From: Sergey Fedorov 

Do not return after net_hub_flush(). Always flush callee network client
incoming queue.

Signed-off-by: Sergey Fedorov 
Signed-off-by: Stefan Hajnoczi 
---
 net/net.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/net.c b/net/net.c
index 870d3bb..0a88e68 100644
--- a/net/net.c
+++ b/net/net.c
@@ -443,7 +443,6 @@ void qemu_flush_queued_packets(NetClientState *nc)
 if (net_hub_flush(nc->peer)) {
 qemu_notify_event();
 }
-return;
 }
 if (qemu_net_queue_flush(nc->incoming_queue)) {
 /* We emptied the queue successfully, signal to the IO thread to repoll
-- 
1.8.3.1




Re: [Qemu-devel] [PATCH 0/2] Re: exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Igor Mammedov
On Thu,  7 Nov 2013 23:23:57 +0100
Laszlo Ersek  wrote:

> On 11/07/13 22:24, Marcel Apfelbaum wrote:
> 
> > Why pci-hole and system.flash collide? IMHO we should not play with
> > priorities here, better solve the collision.
> 
> What about this "beautiful" series? It produces
Laszlo,
 there is patch that removes PCI-hole aliases at all
 mapping PCI address space with -1 priority in system address space.
 so all access flash region range will go to it sice it's priority 0 

> 
> memory
> -000f (prio 0, RW): system
>   [...]
>   6000-ffdf (prio 0, RW): alias pci-hole @pci
>   6000-ffdf
>   [...]
>   ffe0- (prio 0, R-): system.flash
> 
> and I can run OVMF with it. It also stays within i386/pc.
> 
> Re 2/2, note that "below_4g_mem_size" never exceeds 0xe000 in
> pc_init1(), so the subtraction is safe.
> 
> Thanks
> Laszlo
> 
> Laszlo Ersek (2):
>   i386/pc: propagate flash size from pc_system_flash_init() to
> pc_init1()
>   i386/pc_piix: the pci-hole should end where the system flash starts
> 
>  include/hw/i386/pc.h |  6 --
>  hw/i386/pc.c |  5 +++--
>  hw/i386/pc_piix.c|  5 +++--
>  hw/i386/pc_q35.c |  3 ++-
>  hw/i386/pc_sysfw.c   | 10 +++---
>  5 files changed, 19 insertions(+), 10 deletions(-)
> 
> -- 
> 1.8.3.1
> 
> 


-- 
Regards,
  Igor



Re: [Qemu-devel] [PATCH V6 2/5] qemu-img: Add infinite loop checking in bdrv_new_open()

2013-11-08 Thread Stefan Weil
See more suggestions below.

Am 08.11.2013 17:19, schrieb Jeff Cody:
> On Tue, Nov 05, 2013 at 10:09:18PM -0500, Xu Wang wrote:
>> Every image should be checked if there is infinite loop in backing
>> file chain before open it. So infinite loop check was added into.

... if there is an infinite loop in the backing
file chain before opening it. ...

>> bdrv_new_open(). If @filename is opened without the flag
>> BDRV_O_NO_BACKING, the infinite loop check should be called.
>>
>> Signed-off-by: Xu Wang 
>> ---
>>  qemu-img.c | 8 
>>  1 file changed, 8 insertions(+)
>>
>> diff --git a/qemu-img.c b/qemu-img.c
>> index d5ec45b..3af7996 100644
>> --- a/qemu-img.c
>> +++ b/qemu-img.c
>> @@ -281,6 +281,14 @@ static BlockDriverState *bdrv_new_open(const char 
>> *filename,
>>  drv = NULL;
>>  }
>>  
>> +/* check backing file loop if the whole chain need to be opened */

... if the whole chain needs to be opened ...

>> +if (!(flags & BDRV_O_NO_BACKING) &&
>> +!bdrv_backing_chain_okay(filename, fmt)) {
>> +error_report("bdrv_new_open: Open %s failed. There is loop exists "
>> + "in the backing chain", filename);
> I suggest rewording this for grammar; something like:
>  +error_report("bdrv_new_open: Open %s failed. An infinite loop 
> exists "
>  + "in the backing chain", filename);
>
>
>> +goto fail;
>> +}
>> +
>>  ret = bdrv_open(bs, filename, NULL, flags, drv, &local_err);
>>  if (ret < 0) {
>>  error_report("Could not open '%s': %s", filename,
>> -- 
>> 1.8.1.4





Re: [Qemu-devel] [PATCH] exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Paolo Bonzini
Il 08/11/2013 17:19, Laszlo Ersek ha scritto:
> I'm not very familiar with external files in tests though. Can I model
> it on "qdict-test-data.txt"?
> 
> "qdict-test-data.txt" is located in the root source directory. When
> configure runs outside the root source directory (= separate build dir),
> it symlinks it. And, the "check-qdict.c" test program opens it (with
> fopen()) simply by basename (no path prefix). Can I follow that?

Yes, tests are always run from the root build directory.  Symlinking
files is not particularly awesome, but it works.

A patch to move qdict-test-data.txt to tests/ is welcome. :)

You can create a temporary file as well from the qtest.  That's probably
easier; just write 64 kB of 0x00 0x01 0x02 0x03...0xFF 0x00 0x01...
Also, please test both -bios and -pflash.

Paolo



Re: [Qemu-devel] [PATCH] exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Marcel Apfelbaum
On Fri, 2013-11-08 at 17:12 +0100, Paolo Bonzini wrote:
> Il 08/11/2013 16:08, Marcel Apfelbaum ha scritto:
> > Actually, as I see, the default behavior of "system" memory region
> > is to use unassigned_mem_ops that has valid.accepts method returning
> > false (no read/write methods). I don't see that read all-ones/ignore
> > writes is implemented.
> 
> Right, it's read-all-zeroes (see unassigned_mem_read).  It was
> read-all-ones for a brief time, then it got changed back to read-all-zeroes.
I remember something, Jan's patches got reverted because the modification
was system wide and not only for pci address space.
> 
> > This was the main reason I submitted this patch. I had *no* clue that
> > it would impact so much the system...
> 
> Yeah, it's not your fault.  But it should have been held back until 1.8.
>  Do you agree with reverting it?
Given the actual impact on version's stability, I agree
it is the right thing to do.

> 
> > I still think the patch is needed ans the guests will benefit from
> > more accurate PCI spec emulation.
> 
> Only buggy guests :) but yes, I agree it's a good thing to have.
Yes, it may be a driver there making some a wrong decision and
"reading-all-ones" may give it a clue. (the same for writes)

Thanks,
Marcel

> 
> Paolo






Re: [Qemu-devel] [PATCH V6 2/5] qemu-img: Add infinite loop checking in bdrv_new_open()

2013-11-08 Thread Jeff Cody
On Tue, Nov 05, 2013 at 10:09:18PM -0500, Xu Wang wrote:
> Every image should be checked if there is infinite loop in backing
> file chain before open it. So infinite loop check was added into
> bdrv_new_open(). If @filename is opened without the flag
> BDRV_O_NO_BACKING, the infinite loop check should be called.
> 
> Signed-off-by: Xu Wang 
> ---
>  qemu-img.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/qemu-img.c b/qemu-img.c
> index d5ec45b..3af7996 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -281,6 +281,14 @@ static BlockDriverState *bdrv_new_open(const char 
> *filename,
>  drv = NULL;
>  }
>  
> +/* check backing file loop if the whole chain need to be opened */
> +if (!(flags & BDRV_O_NO_BACKING) &&
> +!bdrv_backing_chain_okay(filename, fmt)) {
> +error_report("bdrv_new_open: Open %s failed. There is loop exists "
> + "in the backing chain", filename);

I suggest rewording this for grammar; something like:
 +error_report("bdrv_new_open: Open %s failed. An infinite loop exists "
 + "in the backing chain", filename);


> +goto fail;
> +}
> +
>  ret = bdrv_open(bs, filename, NULL, flags, drv, &local_err);
>  if (ret < 0) {
>  error_report("Could not open '%s': %s", filename,
> -- 
> 1.8.1.4
> 
> 



Re: [Qemu-devel] [PATCH] exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Laszlo Ersek
On 11/08/13 16:42, Andreas Färber wrote:
> Am 07.11.2013 21:27, schrieb Jordan Justen:
>> On Sun, Nov 3, 2013 at 12:48 PM, Marcel Apfelbaum  
>> wrote:
>>> The commit:
>>>
>>> Commit: a53ae8e934cd54686875b5bcfc2f434244ee55d6
>>> Author: Marcel Apfelbaum 
>>> Date:   Mon Sep 16 11:21:16 2013 +0300
>>>
>>> hw/pci: partially handle pci master abort
>>>
>>> introduced a regression on make check:
>>
>> Laszlo pointed out that my OVMF flash support series was not working
>> with QEMU master. It was working with QEMU 1.6.0. I then bisected the
>> issue to this commit. It seems this commit regresses -pflash support
>> for both KVM and non-KVM modes.
>>
>> Can you reproduce the issue this with command?
>> x86_64-softmmu/qemu-system-x86_64 -pflash pc-bios/bios.bin
>> (with or without adding -enable-kvm)
> 
> Jordan or Laszlo,
> 
> Can either of you please add a small test case to i440fx-test using
> -pflash and doing a read in the PCI hole (or wherever exactly) so that
> we can avoid regressing yet again? :)

For -pflash we need a small test file. I'm thinking about creating a 512
byte (1 sector) big file, and modifying the qemu command line in
"tests/i440fx-test.c".

I'm not very familiar with external files in tests though. Can I model
it on "qdict-test-data.txt"?

"qdict-test-data.txt" is located in the root source directory. When
configure runs outside the root source directory (= separate build dir),
it symlinks it. And, the "check-qdict.c" test program opens it (with
fopen()) simply by basename (no path prefix). Can I follow that?

Thanks
Laszlo



Re: [Qemu-devel] [PATCH] exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Paolo Bonzini
Il 08/11/2013 16:08, Marcel Apfelbaum ha scritto:
> Actually, as I see, the default behavior of "system" memory region
> is to use unassigned_mem_ops that has valid.accepts method returning
> false (no read/write methods). I don't see that read all-ones/ignore
> writes is implemented.

Right, it's read-all-zeroes (see unassigned_mem_read).  It was
read-all-ones for a brief time, then it got changed back to read-all-zeroes.

> This was the main reason I submitted this patch. I had *no* clue that
> it would impact so much the system...

Yeah, it's not your fault.  But it should have been held back until 1.8.
 Do you agree with reverting it?

> I still think the patch is needed ans the guests will benefit from
> more accurate PCI spec emulation.

Only buggy guests :) but yes, I agree it's a good thing to have.

Paolo



[Qemu-devel] [PATCH] virtio-net: Correctly store multicast filter entries

2013-11-08 Thread Vlad Yasevich
Commit 921ac5d0f3a0df869db5ce4edf752f51d8b1596a
 virtio-net: remove layout assumptions for ctrl vq
introduced a regression where the multicast address filter
entries are written to the beginning of the mac table array,
thus overwriting any unicast addresses that may have been
programmed in the filter.
The multicast addresses should be written after all the
unicast addresses.

Signed-off-by: Vlad Yasevich 
---
 hw/net/virtio-net.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 22dbd05..94e8b68 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -657,7 +657,8 @@ static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
 }
 
 if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
-s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
+s = iov_to_buf(iov, iov_cnt, 0,
+   n->mac_table.macs + (n->mac_table.in_use * ETH_ALEN),
mac_data.entries * ETH_ALEN);
 if (s != mac_data.entries * ETH_ALEN) {
 goto error;
-- 
1.8.4.2




Re: [Qemu-devel] [PATCH 00/14] VSX Stage 4

2013-11-08 Thread Andreas Färber
Hi,

Am 06.11.2013 21:31, schrieb Tom Musta:
> This is the fourth and final series of patches that add emulation support
> to QEMU for the PowerPC Vector Scalar Extension (VSX). 
[...]
>  target-ppc/cpu.h|4 +-
>  target-ppc/fpu_helper.c |  191 
> ---
>  target-ppc/helper.h |   18 
>  target-ppc/translate.c  |  110 +++--
>  target-ppc/translate_init.c |2 +-
>  5 files changed, 232 insertions(+), 93 deletions(-)

Please make it clear from all your mail (and future commit) subjects
that this is about PowerPC. Traditionally that would be "target-ppc: ",
Alex sometimes uses "PPC: ".

Thanks,
Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg



Re: [Qemu-devel] [PATCH v2] e1000: initial link negotiation on mac osx

2013-11-08 Thread Gabriel L. Somlo
On Fri, Nov 08, 2013 at 02:39:25PM +0100, Stefan Hajnoczi wrote:
> On Fri, Nov 08, 2013 at 12:12:52AM +0100, Alexander Graf wrote:
>> We can easily modify SeaBIOS to just loop through all PCI devices,
>> look for an e1000 and initialize it far enough for XNU, no?
>> After all, it sounds like that's closer to the way a real Mac works.
> 
> I'd much prefer Alex's suggestion so we avoid putting guest-specific
> hacks into QEMU.
> 
> If there is really no better solution, please make an "extra" behavior
> disabled by default and accessible through a device property.  For
> example -device e1000,xnu-preinit-hack=on.

I agree too, in principle. OTOH I'm a bit worried that teaching SeaBIOS
about e1000, and then getting that change upstreamed there might be
a whole different size of problem to solve :)

I will however give that a shot first, and fall back to
"xnu-preinit-hack=on" only if that doesn't work out...

Thanks,
--Gabriel



Re: [Qemu-devel] [PATCH] exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Andreas Färber
Am 07.11.2013 21:27, schrieb Jordan Justen:
> On Sun, Nov 3, 2013 at 12:48 PM, Marcel Apfelbaum  wrote:
>> The commit:
>>
>> Commit: a53ae8e934cd54686875b5bcfc2f434244ee55d6
>> Author: Marcel Apfelbaum 
>> Date:   Mon Sep 16 11:21:16 2013 +0300
>>
>> hw/pci: partially handle pci master abort
>>
>> introduced a regression on make check:
> 
> Laszlo pointed out that my OVMF flash support series was not working
> with QEMU master. It was working with QEMU 1.6.0. I then bisected the
> issue to this commit. It seems this commit regresses -pflash support
> for both KVM and non-KVM modes.
> 
> Can you reproduce the issue this with command?
> x86_64-softmmu/qemu-system-x86_64 -pflash pc-bios/bios.bin
> (with or without adding -enable-kvm)

Jordan or Laszlo,

Can either of you please add a small test case to i440fx-test using
-pflash and doing a read in the PCI hole (or wherever exactly) so that
we can avoid regressing yet again? :)

Regards,
Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg



Re: [Qemu-devel] [PATCH 1/7] usb: remove old usb-host code

2013-11-08 Thread Gerd Hoffmann
  Hi,

> OK, then here is the first issue I ran into while trying libusbx (git
> head, i.e. 1.0.17+: The new stack causes significant latency issues that
> makes it almost unusable for pass-through of USB audio devices (some
> headset in my case). Reverting usb-linux and disabling libusb over QEMU
> git head makes things work again. I'll have to stick with this for now
> as it is affecting my work environment.
> 
> Any spontaneous ideas how to analyse or even resolve this?

Try setting isobsize property to something smaller than 32 (which is the
default).

cheers,
  Gerd






Re: [Qemu-devel] [PATCH 1/2] i386/pc: propagate flash size from pc_system_flash_init() to pc_init1()

2013-11-08 Thread Peter Maydell
On 8 November 2013 15:27, Laszlo Ersek  wrote:
> On 11/08/13 16:16, Peter Maydell wrote:
>> That said, having to pass the size of a sub-sub-region
>> all the way back up the call stack is very odd and makes
>> me wonder if it's really the right way to do it...
>> The top level shouldn't have to care like that about
>> details of the bottom of the callstack.
>
> I agree. It's just that system.flash and pci-hole are siblings in the
> same container (they shouldn't overlap, *or* they should have clearly
> different priorities between them). We have two call chains rooted in
> pc_init1(), and the "ends" of those chains need to coordinate with each
> other (they set up the two regions, respectively, and both need the
> boundary between them). We could introduce a new global, but that's not
> exactly a step forward :)

I suspect the correct approach for the pci-hole is to
use priorities. (Also possibly to refactor the code so
that the things creating memory regions directly in the
top level container aren't four levels down the callstack.
The natural thing would be for the callgraph to be vaguely
parallel to memory region hierarchy, so all the code
managing a particular container region is in the same
place in the source rather than distributed.)

-- PMM



Re: [Qemu-devel] [PATCH 1/2] i386/pc: propagate flash size from pc_system_flash_init() to pc_init1()

2013-11-08 Thread Laszlo Ersek
On 11/08/13 16:16, Peter Maydell wrote:
> On 8 November 2013 15:07, Laszlo Ersek  wrote:
>> On 11/08/13 07:09, Jordan Justen wrote:
>>> int64_t? :)
>>
>> Heh, yes, I did cringe when I wrote that, but if you check the
>> bottom-most function, where the assignment happens,
>> pc_system_flash_init(), it declares the local "size" variable as
>> int64_t. I've given up on arguing for sensible unsigned types so I just
>> went with the flow
> 
> That's a bug in that function which should be fixed.
> This is a memory region size and those are uint64_t.
> 
> That said, having to pass the size of a sub-sub-region
> all the way back up the call stack is very odd and makes
> me wonder if it's really the right way to do it...
> The top level shouldn't have to care like that about
> details of the bottom of the callstack.

I agree. It's just that system.flash and pci-hole are siblings in the
same container (they shouldn't overlap, *or* they should have clearly
different priorities between them). We have two call chains rooted in
pc_init1(), and the "ends" of those chains need to coordinate with each
other (they set up the two regions, respectively, and both need the
boundary between them). We could introduce a new global, but that's not
exactly a step forward :)

Thanks
Laszlo



Re: [Qemu-devel] Multi-head support RFC

2013-11-08 Thread John Baboval

On 11/06/2013 06:44 PM, Dave Airlie wrote:

On Wed, Nov 6, 2013 at 8:57 PM, Gerd Hoffmann  wrote:

   Hi,


It currently just adds multiple DisplaySurfaces to the QemuConsole,
now Gerd said he thought I should be using multiple QemuConsoles but I
really didn't think this was a good idea,

Why?

It's a fair question and I haven't tried the other way yet and I fully
intend to do further  investigating,

I think my main reason was the current console at least when
interacting with gtk frontend are done on a console per tab, now if we
have multiple outputs I would want them to be visible at the same
time, now I understand we could fix this but we'd need some sort of
console grouping to say this group of consoles represent this set of
outputs,

Though further to that I'm not sure how we'd want to represent
multiple graphic cards I assume we'd want to be able to see them all
at once, but still have some sort of binding between separate outputs
on one card.


Multiple graphic cards is actually a reasonable argument for the 
multiple QemuConsole route, since each would have its own GraphicHwOps, 
so it would simply be a matter of having the UI give the user a 
mechanism for choosing which card to attach a new display to.




Dave.







Re: [Qemu-devel] [PATCH 1/2] i386/pc: propagate flash size from pc_system_flash_init() to pc_init1()

2013-11-08 Thread Peter Maydell
On 8 November 2013 15:07, Laszlo Ersek  wrote:
> On 11/08/13 07:09, Jordan Justen wrote:
>> int64_t? :)
>
> Heh, yes, I did cringe when I wrote that, but if you check the
> bottom-most function, where the assignment happens,
> pc_system_flash_init(), it declares the local "size" variable as
> int64_t. I've given up on arguing for sensible unsigned types so I just
> went with the flow

That's a bug in that function which should be fixed.
This is a memory region size and those are uint64_t.

That said, having to pass the size of a sub-sub-region
all the way back up the call stack is very odd and makes
me wonder if it's really the right way to do it...
The top level shouldn't have to care like that about
details of the bottom of the callstack.

thanks
-- PMM



Re: [Qemu-devel] [PATCH] exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Marcel Apfelbaum
On Fri, 2013-11-08 at 10:44 +, Peter Maydell wrote:
> On 8 November 2013 08:05, Paolo Bonzini  wrote:
> > Il 07/11/2013 22:51, Peter Maydell ha scritto:
> >>> > 1. Not all architectures have the behavior: "Address space that is not 
> >>> > RAM(and friends)
> >>> > is for sure PCI". Only x86 behaves like this (I think).
> >>
> >> More specifically, the x86 pc behaves like this. Other
> >> x86 based systems could in theory behave differently
> >> (not that we actually model any, I think).
> >
> > After Marcel's patch, we have changed behavior for at least all boards
> > that pass get_system_memory() to pci_register_bus or pci_bus_new:
> >
> > * mips/gt64xxx_pci.c
> >
> > * pci-host/bonito.c
> >
> > * pci-host/ppce500.c
> >
> > * ppc/ppc4xx_pci.c
> >
> > * sh4/sh_pci.c
> 
> Oh, right. Ideally those boards should not do that (I fixed
> the versatile pci controller not to do that a while back) but
> it's a long standing behaviour so it would be better had we
> not broken it.
> 
> I think it should in general be fixable by just having those
> pci controllers create an empty memory region to pass in,
> like versatile:
> 
> memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32);
> memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32);
> 
> pci_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), DEVICE(obj), "pci",
> &s->pci_mem_space, &s->pci_io_space,
> PCI_DEVFN(11, 0), TYPE_PCI_BUS);
> 
> That doesn't affect where PCI DMA goes. It might also
> require mapping an alias into that region at wherever
> the pci hole is on those boards.
> 
> Kind of awkward to do and test at this point in the release cycle though.
> 
> > These now will not go anymore through unassigned_mem_ops, which is a
> > behavioral change for MIPS boards (gt64xxx_pci and bonito) at least.
> >
> > Furthermore, the default behavior of the memory API _is_ read
> > all-ones/ignore writes, so I'm not sure what's the benefit of adding a
> > separate region for master abort...
> 
> It gives you a place set the appropriate PCI controller or device
> register status bits on abort by a PCI device access, IIRC.

Right, thanks for pointing this out. This was indeed the patches direction.

But even the first patch has meaning by itself and not just a preparation.
As I mentioned in other mail in this thread "read all-ones/ignore writes"
is not implemented yet asbeacuse unassigned_mem_ops has no read/write methods
and valid.accepts returns false.

Thanks,
Marcel

> 
> -- PMM
> 






Re: [Qemu-devel] [PATCH v2 00/39] bitmap handling optimization

2013-11-08 Thread Chegu Vinod

On 11/6/2013 5:04 AM, Juan Quintela wrote:

Hi

[v2]
In this version:
- fixed all the comments from last versions (thanks Eric)
- kvm migration bitmap is synchronized using bitmap operations
- qemu bitmap -> migration bitmap is synchronized using bitmap operations
If bitmaps are not properly aligned, we fall back to old code.
Code survives virt-tests, so should be in quite good shape.

ToDo list:

- vga ram by default is not aligned in a page number multiple of 64,

   it could be optimized.  Kraxel?  It syncs the kvm bitmap at least 1
   a second or so? bitmap is only 2048 pages (16MB by default).
   We need to change the ram_addr only

- vga: still more, after we finish migration, vga code continues
   synchronizing the kvm bitmap on source machine.  Notice that there
   is no graphics client connected to the VGA.  Worth investigating?

- I haven't yet meassure speed differences on big hosts.  Vinod?

- Depending of performance, more optimizations to do.

- debugging printf's still on the code, just to see if we are taking
   (or not) the optimized paths.

And that is all.  Please test & comment.

Thanks, Juan.

[v1]
This series split the dirty bitmap (8 bits per page, only three used)
into 3 individual bitmaps.  Once the conversion is done, operations
are handled by bitmap operations, not bit by bit.

- *_DIRTY_FLAG flags are gone, now we use memory.h DIRTY_MEMORY_*
everywhere.

- We set/reset each flag individually
   (set_dirty_flags(0xff&~CODE_DIRTY_FLAG)) are gone.

- Rename several functions to clarify/make consistent things.

- I know it dont't pass checkpatch for long lines, propper submission
   should pass it. We have to have long lines, short variable names, or
   ugly line splitting :p

- DIRTY_MEMORY_NUM: how can one include exec/memory.h into cpu-all.h?
   #include it don't work, as a workaround, I have copied its value, but
   any better idea?  I can always create "exec/migration-flags.h", though.

- The meat of the code is patch 19.  Rest of patches are quite easy
(even that one is not too complex).

Only optimizations done so far are
set_dirty_range()/clear_dirty_range() that now operates with
bitmap_set/clear.

Note for Xen: cpu_physical_memory_set_dirty_range() was wrong for xen,
see comment on patch.

It passes virt-test migration tests, so it should be perfect.

I post it to ask for comments.

ToDo list:

- create a lock for the bitmaps and fold migration bitmap into this
   one.  This would avoid a copy and make things easier?

- As this code uses/abuses bitmaps, we need to change the type of the
   index from int to long.  With an int index, we can only access a
   maximum of 8TB guest (yes, this is not urgent, we have a couple of
   years to do it).

- merging KVM <-> QEMU bitmap as a bitmap and not bit-by-bit.

- spliting the KVM bitmap synchronization into chunks, i.e. not
   synchronize all memory, just enough to continue with migration.

Any further ideas/needs?

Thanks, Juan.

PD.  Why it took so long?

  Because I was trying to integrate the bitmap on the MemoryRegion
  abstraction.  Would have make the code cleaner, but hit dead-end
  after dead-end.  As practical terms, TCG don't know about
  MemoryRegions, it has been ported to run on top of them, but
  don't use them effective


The following changes since commit c2d30667760e3d7b81290d801e567d4f758825ca:

   rtc: remove dead SQW IRQ code (2013-11-05 20:04:03 -0800)

are available in the git repository at:

   git://github.com/juanquintela/qemu.git bitmap-v2.next

for you to fetch changes up to d91eff97e6f36612eb22d57c2b6c2623f73d3997:

   migration: synchronize memory bitmap 64bits at a time (2013-11-06 13:54:56 
+0100)


Juan Quintela (39):
   Move prototypes to memory.h
   memory: cpu_physical_memory_set_dirty_flags() result is never used
   memory: cpu_physical_memory_set_dirty_range() return void
   exec: use accessor function to know if memory is dirty
   memory: create function to set a single dirty bit
   exec: create function to get a single dirty bit
   memory: make cpu_physical_memory_is_dirty return bool
   exec: simplify notdirty_mem_write()
   memory: all users of cpu_physical_memory_get_dirty used only one flag
   memory: set single dirty flags when possible
   memory: cpu_physical_memory_set_dirty_range() allways dirty all flags
   memory: cpu_physical_memory_mask_dirty_range() always clear a single flag
   memory: use DIRTY_MEMORY_* instead of *_DIRTY_FLAG
   memory: use bit 2 for migration
   memory: make sure that client is always inside range
   memory: only resize dirty bitmap when memory size increases
   memory: cpu_physical_memory_clear_dirty_flag() result is never used
   bitmap: Add bitmap_zero_extend operation
   memory: split dirty bitmap into three
   memory: unfold cpu_physical_memory_clear_dirty_flag() in its only user
  

Re: [Qemu-devel] [PATCH] exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Marcel Apfelbaum
On Fri, 2013-11-08 at 09:05 +0100, Paolo Bonzini wrote:
> Il 07/11/2013 22:51, Peter Maydell ha scritto:
> >> > 1. Not all architectures have the behavior: "Address space that is not 
> >> > RAM(and friends)
> >> > is for sure PCI". Only x86 behaves like this (I think).
> > 
> > More specifically, the x86 pc behaves like this. Other
> > x86 based systems could in theory behave differently
> > (not that we actually model any, I think).
> 
> After Marcel's patch, we have changed behavior for at least all boards
> that pass get_system_memory() to pci_register_bus or pci_bus_new:
> 
> * mips/gt64xxx_pci.c
> 
> * pci-host/bonito.c
> 
> * pci-host/ppce500.c
> 
> * ppc/ppc4xx_pci.c
> 
> * sh4/sh_pci.c
> 
> These now will not go anymore through unassigned_mem_ops, which is a
> behavioral change for MIPS boards (gt64xxx_pci and bonito) at least.
> 
> Furthermore, the default behavior of the memory API _is_ read
> all-ones/ignore writes, so I'm not sure what's the benefit of adding a
> separate region for master abort...
Actually, as I see, the default behavior of "system" memory region
is to use unassigned_mem_ops that has valid.accepts method returning
false (no read/write methods). I don't see that read all-ones/ignore
writes is implemented.
This was the main reason I submitted this patch. I had *no* clue that
it would impact so much the system...
I still think the patch is needed ans the guests will benefit from
more accurate PCI spec emulation.

Thanks,
Marcel

> 
> Paolo






Re: [Qemu-devel] [PATCH 00/14] VSX Stage 4

2013-11-08 Thread Tom Musta
On 11/7/2013 6:23 PM, Richard Henderson wrote:
> On 11/07/2013 06:31 AM, Tom Musta wrote:
>> The single-precision scalar arithmetic instructions all interpret the most
>> significant 64 bits of a VSR as a single precision floating point number
>> stored in double precision format (similar to the standard PowerPC floating
>> point single precision instructions).  Thus a common theme in the supporting
>> code is rounding of an intermediate double-precision number to single 
>> precision.
> 
> Modulo my comments wrt the actual computation of fma, the patches all look 
> fine.
> 
> But I'd like to also mention a pre-existing flaw/niggle in the ppc port.
> 
> The conversions to/from in-register representation for the single-precision
> values should never raise exceptions.  Yet we always use
> 
> d.d = float32_to_float64(f.f, &env->fp_status);
> f.f = float64_to_float32(d.d, &env->fp_status);
> 
> The use of env->fp_status is either wrong or extremely misleading.  It sure
> looks like the operation affects global cpu state.  It may be that that state
> is never copied back to the "real" fpscr and so doesn't actually affect cpu
> state, but how can I see that for sure?
> 
> I think it would be better to implement ConvertSPtoDP_NP and ConvertSP64toSP
> exactly as written in the spec.
> 
> Or at minimum use a dummy fp_status that's not associated with env.  It should
> not matter what the "real" rounding mode is in either case, since values that
> are not exactly representable as single-precision values give undefined 
> results.

Richard:

Thanks for your comments.  I concur with this comment on fp_status.

I am looking into the comments on fused multiply-add.





Re: [Qemu-devel] [PATCH] 82571 emulation

2013-11-08 Thread Stefan Hajnoczi
On Thu, Nov 07, 2013 at 11:06:06AM -0800, akepner wrote:
> First cut at emulating 82571. I'm sure there are lots of rough
> edges, but it's working for me.
> 
> Pretty obviously I started with a copy of the e1000 code, and
> modified just what was necessary to get it working. I'd like
> to also do a s/e1000_/e1000e/ to uniqify identifiers, but for
> now I've left it as it is to make comparison with e1000 easier.
> 
> Signed-off-by: Arthur Kepner 
> ---

Cool, thanks for sharing!  You've probably seen it already since your
email generally follows the conventions, but here's the link for
submitting patches:

http://qemu-project.org/Contribute/SubmitAPatch

If you stuck to the e1000.c code for the most part then there's probably
not much to comment on yet except a few suggestions for getting the
patch into a mergable state:

 * Sharing code with e1000.c may be reasonable, you decide

 * After renaming remaining code to e1000e, please drop dead-code or
   unused parts

 * Please audit the vmstate (migration data fields) to see if cruft can
   be dropped.  In particular, it's tricky to stay backwards-compatible
   with live migration so things tend to get appended rather than
   replaced.

   Since e1000e is starting from a fresh slate, you can delete any
   duplicated or workaround vmstate fields which were solely used for
   backwards compatibility during migration.

 * Please describe your testing: which guest OSes and versions?

 * Please add an indication of missing features from the hardware spec
   which you haven't implemented.  If this is literally just a hack of
   the e1000 code to make the Linux e1000e driver happy, then it may
   still need more work to be generally usable by other OSes.

 * Please test live migration several times with the NIC up and down

If any of this seems overwhelming or more effort than you were prepared
to go through, please ask questions and maybe people will turn up who
can help.

Thanks,
Stefan



Re: [Qemu-devel] [PATCH V6 1/5] block/qemu-img: Refine and export infinite loop checking in collect_image_info_list()

2013-11-08 Thread Jeff Cody
On Fri, Nov 08, 2013 at 06:53:27AM -0700, Eric Blake wrote:
> On 11/08/2013 03:19 AM, Fam Zheng wrote:
> > 
> >> +BlockDriverState *bs;
> >> +BlockDriver *drv;
> >> +char fbuf[1024];
> > 
> > Could use PATH_MAX.
> 
> PATH_MAX is undefined on some platforms, and could also be defined to
> something larger than a page which could lead to nastiness if you end up
> overflowing the stack.  I personally prefer malloc'd buffers rather than
> attempting to guess at how large to size things, although the rest of
> the code base also has similar caps at 1024 so this isn't making it worse.
>

A quick grep through the code shows ~57 arrays allocated using 1024,
and ~63 allocated using PATH_MAX.  Clearly not all of the 1024
allocation cases are pathname related, but certainly some of them are.

Maybe it makes sense to have a QEMU_PATH_MAX defined to 1024 in qemu,
so at least we are consistent everywhere.  (To clarify for Xu, I am
not talking about this patch series at all, just in general).

-Jeff



Re: [Qemu-devel] [PATCH] spapr: add "compat" machine option

2013-11-08 Thread Andreas Färber
Am 08.11.2013 15:57, schrieb Alexey Kardashevskiy:
> On 11/09/2013 12:20 AM, Andreas Färber wrote:
> 
>> When I am finally through with review of Igor's patches then he can
>> implement that for x86 and we/you can copy or adapt it for ppc. No need
>> to do big experiments for a concretely needed ppc feature.
> 
> Does it mean I better stop wasting your time and relax and you guys do this
> -cpu subptions thing yourselves? Thanks.

No, I meant don't try to mess with translating -cpu to -global and
looking up the class name that Igor mentioned. (Depending on when that
is being done it may introduce subtle bugs like real -global being
overridden.)

If you wait for us then you'll wait for some time...

Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg



Re: [Qemu-devel] [PATCH 1/2] i386/pc: propagate flash size from pc_system_flash_init() to pc_init1()

2013-11-08 Thread Laszlo Ersek
On 11/08/13 07:09, Jordan Justen wrote:
> On Thu, Nov 7, 2013 at 2:23 PM, Laszlo Ersek  wrote:
>> ... upwards through the following call chain:
>>
>>   pc_init1() | pc_q35_init()
>> pc_memory_init()
>>   pc_system_firmware_init()
>> pc_system_flash_init()
>>
>> Signed-off-by: Laszlo Ersek 
>> ---
>>  include/hw/i386/pc.h |  6 --
>>  hw/i386/pc.c |  5 +++--
>>  hw/i386/pc_piix.c|  3 ++-
>>  hw/i386/pc_q35.c |  3 ++-
>>  hw/i386/pc_sysfw.c   | 10 +++---
>>  5 files changed, 18 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
>> index 03cc0ba..a9b938e 100644
>> --- a/include/hw/i386/pc.h
>> +++ b/include/hw/i386/pc.h
>> @@ -148,7 +148,8 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
>> ram_addr_t above_4g_mem_size,
>> MemoryRegion *rom_memory,
>> MemoryRegion **ram_memory,
>> -   PcGuestInfo *guest_info);
>> +   PcGuestInfo *guest_info,
>> +   int64_t **flash_size);
> 
> Do you want *flash_size here, and at various other points? After
> changing that I could build, and it seemed to fix the symptom.

Aaargh. I hacked this up at night in a rush, trying to respond with it
"interactively" in the thread, and I was apparently confused by the
double indirection just a bit higher in the param list (at ram_memory).
What a horrible mistake. Of course you're right.

> Also, should we call it firmware_size?

No preference, I don't mind :)

> int64_t? :)

Heh, yes, I did cringe when I wrote that, but if you check the
bottom-most function, where the assignment happens,
pc_system_flash_init(), it declares the local "size" variable as
int64_t. I've given up on arguing for sensible unsigned types so I just
went with the flow.

(Hm, actually, now I think the int64_t caused me so much pain and
suffering that I can justifiedly blame my fsckup with the pointers on
it! :))

Anyway, I think Paolo wants to revert Marcel's patch for 1.7. I'm not
sure about the direction for 1.8; the pci-master-abort change has many
more tentacles than I anticipated.

I expect OVMF (incl. your new flash series) will run fine on v1.7.0 once
it's released (but we should certainly test it soon after), so neither
of my qemu patches (prio changing or resizing) should be necessary.

Thanks!
Laszlo

> 
> -Jordan
> 
>>  qemu_irq *pc_allocate_cpu_irq(void);
>>  DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
>>  void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
>> @@ -232,7 +233,8 @@ static inline bool isa_ne2000_init(ISABus *bus, int 
>> base, int irq, NICInfo *nd)
>>
>>  /* pc_sysfw.c */
>>  void pc_system_firmware_init(MemoryRegion *rom_memory,
>> - bool isapc_ram_fw);
>> + bool isapc_ram_fw,
>> + int64_t **flash_size);
>>
>>  /* pvpanic.c */
>>  void pvpanic_init(ISABus *bus);
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index 12c436e..3ec18aa 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -1152,7 +1152,8 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
>> ram_addr_t above_4g_mem_size,
>> MemoryRegion *rom_memory,
>> MemoryRegion **ram_memory,
>> -   PcGuestInfo *guest_info)
>> +   PcGuestInfo *guest_info,
>> +   int64_t **flash_size)
>>  {
>>  int linux_boot, i;
>>  MemoryRegion *ram, *option_rom_mr;
>> @@ -1186,7 +1187,7 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
>>
>>
>>  /* Initialize PC system firmware */
>> -pc_system_firmware_init(rom_memory, guest_info->isapc_ram_fw);
>> +pc_system_firmware_init(rom_memory, guest_info->isapc_ram_fw, 
>> flash_size);
>>
>>  option_rom_mr = g_malloc(sizeof(*option_rom_mr));
>>  memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE);
>> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
>> index 4fdb7b6..6e2c027 100644
>> --- a/hw/i386/pc_piix.c
>> +++ b/hw/i386/pc_piix.c
>> @@ -89,6 +89,7 @@ static void pc_init1(QEMUMachineInitArgs *args,
>>  DeviceState *icc_bridge;
>>  FWCfgState *fw_cfg = NULL;
>>  PcGuestInfo *guest_info;
>> +int64_t flash_size = 0;
>>
>>  if (xen_enabled() && xen_hvm_init(&ram_memory) != 0) {
>>  fprintf(stderr, "xen hardware virtual machine initialisation 
>> failed\n");
>> @@ -135,7 +136,7 @@ static void pc_init1(QEMUMachineInitArgs *args,
>> args->kernel_filename, args->kernel_cmdline,
>> args->initrd_filename,
>> below_4g_mem_size, above_4g_mem_size,
>> -   rom_memory, &ram_memory, guest_info);
>> +   rom_memory, &ram_memory, guest_info, &flash_size);
>>  }
>>
>>  gsi

Re: [Qemu-devel] [PATCH] spapr: add "compat" machine option

2013-11-08 Thread Alexey Kardashevskiy
On 11/09/2013 12:20 AM, Andreas Färber wrote:

> When I am finally through with review of Igor's patches then he can
> implement that for x86 and we/you can copy or adapt it for ppc. No need
> to do big experiments for a concretely needed ppc feature.

Does it mean I better stop wasting your time and relax and you guys do this
-cpu subptions thing yourselves? Thanks.


-- 
Alexey



Re: [Qemu-devel] [PATCH v2] target-ppc: move POWER7+ to a separate family

2013-11-08 Thread Alexey Kardashevskiy
On 11/09/2013 12:44 AM, Andreas Färber wrote:
> Am 08.11.2013 03:37, schrieb Alexey Kardashevskiy:
>> So far POWER7+ was a part of POWER7 family. However it has a different
>> PVR base value so in order to support PVR masks, it needs a separate
>> family class.
>>
> 
> Alexey,
> 
>> Another reason to make a POWER7+ family is that its name in the device
>> tree (/proc/device-tree/cpus/cpu*) should be "Power7+" but not "Power7"
>> and this cannot be easily fixed without a new family class.
>>
>> This adds a new family class, PVR base and mask values and moves
>> Power7+ v2.1 CPU to a new family. The class init function is copied
>> from the POWER7 family.
>>
>> Signed-off-by: Alexey Kardashevskiy 
>> ---
>> Changes:
>> v2:
>> * added VSX enable bit
>> ---
>>  target-ppc/cpu-models.c |  2 +-
>>  target-ppc/cpu-models.h |  2 ++
>>  target-ppc/translate_init.c | 38 ++
>>  3 files changed, 41 insertions(+), 1 deletion(-)
>>
>> diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
>> index 04d88c5..7c9466f 100644
>> --- a/target-ppc/cpu-models.c
>> +++ b/target-ppc/cpu-models.c
>> @@ -1140,7 +1140,7 @@
>>  "POWER7 v2.1")
>>  POWERPC_DEF("POWER7_v2.3",   CPU_POWERPC_POWER7_v23, POWER7,
>>  "POWER7 v2.3")
>> -POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,POWER7,
>> +POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,
>> POWER7P,
>>  "POWER7+ v2.1")
>>  POWERPC_DEF("POWER8_v1.0",   CPU_POWERPC_POWER8_v10, POWER8,
>>  "POWER8 v1.0")
>> diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
>> index 731ec4a..49ba4a4 100644
>> --- a/target-ppc/cpu-models.h
>> +++ b/target-ppc/cpu-models.h
>> @@ -558,6 +558,8 @@ enum {
>>  CPU_POWERPC_POWER7_v20 = 0x003F0200,
>>  CPU_POWERPC_POWER7_v21 = 0x003F0201,
>>  CPU_POWERPC_POWER7_v23 = 0x003F0203,
>> +CPU_POWERPC_POWER7P_BASE   = 0x004A,
>> +CPU_POWERPC_POWER7P_MASK   = 0x,
>>  CPU_POWERPC_POWER7P_v21= 0x004A0201,
>>  CPU_POWERPC_POWER8_BASE= 0x004B,
>>  CPU_POWERPC_POWER8_MASK= 0x,
>> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
>> index 35d1389..c030a20 100644
>> --- a/target-ppc/translate_init.c
>> +++ b/target-ppc/translate_init.c
>> @@ -7253,6 +7253,44 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
>>  pcc->l1_icache_size = 0x8000;
>>  }
>>  
>> +POWERPC_FAMILY(POWER7P)(ObjectClass *oc, void *data)
>> +{
>> +DeviceClass *dc = DEVICE_CLASS(oc);
>> +PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
>> +
>> +dc->fw_name = "PowerPC,POWER7+";
> 
> Apart from the commit message differing from the code...


In what part?


> We've had this discussion before: Jacques reported that on his POWER7+
> box only "POWER7" is shown, not "POWER7+", equivalent to my POWER5+ box
> showing only "PowerPC,POWER5". Compare my commit, which documents this:
> 
> http://git.qemu.org/?p=qemu.git;a=commit;h=793826cd460828975591f289de78672af4a47ef9
> 
> So, adding a POWER7P family seems correct to me, just the fw_name seems
> wrong - or you'll need to investigate further why there are conflicting
> reports of how it is shown. Possibly based on revision or pHyp vs. SLOF?


Yes we have had this discussion. Paul said it should "POWER7+". The only
P7+ machine I have handy shows "+":

[aik@vpl4 ~]$ ls -d /proc/device-tree/cpus/PowerPC*
/proc/device-tree/cpus/PowerPC,POWER7+@0
/proc/device-tree/cpus/PowerPC,POWER7+@2c
/proc/device-tree/cpus/PowerPC,POWER7+@10
/proc/device-tree/cpus/PowerPC,POWER7+@30
/proc/device-tree/cpus/PowerPC,POWER7+@14
/proc/device-tree/cpus/PowerPC,POWER7+@34
/proc/device-tree/cpus/PowerPC,POWER7+@18
/proc/device-tree/cpus/PowerPC,POWER7+@38
/proc/device-tree/cpus/PowerPC,POWER7+@1c
/proc/device-tree/cpus/PowerPC,POWER7+@3c
/proc/device-tree/cpus/PowerPC,POWER7+@20
/proc/device-tree/cpus/PowerPC,POWER7+@4
/proc/device-tree/cpus/PowerPC,POWER7+@24
/proc/device-tree/cpus/PowerPC,POWER7+@8
/proc/device-tree/cpus/PowerPC,POWER7+@28
/proc/device-tree/cpus/PowerPC,POWER7+@c

And this is a host, not a guest. I do not see any good reason to make dt
names different.

And this does not really matter if there is "+" or not for anybody as far
as we concerned, ppc64_cpu works either way.


> 
> Regards,
> Andreas
> 
>> +dc->desc = "POWER7+";
>> +pcc->pvr = CPU_POWERPC_POWER7P_BASE;
>> +pcc->pvr_mask = CPU_POWERPC_POWER7P_MASK;
>> +pcc->init_proc = init_proc_POWER7;
>> +pcc->check_pow = check_pow_nocheck;
>> +pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB |
>> +   PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
>> +   PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
>> +   PPC_FLOAT_STFIWX |
>> +   PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_

Re: [Qemu-devel] [PATCH] util/error: Save errno from clobbering

2013-11-08 Thread Stefan Hajnoczi
On Thu, Nov 07, 2013 at 08:10:29PM +0100, Max Reitz wrote:
> There may be calls to error_setg() and especially error_setg_errno()
> which blindly (and until now wrongly) assume these functions not to
> clobber errno (e.g., they pass errno to error_setg_errno() and return
> -errno afterwards). Instead of trying to find and fix all of these
> constructs, just make sure error_setg() and error_setg_errno() indeed do
> not clobber errno.
> 
> Suggested-by: Eric Blake 
> Signed-off-by: Max Reitz 
> ---
>  util/error.c | 6 ++
>  1 file changed, 6 insertions(+)

Thanks, applied to my block-next tree:
https://github.com/stefanha/qemu/commits/block-next

Stefan



Re: [Qemu-devel] [PATCH] MAINTAINERS: Add netmap maintainers

2013-11-08 Thread Stefan Hajnoczi
On Wed, Nov 06, 2013 at 06:34:55PM +0100, Vincenzo Maffione wrote:
> Signed-off-by: Vincenzo Maffione 
> ---
>  MAINTAINERS | 8 
>  1 file changed, 8 insertions(+)

Thanks, applied to my net-next tree:
https://github.com/stefanha/qemu/commits/net-next

Stefan



Re: [Qemu-devel] [PATCH 1/7] usb: remove old usb-host code

2013-11-08 Thread Jan Kiszka
On 2013-10-09 13:42, Gerd Hoffmann wrote:
>   Hi,
> 
 Only very recent distros fulfill the need of >= 1.0.13, so you naturally
 fall back to this code. I just realized that even the factory build of
 OpenSUSE is still on libusb-1.0.9. Current Ubuntu versions are on 1.0.12
 at best. Didn't check others so far.
>>>
>>> Ouch.  The 1.0.13 release is one year old by now.
>>>
>>> Fedora 19 is at 1.0.16 btw.
>>>
 So isn't this step a bit too early?
>>>
>>> There is always the 'git revert' option in case it turns out there are
>>> too many issues ...
>>
>> So what to do? Do you expect all the other distros to catch up regarding
>> libusb until QEMU 1.7 is released?
> 
> They will update once they figure this is needed for qemu 1.7 :)
> Updates will probably not yet be available at release time though.
> 
> If we revert, thereby continue fallback to the old code, chances are
> high that nothing happens and we'll face the same issue for qemu 1.8.
> 
> Given that there are some known issues in the host-linux code which are
> fixed in host-libusb I really want get rid of it.

OK, then here is the first issue I ran into while trying libusbx (git
head, i.e. 1.0.17+: The new stack causes significant latency issues that
makes it almost unusable for pass-through of USB audio devices (some
headset in my case). Reverting usb-linux and disabling libusb over QEMU
git head makes things work again. I'll have to stick with this for now
as it is affecting my work environment.

Any spontaneous ideas how to analyse or even resolve this?

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux



Re: [Qemu-devel] [PATCH V6 1/5] block/qemu-img: Refine and export infinite loop checking in collect_image_info_list()

2013-11-08 Thread Eric Blake
On 11/08/2013 03:19 AM, Fam Zheng wrote:
> 
>> +BlockDriverState *bs;
>> +BlockDriver *drv;
>> +char fbuf[1024];
> 
> Could use PATH_MAX.

PATH_MAX is undefined on some platforms, and could also be defined to
something larger than a page which could lead to nastiness if you end up
overflowing the stack.  I personally prefer malloc'd buffers rather than
attempting to guess at how large to size things, although the rest of
the code base also has similar caps at 1024 so this isn't making it worse.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH for-1.7 v2] block: Print its file name if backing file opening failed

2013-11-08 Thread Eric Blake
On 11/07/2013 08:26 PM, Fam Zheng wrote:
> If backing file doesn't exist, the error message is confusing and
> misleading:
> 
> $ qemu /tmp/a.qcow2
> qemu: could not open disk image /tmp/a.qcow2: Could not open file: No
> such file or directory
> 

> This is not intuitive. It's better to have the missing file's name in
> the error message. With this patch:
> 
> $ qemu-io -c 'read 0 512' /tmp/a.qcow2
> qemu-io: can't open device /tmp/a.qcow2: Could not open backing
> file: Could not open '/stor/vm/arch.raw': No such file or directory
> no file open, try 'help open'
> 
> Which is a little bit better.
> 
> Signed-off-by: Fam Zheng 
> 
> ---
> v2: Don't leak local_err (Eric).

Thanks.

Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH v2] target-ppc: move POWER7+ to a separate family

2013-11-08 Thread Andreas Färber
Am 08.11.2013 03:37, schrieb Alexey Kardashevskiy:
> So far POWER7+ was a part of POWER7 family. However it has a different
> PVR base value so in order to support PVR masks, it needs a separate
> family class.
> 

Alexey,

> Another reason to make a POWER7+ family is that its name in the device
> tree (/proc/device-tree/cpus/cpu*) should be "Power7+" but not "Power7"
> and this cannot be easily fixed without a new family class.
> 
> This adds a new family class, PVR base and mask values and moves
> Power7+ v2.1 CPU to a new family. The class init function is copied
> from the POWER7 family.
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
> Changes:
> v2:
> * added VSX enable bit
> ---
>  target-ppc/cpu-models.c |  2 +-
>  target-ppc/cpu-models.h |  2 ++
>  target-ppc/translate_init.c | 38 ++
>  3 files changed, 41 insertions(+), 1 deletion(-)
> 
> diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
> index 04d88c5..7c9466f 100644
> --- a/target-ppc/cpu-models.c
> +++ b/target-ppc/cpu-models.c
> @@ -1140,7 +1140,7 @@
>  "POWER7 v2.1")
>  POWERPC_DEF("POWER7_v2.3",   CPU_POWERPC_POWER7_v23, POWER7,
>  "POWER7 v2.3")
> -POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,POWER7,
> +POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,POWER7P,
>  "POWER7+ v2.1")
>  POWERPC_DEF("POWER8_v1.0",   CPU_POWERPC_POWER8_v10, POWER8,
>  "POWER8 v1.0")
> diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
> index 731ec4a..49ba4a4 100644
> --- a/target-ppc/cpu-models.h
> +++ b/target-ppc/cpu-models.h
> @@ -558,6 +558,8 @@ enum {
>  CPU_POWERPC_POWER7_v20 = 0x003F0200,
>  CPU_POWERPC_POWER7_v21 = 0x003F0201,
>  CPU_POWERPC_POWER7_v23 = 0x003F0203,
> +CPU_POWERPC_POWER7P_BASE   = 0x004A,
> +CPU_POWERPC_POWER7P_MASK   = 0x,
>  CPU_POWERPC_POWER7P_v21= 0x004A0201,
>  CPU_POWERPC_POWER8_BASE= 0x004B,
>  CPU_POWERPC_POWER8_MASK= 0x,
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 35d1389..c030a20 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -7253,6 +7253,44 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
>  pcc->l1_icache_size = 0x8000;
>  }
>  
> +POWERPC_FAMILY(POWER7P)(ObjectClass *oc, void *data)
> +{
> +DeviceClass *dc = DEVICE_CLASS(oc);
> +PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
> +
> +dc->fw_name = "PowerPC,POWER7+";

Apart from the commit message differing from the code...

We've had this discussion before: Jacques reported that on his POWER7+
box only "POWER7" is shown, not "POWER7+", equivalent to my POWER5+ box
showing only "PowerPC,POWER5". Compare my commit, which documents this:

http://git.qemu.org/?p=qemu.git;a=commit;h=793826cd460828975591f289de78672af4a47ef9

So, adding a POWER7P family seems correct to me, just the fw_name seems
wrong - or you'll need to investigate further why there are conflicting
reports of how it is shown. Possibly based on revision or pHyp vs. SLOF?

Regards,
Andreas

> +dc->desc = "POWER7+";
> +pcc->pvr = CPU_POWERPC_POWER7P_BASE;
> +pcc->pvr_mask = CPU_POWERPC_POWER7P_MASK;
> +pcc->init_proc = init_proc_POWER7;
> +pcc->check_pow = check_pow_nocheck;
> +pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB |
> +   PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
> +   PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
> +   PPC_FLOAT_STFIWX |
> +   PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
> +   PPC_MEM_SYNC | PPC_MEM_EIEIO |
> +   PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
> +   PPC_64B | PPC_ALTIVEC |
> +   PPC_SEGMENT_64B | PPC_SLBI |
> +   PPC_POPCNTB | PPC_POPCNTWD;
> +pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205;
> +pcc->msr_mask = 0x8204FF37ULL;
> +pcc->mmu_model = POWERPC_MMU_2_06;
> +#if defined(CONFIG_SOFTMMU)
> +pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
> +#endif
> +pcc->excp_model = POWERPC_EXCP_POWER7;
> +pcc->bus_model = PPC_FLAGS_INPUT_POWER7;
> +pcc->bfd_mach = bfd_mach_ppc64;
> +pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
> + POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
> + POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
> + POWERPC_FLAG_VSX;
> +pcc->l1_dcache_size = 0x8000;
> +pcc->l1_icache_size = 0x8000;
> +}
> +
>  POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
>  {
>  DeviceClass *dc = DEVICE_CLASS(oc);
> 


-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nü

Re: [Qemu-devel] [PATCH v2] e1000: initial link negotiation on mac osx

2013-11-08 Thread Stefan Hajnoczi
On Fri, Nov 08, 2013 at 12:12:52AM +0100, Alexander Graf wrote:
> Am 07.11.2013 um 21:28 schrieb "Gabriel L. Somlo" :
> 
> > Some guest operating systems' drivers (particularly Mac OS X)
> > expect the link state to be pre-initialized by an earlier
> > component such as a proprietary BIOS. This patch injects
> > additional LSC events upon PHY reset, allowing the OS X driver
> > to successfully complete initial link negotiation. This is a
> > follow-up to commit 372254c6e5c078fb13b236bb648d2b9b2b0c70f1,
> > which works around the OS X driver's failure to properly set
> > up the MAC address.
> > 
> > Signed-off-by: Gabriel Somlo 
> > ---
> > 
> > On Thu, Nov 07, 2013 at 08:28:47PM +0100, Paolo Bonzini wrote:
> >> Is there any way to work around this in the guest?  Such as using a
> >> UEFI driver for e1000 or something like that.
> > 
> > Currently OS X boots on top of SeaBIOS and Chameleon, neither of which
> > know anything about the e1000 hardware. On real hardware, the XNU e1000
> > driver expects the proprietary BIOS to set things up "just right", and
> > doesn't have to bother jumping through all the hoops to properly
> > initialize the hardware from scratch (after all, the XNU driver
> > developers only have to care about a limited range of carefully
> > controlled hardware).
> > 
> > In the VM/guest scenario, QEMU is the only piece that has any knowledge
> > of the e1000 hardware, so having it prep things for the guest would be
> > the path of least resistance. Using a completely different alternative
> > to SeaBIOS (one that would/could assume e1000 is present and would know
> > enough about it to configure it just right) sounds a lot less feasible.
> 
> I'm not sure I agree. We can easily modify SeaBIOS to just loop through all 
> PCI devices, look for an e1000 and initialize it far enough for XNU, no?
> 
> After all, it sounds like that's closer to the way a real Mac works.

I'd much prefer Alex's suggestion so we avoid putting guest-specific
hacks into QEMU.

If there is really no better solution, please make an "extra" behavior
disabled by default and accessible through a device property.  For
example -device e1000,xnu-preinit-hack=on.

Stefan



Re: [Qemu-devel] [PATCH v2 0/2] spapr: add "compat" machine option

2013-11-08 Thread Andreas Färber
Am 08.11.2013 09:22, schrieb Alexey Kardashevskiy:
> On 11/08/2013 01:01 AM, Andreas Färber wrote:
>> Please do separate these global preparations from the actual new ppc
>> property. Elsewhere it was discussed whether to use a readable string
>> value, which might hint at a dynamic property of type string or maybe
>> towards an enum (/me no experience with those yet and whether that works
>> better with dynamic or static).
> 
> 
> Ufff... I did not get the part about a hint. Everything is string
> in the command line :)

Your "compat" and "compat1" properties in the previous patch were of
type int. :)

The question I raised was how to best implement Alex' suggestion of
using literal "power6" rather than 206 as value.

Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg



Re: [Qemu-devel] [PATCH] spapr: add "compat" machine option

2013-11-08 Thread Andreas Färber
Am 08.11.2013 09:22, schrieb Alexey Kardashevskiy:
> On 11/08/2013 12:36 AM, Igor Mammedov wrote:
>> On Thu,  7 Nov 2013 20:11:51 +1100
>> Alexey Kardashevskiy  wrote:
>>
>>> On 11/06/2013 12:53 AM, Andreas Färber wrote:> Am 05.11.2013 10:52, schrieb 
>>> Paolo Bonzini:
> Il 05/11/2013 10:16, Alexander Graf ha scritto:
>>
>> On 05.11.2013, at 10:06, Paolo Bonzini  wrote:
>>
>>> Il 30/09/2013 14:57, Alexey Kardashevskiy ha scritto:
>> Why is the option under -machine instead of -cpu?
 Because it is still the same CPU and the guest will still read the real
 PVR from the hardware (which it may not support but this is why we need
 compatibility mode).
>>>
>>> How do you support migration from a newer to an older CPU then?  I think
>>> the guest should never see anything about the hardware CPU model.
>>
>> POWER can't model that. It always leaks the host CPU information into 
>> the guest. It's the guest kernel's responsibility to not expose that 
>> change to user space.
>>
>> Yes, it's broken :). I'm not even sure there is any sensible way to do 
>> live migration between different CPU types.
>
> Still in my opinion it should be "-cpu", not "-machine".  Even if it's
> just a "virtual" CPU model.

 PowerPC currently does not have -cpu option parsing. If you need to
 implement it, I would ask for a generic hook in CPUClass set by
 TYPE_POWERPC_CPU, so that the logic does not get hardcoded in cpu_init,
 and for the p=v parsing logic to be so generic as to just set property p
 to value v on the CPU instance. I.e. please make the compatibility
 settings a static property or dynamic property of the CPU.

 Maybe the parsing code could even live in generic qom/cpu.c, overridden
 by x86/sparc and reused for arm? Somewhere down my to-do list but
 patches appreciated...
>>>
>>>
>>> I spent some time today trying to digest what you said, still having 
>>> problems
>>> with understanding of what you meant and what Igor meant about global 
>>> variables
>>> (I just do not see the point in them).
>>>
>>> Below is the result of my excercise. At the moment I would just like to know
>>> if I am going in the right direction or not.
>>
>> what I've had in mind was a bit simpler and more implicit approach instead of
>> setting properties on each CPU instance explicitly. It could done using
>> existing "global properties" mechanism.
>>
>> in current code -cpu type,foo1=x,foo2=y... are saved into cpu_model string
>> which is parsed by target specific cpu_init() effectively parsing cpu_model
>> each time when creating a CPU.
>>
>> So to avoid fixing every target I suggest to leave cpu_model be as it is and
>>
>> add translation hook that will convert "type,foo1=x,foo2=y..." virtually
>> into a set of following options:
>>  "-global type.foo1=x -global type.foo2=y ..."
>> these options when registered are transparently applied to each new CPU
>> instance (see device_post_init() for details).
>>
>> now why do we need translation hook,
>>  * not every target is able to handle "type,foo1=x,foo2=y" in terms of
>>properties yet
>>  * legacy feature string might be in non canonical format, like
>>"+foo1,-foo2..." so for compatibility reasons with CLI we need target
>>specific code to convert to canonical format when it becomes available.
>>  * for targets that don't have feature string handling and implementing
>>new features as properties we can implement generic default hook that
>>will convert canonical feature string into global properties.
>>
>> as result we eventually would be able drop cpu_model and use global
>> properties to store CPU features.
> 
> 
> What is wrong doing it in the way the "-machine" switch does it now?
> qemu_get_machine_opts() returns a global list which I can iterate through
> via qemu_opt_foreach() and set every property to a CPU, this will check if
> a property exists and assigns it => happy Aik :)

You are happy w/ ppc, but you would make x86 and sparc users unhappy! ;)
QemuOpts does not support the +feature,-feature notation, just [key=]value.

>> see comments below for pseudo code:
>>> And few question while we are here:
>>> 1. the proposed common code handles both static and dynamic properties.
>>> What is the current QEMU trend about choosing static vs. dynamic? Can do
>>> both in POWERPC, both have benifits.
>> I prefer static, since it's usually less boilerplate code.
>>
>>
>> [...]
>>
>>>
>>> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
>>> index 7739e00..a17cd73 100644
>>> --- a/include/qom/cpu.h
>>> +++ b/include/qom/cpu.h
>>> @@ -327,6 +327,12 @@ static inline hwaddr cpu_get_phys_page_debug(CPUState 
>>> *cpu, vaddr addr)
>>>  #endif
>>>  
>>>  /**
>>> + * cpu_common_set_properties:
>>> + * @cpu: The CPU whose properties to initialize from the command line.
>>> + */
>>> +int cpu_common_set_properties(Object *obj);
>>
>> cpu_tra

[Qemu-devel] [PULL v2 12/37] libqtest: rename qmp() to qmp_discard_response()

2013-11-08 Thread Stefan Hajnoczi
Existing qmp() callers do not expect a response object.  In order to
implement real QMP test cases it will be necessary to inspect the
response object.

Rename qmp() to qmp_discard_response().  Later patches will introduce a
qmp() function that returns the response object and tests that use it.

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
Reviewed-by: Andreas Färber 
---
 tests/boot-order-test.c |  4 ++--
 tests/fdc-test.c| 15 +--
 tests/ide-test.c| 10 ++
 tests/libqtest.c| 10 +-
 tests/libqtest.h| 20 ++--
 5 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/tests/boot-order-test.c b/tests/boot-order-test.c
index 4b233d0..da158c3 100644
--- a/tests/boot-order-test.c
+++ b/tests/boot-order-test.c
@@ -41,12 +41,12 @@ static void test_a_boot_order(const char *machine,
 qtest_start(args);
 actual = read_boot_order();
 g_assert_cmphex(actual, ==, expected_boot);
-qmp("{ 'execute': 'system_reset' }");
+qmp_discard_response("{ 'execute': 'system_reset' }");
 /*
  * system_reset only requests reset.  We get a RESET event after
  * the actual reset completes.  Need to wait for that.
  */
-qmp("");/* HACK: wait for event */
+qmp_discard_response("");   /* HACK: wait for event */
 actual = read_boot_order();
 g_assert_cmphex(actual, ==, expected_reboot);
 qtest_quit(global_qtest);
diff --git a/tests/fdc-test.c b/tests/fdc-test.c
index fd198dc..38b5b17 100644
--- a/tests/fdc-test.c
+++ b/tests/fdc-test.c
@@ -290,10 +290,12 @@ static void test_media_insert(void)
 
 /* Insert media in drive. DSKCHK should not be reset until a step pulse
  * is sent. */
-qmp("{'execute':'change', 'arguments':{ 'device':'floppy0', "
-"'target': '%s' }}", test_image);
-qmp(""); /* ignore event (FIXME open -> open transition?!) */
-qmp(""); /* ignore event */
+qmp_discard_response("{'execute':'change', 'arguments':{"
+ " 'device':'floppy0', 'target': '%s' }}",
+ test_image);
+qmp_discard_response(""); /* ignore event
+ (FIXME open -> open transition?!) */
+qmp_discard_response(""); /* ignore event */
 
 dir = inb(FLOPPY_BASE + reg_dir);
 assert_bit_set(dir, DSKCHG);
@@ -322,8 +324,9 @@ static void test_media_change(void)
 
 /* Eject the floppy and check that DSKCHG is set. Reading it out doesn't
  * reset the bit. */
-qmp("{'execute':'eject', 'arguments':{ 'device':'floppy0' }}");
-qmp(""); /* ignore event */
+qmp_discard_response("{'execute':'eject', 'arguments':{"
+ " 'device':'floppy0' }}");
+qmp_discard_response(""); /* ignore event */
 
 dir = inb(FLOPPY_BASE + reg_dir);
 assert_bit_set(dir, DSKCHG);
diff --git a/tests/ide-test.c b/tests/ide-test.c
index bc824a8..d5cec5a 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -460,8 +460,9 @@ static void test_flush(void)
 tmp_path);
 
 /* Delay the completion of the flush request until we explicitly do it */
-qmp("{'execute':'human-monitor-command', 'arguments': { "
-"'command-line': 'qemu-io ide0-hd0 \"break flush_to_os A\"'} }");
+qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {"
+ " 'command-line':"
+ " 'qemu-io ide0-hd0 \"break flush_to_os A\"'} }");
 
 /* FLUSH CACHE command on device 0*/
 outb(IDE_BASE + reg_device, 0);
@@ -473,8 +474,9 @@ static void test_flush(void)
 assert_bit_clear(data, DF | ERR | DRQ);
 
 /* Complete the command */
-qmp("{'execute':'human-monitor-command', 'arguments': { "
-"'command-line': 'qemu-io ide0-hd0 \"resume A\"'} }");
+qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {"
+ " 'command-line':"
+ " 'qemu-io ide0-hd0 \"resume A\"'} }");
 
 /* Check registers */
 data = inb(IDE_BASE + reg_device);
diff --git a/tests/libqtest.c b/tests/libqtest.c
index bb82069..dc4c983 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -151,8 +151,8 @@ QTestState *qtest_init(const char *extra_args)
 }
 
 /* Read the QMP greeting and then do the handshake */
-qtest_qmp(s, "");
-qtest_qmp(s, "{ 'execute': 'qmp_capabilities' }");
+qtest_qmp_discard_response(s, "");
+qtest_qmp_discard_response(s, "{ 'execute': 'qmp_capabilities' }");
 
 if (getenv("QTEST_STOP")) {
 kill(qtest_qemu_pid(s), SIGSTOP);
@@ -291,7 +291,7 @@ redo:
 return words;
 }
 
-void qtest_qmpv(QTestState *s, const char *fmt, va_list ap)
+void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap)
 {
 bool has_reply = false;
 int nesting = 0;
@@ -326,12 +326,12 @@ void qtest_qmpv(QTestState *s, const char *fmt, va_list 
ap)
 }
 }
 
-void qtest_qmp(QTestState *s, const char *f

[Qemu-devel] [PULL v2 14/37] blockdev-test: add test case for drive_add duplicate IDs

2013-11-08 Thread Stefan Hajnoczi
The following should work:

  (qemu) drive_add if=none,id=drive0
  (qemu) drive_del drive0
  (qemu) drive_add if=none,id=drive0

Previous versions of QEMU produced a duplicate ID error because
drive_add leaked the options.

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
---
 tests/Makefile|  2 ++
 tests/blockdev-test.c | 59 +++
 2 files changed, 61 insertions(+)
 create mode 100644 tests/blockdev-test.c

diff --git a/tests/Makefile b/tests/Makefile
index f414f2c..973f497 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -68,6 +68,7 @@ check-qtest-i386-y += tests/rtc-test$(EXESUF)
 check-qtest-i386-y += tests/i440fx-test$(EXESUF)
 check-qtest-i386-y += tests/fw_cfg-test$(EXESUF)
 check-qtest-i386-y += tests/qom-test$(EXESUF)
+check-qtest-i386-y += tests/blockdev-test$(EXESUF)
 check-qtest-x86_64-y = $(check-qtest-i386-y)
 gcov-files-i386-y += i386-softmmu/hw/mc146818rtc.c
 gcov-files-x86_64-y = $(subst 
i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
@@ -200,6 +201,7 @@ tests/tmp105-test$(EXESUF): tests/tmp105-test.o 
$(libqos-omap-obj-y)
 tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y)
 tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y)
 tests/qom-test$(EXESUF): tests/qom-test.o
+tests/blockdev-test$(EXESUF): tests/blockdev-test.o $(libqos-pc-obj-y)
 tests/qemu-iotests/socket_scm_helper$(EXESUF): 
tests/qemu-iotests/socket_scm_helper.o
 
 # QTest rules
diff --git a/tests/blockdev-test.c b/tests/blockdev-test.c
new file mode 100644
index 000..c940e00
--- /dev/null
+++ b/tests/blockdev-test.c
@@ -0,0 +1,59 @@
+/*
+ * blockdev.c test cases
+ *
+ * Copyright (C) 2013 Red Hat Inc.
+ *
+ * Authors:
+ *  Stefan Hajnoczi 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include 
+#include 
+#include "libqtest.h"
+
+static void test_drive_add_empty(void)
+{
+QDict *response;
+const char *response_return;
+
+/* Start with an empty drive */
+qtest_start("-drive if=none,id=drive0");
+
+/* Delete the drive */
+response = qmp("{\"execute\": \"human-monitor-command\","
+   " \"arguments\": {"
+   "   \"command-line\": \"drive_del drive0\""
+   "}}");
+g_assert(response);
+response_return = qdict_get_try_str(response, "return");
+g_assert(response_return);
+g_assert(strcmp(response_return, "") == 0);
+QDECREF(response);
+
+/* Ensure re-adding the drive works - there should be no duplicate ID error
+ * because the old drive must be gone.
+ */
+response = qmp("{\"execute\": \"human-monitor-command\","
+   " \"arguments\": {"
+   "   \"command-line\": \"drive_add 0 if=none,id=drive0\""
+   "}}");
+g_assert(response);
+response_return = qdict_get_try_str(response, "return");
+g_assert(response_return);
+g_assert(strcmp(response_return, "OK\r\n") == 0);
+QDECREF(response);
+
+qtest_end();
+}
+
+int main(int argc, char **argv)
+{
+g_test_init(&argc, &argv, NULL);
+
+qtest_add_func("/qmp/drive_add_empty", test_drive_add_empty);
+
+return g_test_run();
+}
-- 
1.8.3.1




[Qemu-devel] [PULL v2 11/37] blockdev: fix drive_init() opts and bs_opts leaks

2013-11-08 Thread Stefan Hajnoczi
These memory leaks also make drive_add if=none,id=drive0 without a file=
option leak the options list.  This keeps ID "drive0" around forever.

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
---
 blockdev.c | 27 +++
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index b260477..86e6bff 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -341,7 +341,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
 qemu_opts_absorb_qdict(opts, bs_opts, &error);
 if (error_is_set(&error)) {
 error_propagate(errp, error);
-return NULL;
+goto early_err;
 }
 
 if (id) {
@@ -361,7 +361,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
 if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
 if (bdrv_parse_discard_flags(buf, &bdrv_flags) != 0) {
 error_setg(errp, "invalid discard option");
-return NULL;
+goto early_err;
 }
 }
 
@@ -383,7 +383,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
 /* this is the default */
 } else {
error_setg(errp, "invalid aio option");
-   return NULL;
+   goto early_err;
 }
 }
 #endif
@@ -393,13 +393,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
 error_printf("Supported formats:");
 bdrv_iterate_format(bdrv_format_print, NULL);
 error_printf("\n");
-return NULL;
+goto early_err;
 }
 
 drv = bdrv_find_format(buf);
 if (!drv) {
 error_setg(errp, "'%s' invalid format", buf);
-return NULL;
+goto early_err;
 }
 }
 
@@ -435,20 +435,20 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
 
 if (!check_throttle_config(&cfg, &error)) {
 error_propagate(errp, error);
-return NULL;
+goto early_err;
 }
 
 on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
 if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
 if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != 
IF_NONE) {
 error_setg(errp, "werror is not supported by this bus type");
-return NULL;
+goto early_err;
 }
 
 on_write_error = parse_block_error_action(buf, 0, &error);
 if (error_is_set(&error)) {
 error_propagate(errp, error);
-return NULL;
+goto early_err;
 }
 }
 
@@ -456,13 +456,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
 if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
 if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != 
IF_NONE) {
 error_report("rerror is not supported by this bus type");
-return NULL;
+goto early_err;
 }
 
 on_read_error = parse_block_error_action(buf, 1, &error);
 if (error_is_set(&error)) {
 error_propagate(errp, error);
-return NULL;
+goto early_err;
 }
 }
 
@@ -491,6 +491,8 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
 if (has_driver_specific_opts) {
 file = NULL;
 } else {
+QDECREF(bs_opts);
+qemu_opts_del(opts);
 return dinfo;
 }
 }
@@ -529,12 +531,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
 return dinfo;
 
 err:
-qemu_opts_del(opts);
-QDECREF(bs_opts);
 bdrv_unref(dinfo->bdrv);
 g_free(dinfo->id);
 QTAILQ_REMOVE(&drives, dinfo, next);
 g_free(dinfo);
+early_err:
+QDECREF(bs_opts);
+qemu_opts_del(opts);
 return NULL;
 }
 
-- 
1.8.3.1




Re: [Qemu-devel] virtio-net performance on mach-virt.

2013-11-08 Thread Giridhar Maruthy
Hi Paolo,

Many apologies to you, I did not reply to you earlier. I somehow
missed your email.

The command I used was

sudo qemu-system-arm -enable-kvm -kernel zImage -machine type=virt
-display none -cpu cortex-a15 -m 512 -append 'console=ttyAMA0
root=/dev/vda ip=192.168.42.24:192.168.42.1:192.168.42.1:255.255.255.0:::off
rw' -serial stdio -drive file=/dev/sdb2,if=none,cache=writeback,id=foo
-device virtio-blk-device,drive=foo -netdev tap,id=br0,vhost=on
-device virtio-net-device,netdev=br0

Also, I had missed out on this important message among the huge kernel logs
"qemu-system-arm: unable to start vhost net: 38: falling back on
userspace virtio"

I observed that the cause is that set_guest_notifiers is not
implemented in virtio-mmio in qemu.
So I guess either this has to be implemented in virtio-mmio or to use
virtio-pci to move further.

Thanks,
Giridhar


On 31 October 2013 15:19, Paolo Bonzini  wrote:
> Il 31/10/2013 04:32, Giridhar Maruthy ha scritto:
>> On 30 October 2013 22:12, Paolo Bonzini  wrote:
>>> Il 30/10/2013 15:40, Giridhar Maruthy ha scritto:
 Hi All,

 I tried to measure the network performance of guest (mach-virt) with
 virtio-net on an ARM host platform (Samsung exynos). The qemu version
 is 1.6.50.

 I found that with 1GbE NIC on the host, the host iperf gave a speed of
 847Mbits/sec when a local dhcp server was used as a iperf server.

 But the guest gave a speed of 478Mbits/sec which is around 56% compared to 
 host.

 What is the typical guest network efficiency compared to host with 
 virtio-net?
 Any ideas would be helpful.
>>>
>>> Were you using vhost?
>>
>> Yes, I did use vhost in the host kernel configuration and passed
>> vhost=on in qemu, but there is no difference in the performance of
>> guest.
>
> What's your command line?
>
> Paolo
>



[Qemu-devel] [PULL v2 19/37] block: vhdx - add header update capability.

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

This adds the ability to update the headers in a VHDX image, including
generating a new MS-compatible GUID.

As VHDX depends on uuid.h, VHDX is now a configurable build option.  If
VHDX support is enabled, that will also enable uuid as well.  The
default is to have VHDX enabled.

To enable/disable VHDX:  --enable-vhdx, --disable-vhdx

Signed-off-by: Jeff Cody 
Signed-off-by: Stefan Hajnoczi 
---
 block/Makefile.objs |   2 +-
 block/vhdx.c| 161 +++-
 block/vhdx.h|  14 -
 configure   |  24 
 4 files changed, 196 insertions(+), 5 deletions(-)

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 3bb85b5..e7214de 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -2,7 +2,7 @@ block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o v
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
-block-obj-y += vhdx.o
+block-obj-$(CONFIG_VHDX) += vhdx.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
 block-obj-y += snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
diff --git a/block/vhdx.c b/block/vhdx.c
index b497c27..7b94c42 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -22,6 +22,7 @@
 #include "block/vhdx.h"
 #include "migration/migration.h"
 
+#include 
 
 /* Several metadata and region table data entries are identified by
  * guids in  a MS-specific GUID format. */
@@ -157,12 +158,41 @@ typedef struct BDRVVHDXState {
 VHDXBatEntry *bat;
 uint64_t bat_offset;
 
+MSGUID session_guid;
+
+
 VHDXParentLocatorHeader parent_header;
 VHDXParentLocatorEntry *parent_entries;
 
 Error *migration_blocker;
 } BDRVVHDXState;
 
+/* Calculates new checksum.
+ *
+ * Zero is substituted during crc calculation for the original crc field
+ * crc_offset: byte offset in buf of the buffer crc
+ * buf: buffer pointer
+ * size: size of buffer (must be > crc_offset+4)
+ *
+ * Note: The resulting checksum is in the CPU endianness, not necessarily
+ *   in the file format endianness (LE).  Any header export to disk should
+ *   make sure that vhdx_header_le_export() is used to convert to the
+ *   correct endianness
+ */
+uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)
+{
+uint32_t crc;
+
+assert(buf != NULL);
+assert(size > (crc_offset + sizeof(crc)));
+
+memset(buf + crc_offset, 0, sizeof(crc));
+crc =  crc32c(0x, buf, size);
+memcpy(buf + crc_offset, &crc, sizeof(crc));
+
+return crc;
+}
+
 uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
 int crc_offset)
 {
@@ -214,6 +244,19 @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int 
crc_offset)
 
 
 /*
+ * This generates a UUID that is compliant with the MS GUIDs used
+ * in the VHDX spec (and elsewhere).
+ */
+void vhdx_guid_generate(MSGUID *guid)
+{
+uuid_t uuid;
+assert(guid != NULL);
+
+uuid_generate(uuid);
+memcpy(guid, uuid, sizeof(MSGUID));
+}
+
+/*
  * Per the MS VHDX Specification, for every VHDX file:
  *  - The header section is fixed size - 1 MB
  *  - The header section is always the first "object"
@@ -251,6 +294,113 @@ static void vhdx_header_le_import(VHDXHeader *h)
 le64_to_cpus(&h->log_offset);
 }
 
+/* All VHDX structures on disk are little endian */
+static void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h)
+{
+assert(orig_h != NULL);
+assert(new_h != NULL);
+
+new_h->signature   = cpu_to_le32(orig_h->signature);
+new_h->checksum= cpu_to_le32(orig_h->checksum);
+new_h->sequence_number = cpu_to_le64(orig_h->sequence_number);
+
+new_h->file_write_guid = orig_h->file_write_guid;
+new_h->data_write_guid = orig_h->data_write_guid;
+new_h->log_guid= orig_h->log_guid;
+
+cpu_to_leguids(&new_h->file_write_guid);
+cpu_to_leguids(&new_h->data_write_guid);
+cpu_to_leguids(&new_h->log_guid);
+
+new_h->log_version = cpu_to_le16(orig_h->log_version);
+new_h->version = cpu_to_le16(orig_h->version);
+new_h->log_length  = cpu_to_le32(orig_h->log_length);
+new_h->log_offset  = cpu_to_le64(orig_h->log_offset);
+}
+
+/* Update the VHDX headers
+ *
+ * This follows the VHDX spec procedures for header updates.
+ *
+ *  - non-current header is updated with largest sequence number
+ */
+static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
+  bool generate_data_write_guid)
+{
+int ret = 0;
+int hdr_idx = 0;
+uint64_t header_offset = VHDX_HEADER1_OFFSET;
+
+VHDXHeader *active_header;
+VHDXHeader *inactive_header;
+VHDXHeader header_le;
+uint8_t *buffer;
+
+/* operate on the non-current header */
+if (s->curr_header == 0) {
+

[Qemu-devel] [PULL v2 17/37] block/vpc: fix virtual size for images created with disk2vhd

2013-11-08 Thread Stefan Hajnoczi
From: Peter Lieven 

Signed-off-by: Peter Lieven 
Signed-off-by: Stefan Hajnoczi 
---
 block/vpc.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/block/vpc.c b/block/vpc.c
index 627d11c..577cc45 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -211,6 +211,15 @@ static int vpc_open(BlockDriverState *bs, QDict *options, 
int flags,
 bs->total_sectors = (int64_t)
 be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
 
+/* images created with disk2vhd report a far higher virtual size
+ * than expected with the cyls * heads * sectors_per_cyl formula.
+ * use the footer->size instead if the image was created with
+ * disk2vhd.
+ */
+if (!strncmp(footer->creator_app, "d2v", 4)) {
+bs->total_sectors = be64_to_cpu(footer->size) / BDRV_SECTOR_SIZE;
+}
+
 /* Allow a maximum disk size of approximately 2 TB */
 if (bs->total_sectors >= 65535LL * 255 * 255) {
 ret = -EFBIG;
-- 
1.8.3.1




Re: [Qemu-devel] [PATCH v3] Fix pc migration from qemu <= 1.5

2013-11-08 Thread David Gibson
On Tue, Nov 05, 2013 at 06:46:27PM -0500, Cole Robinson wrote:
> The following commit introduced a migration incompatibility:
> 
> commit 568f0690fd9aa4d39d84b04c1a5dbb53a915c3fe
> Author: David Gibson 
> Date:   Thu Jun 6 18:48:49 2013 +1000
> 
> pci: Replace pci_find_domain() with more general pci_root_bus_path()
> 
> The issue is that i440fx savevm idstr went from :00:00.0/I440FX to
> :00.0/I440FX. Unfortunately we are stuck with the breakage for
> 1.6 machine types.
> 
> Add a compat property to maintain the busted idstr for the 1.6 machine
> types, but revert to the old style format for 1.7+, and <= 1.5.
> 
> Tested with migration from qemu 1.5, qemu 1.6, and qemu.git.

Bother.  I did think about migration compatibility for x86, but
obviously not quite hard enough :(.  Sorry, folks.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


pgpEv5G4stZDD.pgp
Description: PGP signature


[Qemu-devel] reverting commit a53ae8e934cd54686875b5bcfc2f434244ee55d6 Re: [edk2] [PATCH 0/2] Re: exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Paolo Bonzini
Il 07/11/2013 23:23, Laszlo Ersek ha scritto:
> On 11/07/13 22:24, Marcel Apfelbaum wrote:
>> Why pci-hole and system.flash collide? IMHO we should not play with
>> priorities here, better solve the collision.
> 
> What about this "beautiful" series? It produces
> 
> memory
> -000f (prio 0, RW): system
>   [...]
>   6000-ffdf (prio 0, RW): alias pci-hole @pci
>   6000-ffdf
>   [...]
>   ffe0- (prio 0, R-): system.flash
> 
> and I can run OVMF with it. It also stays within i386/pc.

This definitely works, and make sense.

But I think for 1.7 we should just revert the commit.  It can be done
later in 1.8.  And it should have a qtest testcase that shows the effect
of the patch.

Other patches can still be applied to 1.7, but the change definitely had
much bigger ramifications than anticipated.  The patches are

[PATCH for-1.7 v2 5/8] pci: fix address space size for bridge
[PATCH for-1.7 v2 7/8] pc: s/INT64_MAX/UINT64_MAX/
[PATCH for-1.7 v2 8/8] spapr_pci: s/INT64_MAX/UINT64_MAX/

There's also

[PATCH 1/2] split definitions for exec.c and translate-all.c radix trees
[PATCH 2/2] exec: make address spaces 64-bit wide

which however has a 2% perf hit for TCG.  In 1.8 we can apply it and
recover the hit with other optimizations.

Paolo




Re: [Qemu-devel] [PATCH] exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Paolo Bonzini
Il 08/11/2013 11:44, Peter Maydell ha scritto:
> On 8 November 2013 08:05, Paolo Bonzini  wrote:
>> Il 07/11/2013 22:51, Peter Maydell ha scritto:
> 1. Not all architectures have the behavior: "Address space that is not 
> RAM(and friends)
> is for sure PCI". Only x86 behaves like this (I think).
>>>
>>> More specifically, the x86 pc behaves like this. Other
>>> x86 based systems could in theory behave differently
>>> (not that we actually model any, I think).
>>
>> After Marcel's patch, we have changed behavior for at least all boards
>> that pass get_system_memory() to pci_register_bus or pci_bus_new:
>>
>> * mips/gt64xxx_pci.c
>>
>> * pci-host/bonito.c
>>
>> * pci-host/ppce500.c
>>
>> * ppc/ppc4xx_pci.c
>>
>> * sh4/sh_pci.c
> 
> Oh, right. Ideally those boards should not do that (I fixed
> the versatile pci controller not to do that a while back) but
> it's a long standing behaviour so it would be better had we
> not broken it.
> 
> I think it should in general be fixable by just having those
> pci controllers create an empty memory region to pass in,
> like versatile:
> 
> memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32);
> memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32);
> 
> pci_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), DEVICE(obj), "pci",
> &s->pci_mem_space, &s->pci_io_space,
> PCI_DEVFN(11, 0), TYPE_PCI_BUS);
> 
> That doesn't affect where PCI DMA goes. It might also
> require mapping an alias into that region at wherever
> the pci hole is on those boards.
> 
> Kind of awkward to do and test at this point in the release cycle though.

Yes, for now we should just revert the patch.

BTW I tried optimizing MMIO dispatch and managed to save ~30 cycles so I
don't think we should be afraid of increasing the depth of the radix
tree.  All stuff for 1.8 though.

Paolo

Paolo



[Qemu-devel] [PULL v2 02/37] qemu-iotests: Filter out actual image size in 067

2013-11-08 Thread Stefan Hajnoczi
From: Max Reitz 

The actual size of the image file may differ depending on the Linux
kernel currently running on the host. Filtering out this value makes
this test pass in such cases.

Signed-off-by: Max Reitz 
Reviewed-by: Benoit Canet 
Signed-off-by: Stefan Hajnoczi 
---
 tests/qemu-iotests/067 |  2 +-
 tests/qemu-iotests/067.out | 10 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067
index 79dc38b..d025192 100755
--- a/tests/qemu-iotests/067
+++ b/tests/qemu-iotests/067
@@ -45,7 +45,7 @@ function do_run_qemu()
 
 function run_qemu()
 {
-do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp
+do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | sed -e 
's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
 }
 
 size=128M
diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out
index 4bb9ff9..8d271cc 100644
--- a/tests/qemu-iotests/067.out
+++ b/tests/qemu-iotests/067.out
@@ -6,7 +6,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
 Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device 
virtio-blk-pci,drive=disk,id=virtio0
 QMP_VERSION
 {"return": {}}
-{"return": [{"io-status": "ok", "device": "disk", "locked": false, 
"removable": false, "inserted": {"iops_rd": 0, "image": {"virtual-size": 
134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": 
"qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": 
{"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 
0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 
0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", 
"encryption_key_missing": false}, "type": "unknown"}, {"io-status": "ok", 
"device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, 
"type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, 
"tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, 
"removable": true, "tray_open": false, "type": "unknown"}]}
+{"return": [{"io-status": "ok", "device": "disk", "locked": false, 
"removable": false, "inserted": {"iops_rd": 0, "image": {"virtual-size": 
134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": 
"qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": 
{"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 
0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 
0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", 
"encryption_key_missing": false}, "type": "unknown"}, {"io-status": "ok", 
"device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, 
"type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, 
"tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, 
"removable": true, "tray_open": false, "type": "unknown"}]}
 {"return": {}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"DEVICE_DELETED", "data": {"path": 
"/machine/peripheral/virtio0/virtio-backend"}}
@@ -24,7 +24,7 @@ QMP_VERSION
 Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk
 QMP_VERSION
 {"return": {}}
-{"return": [{"device": "disk", "locked": false, "removable": true, "inserted": 
{"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": 
"TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 
139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", 
"lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, 
"backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": 
false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", 
"encryption_key_missing": false}, "tray_open": false, "type": "unknown"}, 
{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, 
"tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, 
"removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", 
"locked": false, "removable": true, "tray_open": false, "type": "unknown"}]}
+{"return": [{"device": "disk", "locked": false, "removable": true, "inserted": 
{"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": 
"TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 
SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", 
"lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, 
"backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": 
false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", 
"encryption_key_missing": false}, "tray_open": false, "type": "unknown"}, 
{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, 
"tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, 
"removable": true, "tray_open": false, "type": "

[Qemu-devel] [PULL v2 32/37] block: vhdx - fix comment typos in header, fix incorrect struct fields

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

VHDXPage83Data and VHDXParentLocatorHeader both incorrectly had their
MSGUID fields set as arrays of 16.  This is incorrect (it stems from
an early version where those fields were uint_8 arrays).  Those fields
were, up to this patch, unused.

Also, there were a couple of typos and incorrect wording in comments,
and those have been fixed up as well.

Signed-off-by: Jeff Cody 
Signed-off-by: Stefan Hajnoczi 
---
 block/vhdx.h | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/vhdx.h b/block/vhdx.h
index 15486c7..f222d18 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -58,7 +58,7 @@
 typedef struct VHDXFileIdentifier {
 uint64_tsignature;  /* "vhdxfile" in ASCII */
 uint16_tcreator[256];   /* optional; utf-16 string to identify
-   the vhdx file creator.  Diagnotistic
+   the vhdx file creator.  Diagnostic
only */
 } VHDXFileIdentifier;
 
@@ -114,8 +114,8 @@ typedef struct QEMU_PACKED VHDXHeader {
there is no valid log. If non-zero,
log entries with this guid are
valid. */
-uint16_tlog_version;/* version of the log format. Mustn't 
be
-   zero, unless log_guid is also zero 
*/
+uint16_tlog_version;/* version of the log format. Must be
+   set to zero */
 uint16_tversion;/* version of the vhdx file.  
Currently,
only supported version is "1" */
 uint32_tlog_length; /* length of the log.  Must be multiple
@@ -281,7 +281,7 @@ typedef struct QEMU_PACKED VHDXVirtualDiskSize {
 } VHDXVirtualDiskSize;
 
 typedef struct QEMU_PACKED VHDXPage83Data {
-MSGUID  page_83_data[16];   /* unique id for scsi devices that
+MSGUID  page_83_data;   /* unique id for scsi devices that
support page 0x83 */
 } VHDXPage83Data;
 
@@ -296,7 +296,7 @@ typedef struct QEMU_PACKED 
VHDXVirtualDiskPhysicalSectorSize {
 } VHDXVirtualDiskPhysicalSectorSize;
 
 typedef struct QEMU_PACKED VHDXParentLocatorHeader {
-MSGUID  locator_type[16];   /* type of the parent virtual disk. */
+MSGUID  locator_type;   /* type of the parent virtual disk. */
 uint16_treserved;
 uint16_tkey_value_count;/* number of key/value pairs for this
locator */
-- 
1.8.3.1




[Qemu-devel] [PULL v2 24/37] block: vhdx code movement - move vhdx_close() above vhdx_open()

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

Signed-off-by: Jeff Cody 
Signed-off-by: Stefan Hajnoczi 
---
 block/vhdx.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/block/vhdx.c b/block/vhdx.c
index 241703a..3f06ce3 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -778,6 +778,17 @@ exit:
 }
 
 
+static void vhdx_close(BlockDriverState *bs)
+{
+BDRVVHDXState *s = bs->opaque;
+qemu_vfree(s->headers[0]);
+qemu_vfree(s->headers[1]);
+qemu_vfree(s->bat);
+qemu_vfree(s->parent_entries);
+migrate_del_blocker(s->migration_blocker);
+error_free(s->migration_blocker);
+}
+
 static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
  Error **errp)
 {
@@ -1035,17 +1046,6 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState 
*bs, int64_t sector_num,
 }
 
 
-static void vhdx_close(BlockDriverState *bs)
-{
-BDRVVHDXState *s = bs->opaque;
-qemu_vfree(s->headers[0]);
-qemu_vfree(s->headers[1]);
-qemu_vfree(s->bat);
-qemu_vfree(s->parent_entries);
-migrate_del_blocker(s->migration_blocker);
-error_free(s->migration_blocker);
-}
-
 static BlockDriver bdrv_vhdx = {
 .format_name= "vhdx",
 .instance_size  = sizeof(BDRVVHDXState),
-- 
1.8.3.1




Re: [Qemu-devel] [PATCH] exec: fix regression by making system-memory region UINT64_MAX size

2013-11-08 Thread Peter Maydell
On 8 November 2013 08:05, Paolo Bonzini  wrote:
> Il 07/11/2013 22:51, Peter Maydell ha scritto:
>>> > 1. Not all architectures have the behavior: "Address space that is not 
>>> > RAM(and friends)
>>> > is for sure PCI". Only x86 behaves like this (I think).
>>
>> More specifically, the x86 pc behaves like this. Other
>> x86 based systems could in theory behave differently
>> (not that we actually model any, I think).
>
> After Marcel's patch, we have changed behavior for at least all boards
> that pass get_system_memory() to pci_register_bus or pci_bus_new:
>
> * mips/gt64xxx_pci.c
>
> * pci-host/bonito.c
>
> * pci-host/ppce500.c
>
> * ppc/ppc4xx_pci.c
>
> * sh4/sh_pci.c

Oh, right. Ideally those boards should not do that (I fixed
the versatile pci controller not to do that a while back) but
it's a long standing behaviour so it would be better had we
not broken it.

I think it should in general be fixable by just having those
pci controllers create an empty memory region to pass in,
like versatile:

memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32);
memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32);

pci_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), DEVICE(obj), "pci",
&s->pci_mem_space, &s->pci_io_space,
PCI_DEVFN(11, 0), TYPE_PCI_BUS);

That doesn't affect where PCI DMA goes. It might also
require mapping an alias into that region at wherever
the pci hole is on those boards.

Kind of awkward to do and test at this point in the release cycle though.

> These now will not go anymore through unassigned_mem_ops, which is a
> behavioral change for MIPS boards (gt64xxx_pci and bonito) at least.
>
> Furthermore, the default behavior of the memory API _is_ read
> all-ones/ignore writes, so I'm not sure what's the benefit of adding a
> separate region for master abort...

It gives you a place set the appropriate PCI controller or device
register status bits on abort by a PCI device access, IIRC.

-- PMM



Re: [Qemu-devel] [PATCH V6 5/5] blockdev: Add infinite loop check in drive_init()

2013-11-08 Thread Fam Zheng
On Tue, 11/05 22:09, Xu Wang wrote:
> Check the backing file for a loop during image boot, to avoid a lack or
> response or segfault.
> 
> Signed-off-by: Xu Wang 
> ---
>  blockdev.c | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/blockdev.c b/blockdev.c
> index b260477..7c0927f 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -510,6 +510,12 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
>  
>  bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
>  
> +/* Add backing file loop check */
> +if (!bdrv_backing_chain_okay(file, drv ? drv->format_name : NULL)) {
> +error_report("drive_init: backing file loop check failed");

Please use error_setg("...");

Fam

> +goto err;
> +}
> +
>  QINCREF(bs_opts);
>  ret = bdrv_open(dinfo->bdrv, file, bs_opts, bdrv_flags, drv, &error);
>  
> -- 
> 1.8.1.4
> 



Re: [Qemu-devel] [PATCH V6 3/5] block: Add check infinite loop in bdrv_img_create()

2013-11-08 Thread Fam Zheng
On Tue, 11/05 22:09, Xu Wang wrote:
> Backing file loop should be checked before qemu-img create command
> execution. If loop is found, qemu-img create should be stopped and
> an error printed.
> 
> Signed-off-by: Xu Wang 
> ---
>  block.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 3443117..8423e80 100644
> --- a/block.c
> +++ b/block.c
> @@ -4670,15 +4670,15 @@ void bdrv_img_create(const char *filename, const char 
> *fmt,
>  }
>  
>  backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
> +backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
>  if (backing_file && backing_file->value.s) {
> -if (!strcmp(filename, backing_file->value.s)) {
> -error_setg(errp, "Error: Trying to create an image with the "
> - "same filename as the backing file");
> +if (!bdrv_new_chain_okay(filename, backing_file->value.s,
> +backing_fmt->value.s)) {
> +error_report("bdrv_img_create: loop exists, image create 
> failed");

Please use error_setg(errp, "...");

Fam



Re: [Qemu-devel] [PATCH V6 2/5] qemu-img: Add infinite loop checking in bdrv_new_open()

2013-11-08 Thread Fam Zheng
On Tue, 11/05 22:09, Xu Wang wrote:
> Every image should be checked if there is infinite loop in backing
> file chain before open it. So infinite loop check was added into
> bdrv_new_open(). If @filename is opened without the flag
> BDRV_O_NO_BACKING, the infinite loop check should be called.
> 
> Signed-off-by: Xu Wang 

I think this should be squashed into patch 1 so that the checking in
collect_image_info_list is kept along the series.

Fam

> ---
>  qemu-img.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/qemu-img.c b/qemu-img.c
> index d5ec45b..3af7996 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -281,6 +281,14 @@ static BlockDriverState *bdrv_new_open(const char 
> *filename,
>  drv = NULL;
>  }
>  
> +/* check backing file loop if the whole chain need to be opened */
> +if (!(flags & BDRV_O_NO_BACKING) &&
> +!bdrv_backing_chain_okay(filename, fmt)) {
> +error_report("bdrv_new_open: Open %s failed. There is loop exists "
> + "in the backing chain", filename);
> +goto fail;
> +}
> +
>  ret = bdrv_open(bs, filename, NULL, flags, drv, &local_err);
>  if (ret < 0) {
>  error_report("Could not open '%s': %s", filename,
> -- 
> 1.8.1.4
> 
> 



[Qemu-devel] [PULL v2 35/37] block: qemu-iotests for vhdx, add write test support

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

This removes the IMGFMT_GENERIC blocker for read-only, so existing
iotests run read/write tests for vhdx images created by qemu-img (e.g.
tests 001, 002, 003).

In addition, this updates the sample image test for the Hyper-V
created image, to verify we can write it as well.

Signed-off-by: Jeff Cody 
Signed-off-by: Stefan Hajnoczi 
---
 tests/qemu-iotests/064 | 11 +++
 tests/qemu-iotests/064.out | 14 ++
 tests/qemu-iotests/common  |  1 -
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/064 b/tests/qemu-iotests/064
index 6789aa6..1c74c31 100755
--- a/tests/qemu-iotests/064
+++ b/tests/qemu-iotests/064
@@ -56,6 +56,17 @@ echo
 echo "=== Verify pattern 0x00, 66M - 1024M ==="
 $QEMU_IO -r -c "read -pP 0x00 66M 958M" "$TEST_IMG" | _filter_qemu_io
 
+echo
+echo "=== Verify pattern write, 0xc3 99M-157M ==="
+$QEMU_IO -c "write -pP 0xc3 99M 58M" "$TEST_IMG" | _filter_qemu_io
+# first verify we didn't write where we should not have
+$QEMU_IO -c "read -pP 0xa5 0 33M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -pP 0x96 33M 33M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -pP 0x00 66M 33M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -pP 0x00 157MM 867MM" "$TEST_IMG" | _filter_qemu_io
+# now verify what we should have actually written
+$QEMU_IO -c "read -pP 0xc3 99M 58M" "$TEST_IMG" | _filter_qemu_io
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/064.out b/tests/qemu-iotests/064.out
index b9e8e4a..5346a4e 100644
--- a/tests/qemu-iotests/064.out
+++ b/tests/qemu-iotests/064.out
@@ -11,4 +11,18 @@ read 34603008/34603008 bytes at offset 34603008
 === Verify pattern 0x00, 66M - 1024M ===
 read 1004535808/1004535808 bytes at offset 69206016
 958 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Verify pattern write, 0xc3 99M-157M ===
+wrote 60817408/60817408 bytes at offset 103809024
+58 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 34603008/34603008 bytes at offset 0
+33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 34603008/34603008 bytes at offset 34603008
+33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 34603008/34603008 bytes at offset 69206016
+33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 909115392/909115392 bytes at offset 164626432
+867 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 60817408/60817408 bytes at offset 103809024
+58 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 *** done
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index 2932e14..8cde7f1 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -200,7 +200,6 @@ testlist options
 -vhdx)
 IMGFMT=vhdx
 xpand=false
-IMGFMT_GENERIC=false
 ;;
 
 -rbd)
-- 
1.8.3.1




[Qemu-devel] [PULL v2 28/37] block: vhdx write support

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

This adds support for writing to VHDX image files, using coroutines.
Writes into the BAT table goes through the VHDX log.  Currently, BAT
table writes occur when expanding a dynamic VHDX file, and allocating a
new BAT entry.

Signed-off-by: Jeff Cody 
Signed-off-by: Stefan Hajnoczi 
---
 block/vhdx.c | 212 +--
 block/vhdx.h |   2 +-
 2 files changed, 209 insertions(+), 5 deletions(-)

diff --git a/block/vhdx.c b/block/vhdx.c
index 574ac4c..050f071 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -914,7 +914,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, 
int flags,
 if (payblocks--) {
 /* payload bat entries */
 if ((s->bat[i] & VHDX_BAT_STATE_BIT_MASK) ==
-PAYLOAD_BLOCK_FULL_PRESENT) {
+PAYLOAD_BLOCK_FULLY_PRESENT) {
 ret = vhdx_region_check(s, s->bat[i] & VHDX_BAT_FILE_OFF_MASK,
 s->block_size);
 if (ret < 0) {
@@ -935,7 +935,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 }
 
-/* TODO: differencing files, write */
+/* TODO: differencing files */
 
 /* Disable migration when VHDX images are used */
 error_set(&s->migration_blocker,
@@ -1040,7 +1040,7 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState 
*bs, int64_t sector_num,
 /* return zero */
 qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail);
 break;
-case PAYLOAD_BLOCK_FULL_PRESENT:
+case PAYLOAD_BLOCK_FULLY_PRESENT:
 qemu_co_mutex_unlock(&s->lock);
 ret = bdrv_co_readv(bs->file,
 sinfo.file_offset >> BDRV_SECTOR_BITS,
@@ -1070,7 +1070,43 @@ exit:
 return ret;
 }
 
+/*
+ * Allocate a new payload block at the end of the file.
+ *
+ * Allocation will happen at 1MB alignment inside the file
+ *
+ * Returns the file offset start of the new payload block
+ */
+static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
+uint64_t *new_offset)
+{
+*new_offset = bdrv_getlength(bs->file);
+
+/* per the spec, the address for a block is in units of 1MB */
+*new_offset = ROUND_UP(*new_offset, 1024 * 1024);
+
+return bdrv_truncate(bs->file, *new_offset + s->block_size);
+}
+
+/*
+ * Update the BAT table entry with the new file offset, and the new entry
+ * state */
+static void vhdx_update_bat_table_entry(BlockDriverState *bs, BDRVVHDXState *s,
+   VHDXSectorInfo *sinfo,
+   uint64_t *bat_entry_le,
+   uint64_t *bat_offset, int state)
+{
+/* The BAT entry is a uint64, with 44 bits for the file offset in units of
+ * 1MB, and 3 bits for the block state. */
+s->bat[sinfo->bat_idx]  = ((sinfo->file_offset>>20) <<
+   VHDX_BAT_FILE_OFF_BITS);
 
+s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK;
+
+*bat_entry_le = cpu_to_le64(s->bat[sinfo->bat_idx]);
+*bat_offset = s->bat_offset + sinfo->bat_idx * sizeof(VHDXBatEntry);
+
+}
 
 /* Per the spec, on the first write of guest-visible data to the file the
  * data write guid must be updated in the header */
@@ -1087,7 +1123,175 @@ int vhdx_user_visible_write(BlockDriverState *bs, 
BDRVVHDXState *s)
 static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t 
sector_num,
   int nb_sectors, QEMUIOVector *qiov)
 {
-return -ENOTSUP;
+int ret = -ENOTSUP;
+BDRVVHDXState *s = bs->opaque;
+VHDXSectorInfo sinfo;
+uint64_t bytes_done = 0;
+uint64_t bat_entry = 0;
+uint64_t bat_entry_offset = 0;
+QEMUIOVector hd_qiov;
+struct iovec iov1 = { 0 };
+struct iovec iov2 = { 0 };
+int sectors_to_write;
+int bat_state;
+uint64_t bat_prior_offset = 0;
+bool bat_update = false;
+
+qemu_iovec_init(&hd_qiov, qiov->niov);
+
+qemu_co_mutex_lock(&s->lock);
+
+ret = vhdx_user_visible_write(bs, s);
+if (ret < 0) {
+goto exit;
+}
+
+while (nb_sectors > 0) {
+bool use_zero_buffers = false;
+bat_update = false;
+if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
+/* not supported yet */
+ret = -ENOTSUP;
+goto exit;
+} else {
+vhdx_block_translate(s, sector_num, nb_sectors, &sinfo);
+sectors_to_write = sinfo.sectors_avail;
+
+qemu_iovec_reset(&hd_qiov);
+/* check the payload block state */
+bat_state = s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK;
+switch (bat_state) {
+case PAYLOAD_BLOCK_ZERO:
+/* in this case, we need to preserve zero writes for
+ * data that is not part

[Qemu-devel] [PULL v2 23/37] block: vhdx - update log guid in header, and first write tracker

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

Allow tracking of first file write in the VHDX image, as well as
the ability to update the GUID in the header.  This is in preparation
for log support.

Signed-off-by: Jeff Cody 
Signed-off-by: Stefan Hajnoczi 
---
 block/vhdx.c | 30 --
 block/vhdx.h |  6 ++
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/block/vhdx.c b/block/vhdx.c
index 68663c6..241703a 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -230,7 +230,7 @@ static int vhdx_probe(const uint8_t *buf, int buf_size, 
const char *filename)
  *  - non-current header is updated with largest sequence number
  */
 static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
-  bool generate_data_write_guid)
+  bool generate_data_write_guid, MSGUID *log_guid)
 {
 int ret = 0;
 int hdr_idx = 0;
@@ -262,6 +262,11 @@ static int vhdx_update_header(BlockDriverState *bs, 
BDRVVHDXState *s,
 vhdx_guid_generate(&inactive_header->data_write_guid);
 }
 
+/* update the log guid if present */
+if (log_guid) {
+inactive_header->log_guid = *log_guid;
+}
+
 /* the header checksum is not over just the packed size of VHDXHeader,
  * but rather over the entire 'reserved' range for the header, which is
  * 4KB (VHDX_HEADER_SIZE). */
@@ -294,16 +299,16 @@ exit:
  * The VHDX spec calls for header updates to be performed twice, so that both
  * the current and non-current header have valid info
  */
-static int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s,
-   bool generate_data_write_guid)
+int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s,
+bool generate_data_write_guid, MSGUID *log_guid)
 {
 int ret;
 
-ret = vhdx_update_header(bs, s, generate_data_write_guid);
+ret = vhdx_update_header(bs, s, generate_data_write_guid, log_guid);
 if (ret < 0) {
 return ret;
 }
-ret = vhdx_update_header(bs, s, generate_data_write_guid);
+ret = vhdx_update_header(bs, s, generate_data_write_guid, log_guid);
 return ret;
 }
 
@@ -784,6 +789,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, 
int flags,
 
 
 s->bat = NULL;
+s->first_visible_write = true;
 
 qemu_co_mutex_init(&s->lock);
 
@@ -864,7 +870,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 
 if (flags & BDRV_O_RDWR) {
-ret = vhdx_update_headers(bs, s, false);
+ret = vhdx_update_headers(bs, s, false, NULL);
 if (ret < 0) {
 goto fail;
 }
@@ -1010,6 +1016,18 @@ exit:
 
 
 
+/* Per the spec, on the first write of guest-visible data to the file the
+ * data write guid must be updated in the header */
+int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s)
+{
+int ret = 0;
+if (s->first_visible_write) {
+s->first_visible_write = false;
+ret = vhdx_update_headers(bs, s, true, NULL);
+}
+return ret;
+}
+
 static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t 
sector_num,
   int nb_sectors, QEMUIOVector *qiov)
 {
diff --git a/block/vhdx.h b/block/vhdx.h
index 23028af..6c35737 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -361,6 +361,7 @@ typedef struct BDRVVHDXState {
 VHDXBatEntry *bat;
 uint64_t bat_offset;
 
+bool first_visible_write;
 MSGUID session_guid;
 
 VHDXLogEntries log;
@@ -373,6 +374,9 @@ typedef struct BDRVVHDXState {
 
 void vhdx_guid_generate(MSGUID *guid);
 
+int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw,
+MSGUID *log_guid);
+
 uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset);
 uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
 int crc_offset);
@@ -402,4 +406,6 @@ void vhdx_log_data_le_export(VHDXLogDataSector *d);
 void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
 void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
 
+int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s);
+
 #endif
-- 
1.8.3.1




[Qemu-devel] [PULL v2 26/37] block: vhdx - add region overlap detection for image files

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

Regions in the image file cannot overlap - the log, region tables,
and metdata must all be unique and non-overlapping.

This adds region checking by means of a QLIST; there can be a variable
number of regions and metadata (there may be metadata or region tables
that we do not recognize / know about, but are not required).

This adds the capability to register a region for later checking, and
to check against registered regions for any overlap.

Also, if neither the BAT or Metadata region tables are found, return
error.

Signed-off-by: Jeff Cody 
Signed-off-by: Stefan Hajnoczi 
---
 block/vhdx.c | 82 
 block/vhdx.h |  9 +++
 2 files changed, 91 insertions(+)

diff --git a/block/vhdx.c b/block/vhdx.c
index 8fbfbd6..574ac4c 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -204,6 +204,50 @@ void vhdx_guid_generate(MSGUID *guid)
 memcpy(guid, uuid, sizeof(MSGUID));
 }
 
+/* Check for region overlaps inside the VHDX image */
+static int vhdx_region_check(BDRVVHDXState *s, uint64_t start, uint64_t length)
+{
+int ret = 0;
+uint64_t end;
+VHDXRegionEntry *r;
+
+end = start + length;
+QLIST_FOREACH(r, &s->regions, entries) {
+if (!((start >= r->end) || (end <= r->start))) {
+ret = -EINVAL;
+goto exit;
+}
+}
+
+exit:
+return ret;
+}
+
+/* Register a region for future checks */
+static void vhdx_region_register(BDRVVHDXState *s,
+ uint64_t start, uint64_t length)
+{
+VHDXRegionEntry *r;
+
+r = g_malloc0(sizeof(*r));
+
+r->start = start;
+r->end = start + length;
+
+QLIST_INSERT_HEAD(&s->regions, r, entries);
+}
+
+/* Free all registered regions */
+static void vhdx_region_unregister_all(BDRVVHDXState *s)
+{
+VHDXRegionEntry *r, *r_next;
+
+QLIST_FOREACH_SAFE(r, &s->regions, entries, r_next) {
+QLIST_REMOVE(r, entries);
+g_free(r);
+}
+}
+
 /*
  * Per the MS VHDX Specification, for every VHDX file:
  *  - The header section is fixed size - 1 MB
@@ -389,6 +433,9 @@ static int vhdx_parse_header(BlockDriverState *bs, 
BDRVVHDXState *s)
 }
 }
 
+vhdx_region_register(s, s->headers[s->curr_header]->log_offset,
+s->headers[s->curr_header]->log_length);
+
 ret = 0;
 
 goto exit;
@@ -452,6 +499,15 @@ static int vhdx_open_region_tables(BlockDriverState *bs, 
BDRVVHDXState *s)
 le32_to_cpus(&rt_entry.length);
 le32_to_cpus(&rt_entry.data_bits);
 
+/* check for region overlap between these entries, and any
+ * other memory regions in the file */
+ret = vhdx_region_check(s, rt_entry.file_offset, rt_entry.length);
+if (ret < 0) {
+goto fail;
+}
+
+vhdx_region_register(s, rt_entry.file_offset, rt_entry.length);
+
 /* see if we recognize the entry */
 if (guid_eq(rt_entry.guid, bat_guid)) {
 /* must be unique; if we have already found it this is invalid */
@@ -482,6 +538,12 @@ static int vhdx_open_region_tables(BlockDriverState *bs, 
BDRVVHDXState *s)
 goto fail;
 }
 }
+
+if (!bat_rt_found || !metadata_rt_found) {
+ret = -EINVAL;
+goto fail;
+}
+
 ret = 0;
 
 fail:
@@ -751,6 +813,7 @@ static void vhdx_close(BlockDriverState *bs)
 error_free(s->migration_blocker);
 qemu_vfree(s->log.hdr);
 s->log.hdr = NULL;
+vhdx_region_unregister_all(s);
 }
 
 static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
@@ -768,6 +831,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, 
int flags,
 s->first_visible_write = true;
 
 qemu_co_mutex_init(&s->lock);
+QLIST_INIT(&s->regions);
 
 /* validate the file signature */
 ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
@@ -842,8 +906,26 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, 
int flags,
 goto fail;
 }
 
+uint64_t payblocks = s->chunk_ratio;
+/* endian convert, and verify populated BAT field file offsets against
+ * region table and log entries */
 for (i = 0; i < s->bat_entries; i++) {
 le64_to_cpus(&s->bat[i]);
+if (payblocks--) {
+/* payload bat entries */
+if ((s->bat[i] & VHDX_BAT_STATE_BIT_MASK) ==
+PAYLOAD_BLOCK_FULL_PRESENT) {
+ret = vhdx_region_check(s, s->bat[i] & VHDX_BAT_FILE_OFF_MASK,
+s->block_size);
+if (ret < 0) {
+goto fail;
+}
+}
+} else {
+payblocks = s->chunk_ratio;
+/* Once differencing files are supported, verify sector bitmap
+ * blocks here */
+}
 }
 
 if (flags & BDRV_O_RDWR) {
diff --git a/block/vhdx.h b/block/vhdx.h
index 584ebec..d906559 100644
--- a/block/vhdx.h
+++ b/block/vhdx.

[Qemu-devel] [PULL v2 10/37] block: qemu-iotests, add quotes to $TEST_IMG usage in 061

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

When creating images with backing files in the test, the backing
file argument was not quoted properly.  This caused the test to fail
when run from a pathname with a space.  Pass the backing argument in
with the -b option to _make_test_img, so it can be properly quoted.

Signed-off-by: Jeff Cody 
Reviewed-by: Eric Blake 
Signed-off-by: Stefan Hajnoczi 
---
 tests/qemu-iotests/061 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061
index fa9319d..e42f9bd 100755
--- a/tests/qemu-iotests/061
+++ b/tests/qemu-iotests/061
@@ -163,7 +163,7 @@ echo "=== Testing zero expansion on backed image ==="
 echo
 IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M
 $QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io
-IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M
+IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M
 $QEMU_IO -c "read -P 0x2a 0 128k" -c "write -z 0 64k" "$TEST_IMG" | 
_filter_qemu_io
 $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
 _check_test_img
@@ -174,7 +174,7 @@ echo "=== Testing zero expansion on backed inactive 
clusters ==="
 echo
 IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M
 $QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io
-IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M
+IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M
 $QEMU_IO -c "write -z 0 64k" "$TEST_IMG" | _filter_qemu_io
 $QEMU_IMG snapshot -c foo "$TEST_IMG"
 $QEMU_IO -c "write -P 0x42 0 128k" "$TEST_IMG" | _filter_qemu_io
@@ -190,7 +190,7 @@ echo "=== Testing zero expansion on backed image with 
shared L2 table ==="
 echo
 IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M
 $QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io
-IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M
+IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M
 $QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io
 $QEMU_IMG snapshot -c foo "$TEST_IMG"
 $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
-- 
1.8.3.1




Re: [Qemu-devel] [RFC PATCH] i386: Add _PXM method to ACPI CPU objects

2013-11-08 Thread Vasilis Liaskovitis
Hi,

On Thu, Nov 07, 2013 at 03:03:42PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 07, 2013 at 01:41:59PM +0100, Vasilis Liaskovitis wrote:
> > This patch adds a _PXM method to ACPI CPU objects for the pc machine. The 
> > _PXM
> > value is derived from the passed in guest info, same way as CPU SRAT 
> > entries.
> > 
> > The motivation for this patch is a CPU hot-unplug/hot-plug bug observed when
> > using a 3.11 linux guest kernel on a multi-NUMA node qemu/kvm VM. The linux
> > guest kernel parses the SRAT CPU entries at boot time and stores them in the
> > array __apicid_to_node. When a CPU is hot-removed, the linux guest kernel
> > resets the removed CPU's __apicid_to_node entry to NO_NUMA_NODE (kernel 
> > commit
> > c4c60524). When the removed cpu is hot-added again, the linux kernel looks 
> > up
> > the hot-added cpu object's _PXM method instead of somehow re-discovering the
> > SRAT entry info. With current qemu/seabios, the _PXM method is not found, 
> > and
> > the CPU is thus hot-plugged in the default NUMA node 0. (The problem does 
> > not
> > show up on initial hotplug of a cpu; the PXM method is still not found in 
> > this
> > case, but the kernel still has the correct proximity value from the CPU's 
> > SRAT
> > entry stored in __apicid_to_node)
> > 
> > ACPI spec mentions that the _PXM method is the correct way to determine
> > proximity information at hot-add time.
> 
> Where does it say this?
> I found this:
> If the Local APIC ID / Local SAPIC ID / Local x2APIC ID of a dynamically
> added processor is not present in the System Resource Affinity Table
> (SRAT), a _PXM object must exist for the processor’s device or one of
> its ancestors in the ACPI Namespace.
> 
> Does this mean that linux is buggy, and should be fixed up to look up
> the apic ID in SRAT?

The quote above suggests that if SRAT is absent, _PXM should be present.
Seabios/qemu provide SRAT entries, and  no _PXM. The fact that the kernel
resets the parse SRAT info on hot-remove time looks like a kernel problem.

But As Toshi Kani mentioned in the original thread, here is a quote from ACPI
5.0, stating _PXM and only _PXM should be used at hot-plug time:

===
17.2.1 System Resource Affinity Table Definition

This optional System Resource Affinity Table (SRAT) provides the boot
time description of the processor and memory ranges belonging to a
system locality. OSPM will consume the SRAT only at boot time. OSPM
should use _PXM for any devices that are hot-added into the system after
boot up.


So in this sense, the kernel is correct (kernel only uses _PXM at hot-plug time)
, and qemu/Seabios should have _PXM methods for hot operations.

> 
> > So far, qemu/seabios do not provide this
> > method for CPUs. So regardless of kernel behaviour, it is a good idea to add
> > this _PXM method. Since ACPI table generation has recently been moved from
> > seabios to qemu, we do this in qemu.
> > 
> > Note that the above hot-remove/hot-add scenario has been tested on an older
> > qemu + non-upstreamed patches for cpu hot-removal support, and not on qemu
> > master (since cpu-del support is still not on master). The only testing done
> > with qemu/seabios master and this patch, are successful boots of multi-node
> > linux and windows8 guests.
> > 
> > For the initial discussion on seabios and linux-acpi lists see
> > http://www.spinics.net/lists/linux-acpi/msg47058.html
> > 
> > Signed-off-by: Vasilis Liaskovitis 
> > Reviewed-by: Thilo Fromm 
> 
> Even if this is a linux bug, I have no issue with working around
> it in qemu.
> 
> But I think proper testing needs to be done with rebased upport for cpu-del.

Ok, I can try to rebase cpu-del support for testing. If there are cpu-del bits
already somewhere (Igor?) and not merged yet, please point me to them.

> 
> > ---
> >  hw/i386/acpi-build.c  |2 ++
> >  hw/i386/ssdt-proc.dsl |2 ++
> >  2 files changed, 4 insertions(+)
> > 
> > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> > index 6cfa044..9373f5e 100644
> > --- a/hw/i386/acpi-build.c
> > +++ b/hw/i386/acpi-build.c
> > @@ -603,6 +603,7 @@ static inline char acpi_get_hex(uint32_t val)
> >  #define ACPI_PROC_OFFSET_CPUHEX (*ssdt_proc_name - *ssdt_proc_start + 2)
> >  #define ACPI_PROC_OFFSET_CPUID1 (*ssdt_proc_name - *ssdt_proc_start + 4)
> >  #define ACPI_PROC_OFFSET_CPUID2 (*ssdt_proc_id - *ssdt_proc_start)
> > +#define ACPI_PROC_OFFSET_CPUPXM (*ssdt_proc_pxm - *ssdt_proc_start)
> >  #define ACPI_PROC_SIZEOF (*ssdt_proc_end - *ssdt_proc_start)
> >  #define ACPI_PROC_AML (ssdp_proc_aml + *ssdt_proc_start)
> >  
> > @@ -724,6 +725,7 @@ build_ssdt(GArray *table_data, GArray *linker,
> >  proc[ACPI_PROC_OFFSET_CPUHEX+1] = acpi_get_hex(i);
> >  proc[ACPI_PROC_OFFSET_CPUID1] = i;
> >  proc[ACPI_PROC_OFFSET_CPUID2] = i;
> > +proc[ACPI_PROC_OFFSET_CPUPXM] = guest_info->node_cpu[i];
> >  }
> >  
> >  /* build this code:
> > diff --git a/hw/i386/ssdt-proc.ds

[Qemu-devel] [PULL v2 34/37] block: vhdx - update _make_test_img() to filter out vhdx options

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

The non-global option output is suppresed in _make_test_img() for
output verification in the 0?? tests.  This adds suppression for
the vhdx-unique options as well.  This allows check -vhdx to run
successfully.

Signed-off-by: Jeff Cody 
Signed-off-by: Stefan Hajnoczi 
---
 tests/qemu-iotests/common.rc | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index d24de2c..7f62457 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -157,7 +157,10 @@ _make_test_img()
 -e "s# zeroed_grain=\\(on\\|off\\)##g" \
 -e "s# subformat='[^']*'##g" \
 -e "s# adapter_type='[^']*'##g" \
--e "s# lazy_refcounts=\\(on\\|off\\)##g"
+-e "s# lazy_refcounts=\\(on\\|off\\)##g" \
+-e "s# block_size=[0-9]\\+##g" \
+-e "s# block_state_zero=\\(on\\|off\\)##g" \
+-e "s# log_size=[0-9]\\+##g"
 
 # Start an NBD server on the image file, which is what we'll be talking to
 if [ $IMGPROTO = "nbd" ]; then
-- 
1.8.3.1




[Qemu-devel] [PULL v2 22/37] block: vhdx - break endian translation functions out

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

This moves the endian translation functions out from the vhdx.c source,
into a separate source file. In addition to the previously defined
endian functions, new endian translation functions for log support are
added as well.

Signed-off-by: Jeff Cody 
Signed-off-by: Stefan Hajnoczi 
---
 block/Makefile.objs |   2 +-
 block/vhdx-endian.c | 141 
 block/vhdx.c|  43 
 block/vhdx.h|   8 +++
 4 files changed, 150 insertions(+), 44 deletions(-)
 create mode 100644 block/vhdx-endian.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index e7214de..2fc496a 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -2,7 +2,7 @@ block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o v
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
-block-obj-$(CONFIG_VHDX) += vhdx.o
+block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
 block-obj-y += snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c
new file mode 100644
index 000..3e93e63
--- /dev/null
+++ b/block/vhdx-endian.c
@@ -0,0 +1,141 @@
+/*
+ * Block driver for Hyper-V VHDX Images
+ *
+ * Copyright (c) 2013 Red Hat, Inc.,
+ *
+ * Authors:
+ *  Jeff Cody 
+ *
+ *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
+ *  by Microsoft:
+ *  https://www.microsoft.com/en-us/download/details.aspx?id=34750
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/vhdx.h"
+
+#include 
+
+
+/*
+ * All the VHDX formats on disk are little endian - the following
+ * are helper import/export functions to correctly convert
+ * endianness from disk read to native cpu format, and back again.
+ */
+
+
+/* VHDX File Header */
+
+
+void vhdx_header_le_import(VHDXHeader *h)
+{
+assert(h != NULL);
+
+le32_to_cpus(&h->signature);
+le32_to_cpus(&h->checksum);
+le64_to_cpus(&h->sequence_number);
+
+leguid_to_cpus(&h->file_write_guid);
+leguid_to_cpus(&h->data_write_guid);
+leguid_to_cpus(&h->log_guid);
+
+le16_to_cpus(&h->log_version);
+le16_to_cpus(&h->version);
+le32_to_cpus(&h->log_length);
+le64_to_cpus(&h->log_offset);
+}
+
+void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h)
+{
+assert(orig_h != NULL);
+assert(new_h != NULL);
+
+new_h->signature   = cpu_to_le32(orig_h->signature);
+new_h->checksum= cpu_to_le32(orig_h->checksum);
+new_h->sequence_number = cpu_to_le64(orig_h->sequence_number);
+
+new_h->file_write_guid = orig_h->file_write_guid;
+new_h->data_write_guid = orig_h->data_write_guid;
+new_h->log_guid= orig_h->log_guid;
+
+cpu_to_leguids(&new_h->file_write_guid);
+cpu_to_leguids(&new_h->data_write_guid);
+cpu_to_leguids(&new_h->log_guid);
+
+new_h->log_version = cpu_to_le16(orig_h->log_version);
+new_h->version = cpu_to_le16(orig_h->version);
+new_h->log_length  = cpu_to_le32(orig_h->log_length);
+new_h->log_offset  = cpu_to_le64(orig_h->log_offset);
+}
+
+
+/* VHDX Log Headers */
+
+
+void vhdx_log_desc_le_import(VHDXLogDescriptor *d)
+{
+assert(d != NULL);
+
+le32_to_cpus(&d->signature);
+le32_to_cpus(&d->trailing_bytes);
+le64_to_cpus(&d->leading_bytes);
+le64_to_cpus(&d->file_offset);
+le64_to_cpus(&d->sequence_number);
+}
+
+void vhdx_log_desc_le_export(VHDXLogDescriptor *d)
+{
+assert(d != NULL);
+
+cpu_to_le32s(&d->signature);
+cpu_to_le32s(&d->trailing_bytes);
+cpu_to_le64s(&d->leading_bytes);
+cpu_to_le64s(&d->file_offset);
+cpu_to_le64s(&d->sequence_number);
+}
+
+void vhdx_log_data_le_export(VHDXLogDataSector *d)
+{
+assert(d != NULL);
+
+cpu_to_le32s(&d->data_signature);
+cpu_to_le32s(&d->sequence_high);
+cpu_to_le32s(&d->sequence_low);
+}
+
+void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr)
+{
+assert(hdr != NULL);
+
+le32_to_cpus(&hdr->signature);
+le32_to_cpus(&hdr->checksum);
+le32_to_cpus(&hdr->entry_length);
+le32_to_cpus(&hdr->tail);
+le64_to_cpus(&hdr->sequence_number);
+le32_to_cpus(&hdr->descriptor_count);
+leguid_to_cpus(&hdr->log_guid);
+le64_to_cpus(&hdr->flushed_file_offset);
+le64_to_cpus(&hdr->last_file_offset);
+}
+
+void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr)
+{
+assert(hdr != NULL);
+
+cpu_to_le32s(&hdr->signature);
+cpu_to_le32s(&hdr->checksum);
+cpu_to_le32s(&hdr->entry_length);
+cpu_to_le32s(&hdr->tail);
+cpu_to_le64s(&hdr->sequence_number);
+cpu_to

[Qemu-devel] [PULL v2 33/37] block: vhdx - add .bdrv_create() support

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

This adds support for VHDX image creation, for images of type "Fixed"
and "Dynamic".  "Differencing" types (i.e., VHDX images with backing
files) are currently not supported.

Options for image creation include:
* log size:
The size of the journaling log for VHDX.  Minimum is 1MB,
and it must be a multiple of 1MB. Invalid log sizes will be
silently fixed by rounding up to the nearest MB.

Default is 1MB.

* block size:
This is the size of a payload block.  The range is 1MB to 256MB,
inclusive, and must be a multiple of 1MB as well.  Invalid sizes
and multiples will be silently fixed.  If '0' is passed, then
a sane size is chosen (depending on virtual image size).

Default is 0 (Auto-select).

* subformat:
- "dynamic"
An image without data pre-allocated.
- "fixed"
An image with data pre-allocated.

Default is "dynamic"

When creating the image file, the lettered sections are created:

-.
|   (A)|   (B)|(C)| (D)   | (E)
|  File ID |  Header1 |  Header 2 |  Region Tbl 1 |  Region Tbl 2
|  |  |   |   |
.-.
0 64KB  128KB   192KB   256KB  320KB

. ~ --- ~  ~  ~ ---.
| (F) | (G)   |(H)|
| Journal Log |  BAT / Bitmap |  Metadata |   data ..
| |   |   |
. ~ --- ~  ~  ~ ---.
1MB (var.)  (var.)  (var.)

Signed-off-by: Jeff Cody 
Signed-off-by: Stefan Hajnoczi 
---
 block/vhdx.c | 558 +++
 block/vhdx.h |  15 +-
 2 files changed, 572 insertions(+), 1 deletion(-)

diff --git a/block/vhdx.c b/block/vhdx.c
index 7bf7cd6..7d1af96 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -23,6 +23,19 @@
 #include "migration/migration.h"
 
 #include 
+#include 
+
+/* Options for VHDX creation */
+
+#define VHDX_BLOCK_OPT_LOG_SIZE   "log_size"
+#define VHDX_BLOCK_OPT_BLOCK_SIZE "block_size"
+#define VHDX_BLOCK_OPT_ZERO "block_state_zero"
+
+typedef enum VHDXImageType {
+VHDX_TYPE_DYNAMIC = 0,
+VHDX_TYPE_FIXED,
+VHDX_TYPE_DIFFERENCING,   /* Currently unsupported */
+} VHDXImageType;
 
 /* Several metadata and region table data entries are identified by
  * guids in  a MS-specific GUID format. */
@@ -1320,6 +1333,548 @@ exit:
 }
 
 
+
+/*
+ * Create VHDX Headers
+ *
+ * There are 2 headers, and the highest sequence number will represent
+ * the active header
+ */
+static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size,
+   uint32_t log_size)
+{
+int ret = 0;
+VHDXHeader *hdr = NULL;
+
+hdr = g_malloc0(sizeof(VHDXHeader));
+
+hdr->signature   = VHDX_HEADER_SIGNATURE;
+hdr->sequence_number = g_random_int();
+hdr->log_version = 0;
+hdr->version = 1;
+hdr->log_length  = log_size;
+hdr->log_offset  = VHDX_HEADER_SECTION_END;
+vhdx_guid_generate(&hdr->file_write_guid);
+vhdx_guid_generate(&hdr->data_write_guid);
+
+ret = vhdx_write_header(bs, hdr, VHDX_HEADER1_OFFSET, false);
+if (ret < 0) {
+goto exit;
+}
+hdr->sequence_number++;
+ret = vhdx_write_header(bs, hdr, VHDX_HEADER2_OFFSET, false);
+if (ret < 0) {
+goto exit;
+}
+
+exit:
+g_free(hdr);
+return ret;
+}
+
+
+/*
+ * Create the Metadata entries.
+ *
+ * For more details on the entries, see section 3.5 (pg 29) in the
+ * VHDX 1.00 specification.
+ *
+ * We support 5 metadata entries (all required by spec):
+ *  File Parameters,
+ *  Virtual Disk Size,
+ *  Page 83 Data,
+ *  Logical Sector Size,
+ *  Physical Sector Size
+ *
+ * The first 64KB of the Metadata section is reserved for the metadata
+ * header and entries; beyond that, the metadata items themselves reside.
+ */
+static int vhdx_create_new_metadata(BlockDriverState *bs,
+uint64_t image_size,
+uint32_t block_size,
+uint32_t sector_size,
+uint64_t metadata_offset,
+VHDXImageType type)
+{
+int ret = 0;
+uint32_t offset = 0;
+void *buffer = NULL;
+void *entry_buffer;
+VHDXMetadataTableHeader *md_table;;
+VHDXMetadataTableEntry  *md_table_entry;
+
+/* Metadata entries */
+VHDXFileParameters *mt_file_params;
+VHDXVirtualDiskSize*mt_virtual_size;
+VHDXPage83Data *mt_page83;
+VHDXVirtualDiskLogicalSectorSize  *mt_log_sector_size;
+VHDXVirtualDiskPhysicalSectorSize *mt_phys_se

Re: [Qemu-devel] [PATCH V6 1/5] block/qemu-img: Refine and export infinite loop checking in collect_image_info_list()

2013-11-08 Thread Fam Zheng
On Tue, 11/05 22:09, Xu Wang wrote:
> If there is a loop in the backing file chain, it could cause problems
> such as no response or a segfault during system boot. Hence detecting a
> backing file loop is necessary. This patch extracts the loop check from
> collect_image_info_list() in block.c into independent functions
> bdrv_backing_chain_okay() and bdrv_image_create_okay().
> 
> Signed-off-by: Xu Wang 
> ---
>  block.c   | 117 
> ++
>  include/block/block.h |   4 ++
>  qemu-img.c|  44 ---
>  3 files changed, 139 insertions(+), 26 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 58efb5b..3443117 100644
> --- a/block.c
> +++ b/block.c
> @@ -4490,6 +4490,123 @@ bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie 
> *cookie)
>  bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
>  }
>  
> +static gboolean str_equal_func(gconstpointer a, gconstpointer b)
> +{
> +return strcmp(a, b) == 0;
> +}

Just use g_str_equal here.

> +
> +static bool file_chain_loop_check(GHashTable *filenames, const char 
> *filename,
> +  const char *fmt) {

Open brace '{' should be in a new line for functions.

Still confusing function name and return type. Suggest file_chain_has_loop().

> +BlockDriverState *bs;
> +BlockDriver *drv;
> +char fbuf[1024];

Could use PATH_MAX.

> +int ret;
> +Error *local_err = NULL;
> +
> +while (filename && (filename[0] != '\0')) {
> +if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
> +error_report("Backing file '%s' creates an infinite loop.",
> + filename);
> +return true;
> +}
> +g_hash_table_insert(filenames, (gpointer)filename, NULL);
> +
> +bs = bdrv_new("image");
> +
> +if (fmt) {
> +drv = bdrv_find_format(fmt);
> +if (!drv) {
> +error_report("Unknown file format '%s'", fmt);
> +bdrv_delete(bs);
> +return true;
> +}
> +} else {
> +drv = NULL;
> +}

No need to call bdrv_find_format for multiple times. Also it doesn't look good
to write format checking here, just let the caller pass in drv.

> +
> +ret = bdrv_open(bs, filename, NULL,
> +BDRV_O_CACHE_WB | BDRV_O_NO_BACKING, drv, 
> &local_err);
> +if (ret < 0) {
> +error_report("Could not open '%s': %s", filename,
> + error_get_pretty(local_err));
> +error_free(local_err);
> +local_err = NULL;
> +return true;
> +}
> +
> +bdrv_get_backing_filename(bs, fbuf, sizeof(fbuf));
> +filename = fbuf;
> +fmt = NULL;
> +
> +bdrv_unref(bs);
> +}
> +
> +return false;
> +}
> +
> +/**
> + * Check backing file chain if there is a loop in it.
> + *
> + * @filename: topmost image filename of backing file chain.
> + * @fmt: topmost image format of backing file chain(may be NULL to 
> autodetect).
> + *
> + * Returns: true for backing file loop or error happened, false for no loop.

Really?

> + */
> +bool bdrv_backing_chain_okay(const char *filename, const char *fmt) {
> +GHashTable *filenames;
> +
> +if (filename == NULL || filename[0] == '\0') {
> +return true;

Please don't mix "goto err" and multiple "return true". Could be

   goto exit;

...

> +}
> +
> +filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, 
> NULL);
> +
> +if (file_chain_loop_check(filenames, filename, fmt)) {
> +goto err;
> +}
> +
> +g_hash_table_destroy(filenames);
  exit:

> +return true;
> +
> +err:
> +g_hash_table_destroy(filenames);
> +return false;
> +}
> +
> +/**
> + * Check if there is loop exists in the backing file chain and if there will
> + * be loop occur after backing file chain updated or new image created.
> + *
> + * @filename: the image filename to be created.
> + * @backing_file: topmost image filename of backing file chain.
> + * @backing_fmt: topmost image format (may be NULL to autodetect).
> + *
> + * Returns: true for backing file loop or error happened, false for no loop.

I don't think so.

> + */
> +bool bdrv_new_chain_okay(const char *filename, const char *backing_file,
> +  const char *backing_fmt) {

Please align arguments: ^

This function could be merged to bdrv_backing_chain_ok by adding an optional
argument const char *new_filename. If it's not NULL you add it to the hash
table, like:

bool bdrv_backing_chain_okay(const char *filename, const char *fmt,
 const char *new_filename)
{
GHashTable *filenames;

if (filename == NULL || filename[0] == '\0') {
goto exit;
}

filenames = g_hash_table_new_full(g_str_hash, str_equal_f

[Qemu-devel] [PULL v2 15/37] qdev-monitor-test: add device_add leak test cases

2013-11-08 Thread Stefan Hajnoczi
Ensure that the device_add error code path deletes device objects.
Failure to do so not only leaks the objects but can also keep other
objects (like drive or netdev) alive due to qdev properties holding
references.

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
---
 tests/Makefile|  2 ++
 tests/qdev-monitor-test.c | 81 +++
 2 files changed, 83 insertions(+)
 create mode 100644 tests/qdev-monitor-test.c

diff --git a/tests/Makefile b/tests/Makefile
index 973f497..379cdd9 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -69,6 +69,7 @@ check-qtest-i386-y += tests/i440fx-test$(EXESUF)
 check-qtest-i386-y += tests/fw_cfg-test$(EXESUF)
 check-qtest-i386-y += tests/qom-test$(EXESUF)
 check-qtest-i386-y += tests/blockdev-test$(EXESUF)
+check-qtest-i386-y += tests/qdev-monitor-test$(EXESUF)
 check-qtest-x86_64-y = $(check-qtest-i386-y)
 gcov-files-i386-y += i386-softmmu/hw/mc146818rtc.c
 gcov-files-x86_64-y = $(subst 
i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
@@ -202,6 +203,7 @@ tests/i440fx-test$(EXESUF): tests/i440fx-test.o 
$(libqos-pc-obj-y)
 tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y)
 tests/qom-test$(EXESUF): tests/qom-test.o
 tests/blockdev-test$(EXESUF): tests/blockdev-test.o $(libqos-pc-obj-y)
+tests/qdev-monitor-test$(EXESUF): tests/qdev-monitor-test.o $(libqos-pc-obj-y)
 tests/qemu-iotests/socket_scm_helper$(EXESUF): 
tests/qemu-iotests/socket_scm_helper.o
 
 # QTest rules
diff --git a/tests/qdev-monitor-test.c b/tests/qdev-monitor-test.c
new file mode 100644
index 000..33a8ea4
--- /dev/null
+++ b/tests/qdev-monitor-test.c
@@ -0,0 +1,81 @@
+/*
+ * qdev-monitor.c test cases
+ *
+ * Copyright (C) 2013 Red Hat Inc.
+ *
+ * Authors:
+ *  Stefan Hajnoczi 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include 
+#include 
+#include "libqtest.h"
+#include "qapi/qmp/qjson.h"
+
+static void test_device_add(void)
+{
+QDict *response;
+QDict *error;
+
+qtest_start("-drive if=none,id=drive0");
+
+/* Make device_add fail.  If this leaks the virtio-blk-pci device then a
+ * reference to drive0 will also be held (via qdev properties).
+ */
+response = qmp("{\"execute\": \"device_add\","
+   " \"arguments\": {"
+   "   \"driver\": \"virtio-blk-pci\","
+   "   \"drive\": \"drive0\""
+   "}}");
+g_assert(response);
+error = qdict_get_qdict(response, "error");
+g_assert(!strcmp(qdict_get_try_str(error, "class") ?: "",
+ "GenericError"));
+g_assert(!strcmp(qdict_get_try_str(error, "desc") ?: "",
+ "Device initialization failed."));
+QDECREF(response);
+
+/* Delete the drive */
+response = qmp("{\"execute\": \"human-monitor-command\","
+   " \"arguments\": {"
+   "   \"command-line\": \"drive_del drive0\""
+   "}}");
+g_assert(response);
+g_assert(!strcmp(qdict_get_try_str(response, "return") ?: "(null)", ""));
+QDECREF(response);
+
+/* Try to re-add the drive.  This fails with duplicate IDs if a leaked
+ * virtio-blk-pci exists that holds a reference to the old drive0.
+ */
+response = qmp("{\"execute\": \"human-monitor-command\","
+   " \"arguments\": {"
+   "   \"command-line\": \"drive_add pci-addr=auto 
if=none,id=drive0\""
+   "}}");
+g_assert(response);
+g_assert(!strcmp(qdict_get_try_str(response, "return") ?: "",
+ "OK\r\n"));
+QDECREF(response);
+
+qtest_end();
+}
+
+int main(int argc, char **argv)
+{
+const char *arch = qtest_get_arch();
+
+/* Check architecture */
+if (strcmp(arch, "i386") && strcmp(arch, "x86_64")) {
+g_test_message("Skipping test for non-x86\n");
+return 0;
+}
+
+/* Run the tests */
+g_test_init(&argc, &argv, NULL);
+
+qtest_add_func("/qmp/device_add", test_device_add);
+
+return g_test_run();
+}
-- 
1.8.3.1




[Qemu-devel] [PULL v2 36/37] block: vhdx qemu-iotest - log replay of data sector

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

This tests the replay of a data sector in a VHDX image file.

The image file is a 10G dynamic image, with 4MB block size.  The
image was created with qemu-img, and the log left unplayed by
modification of the vhdx image format driver.

It was verified under both QEMU and Hyper-V that the image file,
post log replay, matched.

Signed-off-by: Jeff Cody 
Signed-off-by: Stefan Hajnoczi 
---
 tests/qemu-iotests/070 |  67 +
 tests/qemu-iotests/070.out |   8 +++
 tests/qemu-iotests/group   |   1 +
 .../sample_images/iotest-dirtylog-10G-4M.vhdx.bz2  | Bin 0 -> 4490 bytes
 4 files changed, 76 insertions(+)
 create mode 100755 tests/qemu-iotests/070
 create mode 100644 tests/qemu-iotests/070.out
 create mode 100644 
tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2

diff --git a/tests/qemu-iotests/070 b/tests/qemu-iotests/070
new file mode 100755
index 000..41bf100
--- /dev/null
+++ b/tests/qemu-iotests/070
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# Test VHDX log replay from an image with a journal that needs to be
+# replayed
+#
+# Copyright (C) 2013 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+# creator
+owner=jc...@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1   # failure is the default!
+
+_cleanup()
+{
+_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt vhdx
+_supported_proto generic
+_supported_os Linux
+
+# With the log replayed, the pattern 0xa5 extends to 0xc025000
+# If the log was not replayed, it would only extend to 0xc00
+#
+# This image is a 10G dynamic image, with 4M block size, and 1 unplayed
+# data sector in the log
+#
+# This image was created with qemu-img, however it was verified using
+# Hyper-V to properly replay the logs and give the same post-replay
+# image as qemu.
+_use_sample_img iotest-dirtylog-10G-4M.vhdx.bz2
+
+echo
+echo "=== Verify open image read-only fails, due to dirty log ==="
+$QEMU_IO -r -c "read -pP 0xa5 0 18M" "$TEST_IMG" 2>&1 | grep -o "Permission 
denied"
+
+echo "=== Verify open image replays log  ==="
+$QEMU_IO  -c "read -pP 0xa5 0 18M" "$TEST_IMG" | _filter_qemu_io
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/070.out b/tests/qemu-iotests/070.out
new file mode 100644
index 000..9db8ff2
--- /dev/null
+++ b/tests/qemu-iotests/070.out
@@ -0,0 +1,8 @@
+QA output created by 070
+
+=== Verify open image read-only fails, due to dirty log ===
+Permission denied
+=== Verify open image replays log  ===
+read 18874368/18874368 bytes at offset 0
+18 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index c57ff35..b18b241 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -75,3 +75,4 @@
 067 rw auto
 068 rw auto
 069 rw auto
+070 rw auto
diff --git a/tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2 
b/tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2
new file mode 100644
index 
..4b91cfc654b9b8b46ff45918b6f9a2f88d47ecf0
GIT binary patch
literal 4490
zcmV;55q0iDT4*^jL0KkKS$k&2jsZ0{fB*mg|NsC0|NsC0|NsC0|NsC0|NsC0|NsC0
z|NsC0|Nr0&KYaiIlYO_ZcRTI9zV`F0x$m=gzWd(y-zN9hz4y-D_uhSdA3nFvh^Y#D
z3N-W+)gBsC)bz}ZrZlILew2Gu*e9xbn-M=#1j3#IpQ)+hVw(d~8mH(cr=uY@Nt$S7
zGgH${gz#w-)Y_X->Uoh(srnT2MvW5!o=m5hQ`E_($r-8*J*s&yO-Je&r;JJH1ZsJeHq%qlPgC_agrA@q
z6ExL76+K45Pt`LfQ`6LHeyQqysguD*L)sMeF+D;krkb9oqt!jCHku&F^%{92Kxoj@
z)b%`}=s}G~l-gwUp{J%rpb|{#w7HflMNXmv@(xLr>T?F+91*DdQ8${XwYpy(8PL1
zhpFm%CWe|C14f2NsA3vuX_4s-8fXknGVA-VQ))b>#;C+&N$5sHH8B{}!5B0&42=`hO&SRGJx8gd)ICh~1j2f1Gej6P
zCTZ%K8$@Wqrje#20i?i~(9s8}kCadV%PbH$Ws8vcdpP+(cp!SJv0
zBp9v^cVke1zziW%O?^-eCRNnX*IrRxO$vjsNH7Z7HcG2usbH1K3s|a)(!FR<0W=5&
zi)b!R~~XYSpWIVMIYKrG^(U5Cjm_MLG
z=zt-000D52L_t-ta;O%fBYZ)!P`C<&0IKPQmY^>rtdx=xk#6kRwPNnQh>{~CUJ8f`
zA#4SlDVr+_1$xTR9o`6;hz1`PX{te8lEnce7Vwl1nbL*TMG_<+!Xp47@u;+Ov)sASi|W0?-$rWI)iaL~?173}6^7{k65VZOFU0uU@@+
z<$M2~p7@Jzjj?fR-wAPbZEDfk#WIk@MFzw)oh=>5OGQY)ts6!#Ac&GaI|>KioPr%F
z)+B^cli}we(9Xv#P?-VHG5|Ulpg0o(B62#3F!*tX2W-Tg)oCKKp#V#Nxyc54qztBU
zvcR7EuH>rA^BVvES;J%hA%UONcbup0!Myh{{Wk@6w{X+

[Qemu-devel] [PULL v2 07/37] block: qemu-iotests, add quotes to $TEST_IMG usage in 019

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

There were still instances of $TEST_IMG not being properly quoted.
This was in the usage of a string built up for a 'for' loop; modify
the loop so we can quote $TEST_IMG properly.

Signed-off-by: Jeff Cody 
Reviewed-by: Eric Blake 
Signed-off-by: Stefan Hajnoczi 
---
 tests/qemu-iotests/019 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/019 b/tests/qemu-iotests/019
index cd3582c..5bb18d0 100755
--- a/tests/qemu-iotests/019
+++ b/tests/qemu-iotests/019
@@ -90,12 +90,12 @@ mv "$TEST_IMG" "$TEST_IMG.orig"
 # Test the conversion twice: One test with the old-style -B option and another
 # one with -o backing_file
 
-for backing_option in "-B $TEST_IMG.base" "-o backing_file=$TEST_IMG.base"; do
+for backing_option in "-B " "-o backing_file="; do
 
 echo
-echo Testing conversion with $backing_option | _filter_testdir | 
_filter_imgfmt
+echo Testing conversion with $backing_option$TEST_IMG.base | 
_filter_testdir | _filter_imgfmt
 echo
-$QEMU_IMG convert -O $IMGFMT $backing_option "$TEST_IMG.orig" "$TEST_IMG"
+$QEMU_IMG convert -O $IMGFMT $backing_option"$TEST_IMG.base" 
"$TEST_IMG.orig" "$TEST_IMG"
 
 echo "Checking if backing clusters are allocated when they shouldn't"
 echo
-- 
1.8.3.1




[Qemu-devel] [PULL v2 31/37] block: vhdx - break out code operations to functions

2013-11-08 Thread Stefan Hajnoczi
From: Jeff Cody 

This is preperation for vhdx_create().  The ability to write headers,
and calculate the number of BAT entries will be needed within the
create() functions, so move this relevant code into helper functions.

Signed-off-by: Jeff Cody 
Signed-off-by: Stefan Hajnoczi 
---
 block/vhdx.c | 121 +++
 1 file changed, 80 insertions(+), 41 deletions(-)

diff --git a/block/vhdx.c b/block/vhdx.c
index 7da149a..7bf7cd6 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -248,6 +248,14 @@ static void vhdx_region_unregister_all(BDRVVHDXState *s)
 }
 }
 
+static void vhdx_set_shift_bits(BDRVVHDXState *s)
+{
+s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
+s->sectors_per_block_bits =   31 - clz32(s->sectors_per_block);
+s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio);
+s->block_size_bits =  31 - clz32(s->block_size);
+}
+
 /*
  * Per the MS VHDX Specification, for every VHDX file:
  *  - The header section is fixed size - 1 MB
@@ -267,6 +275,50 @@ static int vhdx_probe(const uint8_t *buf, int buf_size, 
const char *filename)
 return 0;
 }
 
+/*
+ * Writes the header to the specified offset.
+ *
+ * This will optionally read in buffer data from disk (otherwise zero-fill),
+ * and then update the header checksum.  Header is converted to proper
+ * endianness before being written to the specified file offset
+ */
+static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
+ uint64_t offset, bool read)
+{
+uint8_t *buffer = NULL;
+int ret;
+VHDXHeader header_le;
+
+assert(bs_file != NULL);
+assert(hdr != NULL);
+
+/* the header checksum is not over just the packed size of VHDXHeader,
+ * but rather over the entire 'reserved' range for the header, which is
+ * 4KB (VHDX_HEADER_SIZE). */
+
+buffer = qemu_blockalign(bs_file, VHDX_HEADER_SIZE);
+if (read) {
+/* if true, we can't assume the extra reserved bytes are 0 */
+ret = bdrv_pread(bs_file, offset, buffer, VHDX_HEADER_SIZE);
+if (ret < 0) {
+goto exit;
+}
+} else {
+memset(buffer, 0, VHDX_HEADER_SIZE);
+}
+
+/* overwrite the actual VHDXHeader portion */
+memcpy(buffer, hdr, sizeof(VHDXHeader));
+hdr->checksum = vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
+ offsetof(VHDXHeader, checksum));
+vhdx_header_le_export(hdr, &header_le);
+ret = bdrv_pwrite_sync(bs_file, offset, &header_le, sizeof(VHDXHeader));
+
+exit:
+qemu_vfree(buffer);
+return ret;
+}
+
 /* Update the VHDX headers
  *
  * This follows the VHDX spec procedures for header updates.
@@ -282,8 +334,6 @@ static int vhdx_update_header(BlockDriverState *bs, 
BDRVVHDXState *s,
 
 VHDXHeader *active_header;
 VHDXHeader *inactive_header;
-VHDXHeader header_le;
-uint8_t *buffer;
 
 /* operate on the non-current header */
 if (s->curr_header == 0) {
@@ -311,31 +361,13 @@ static int vhdx_update_header(BlockDriverState *bs, 
BDRVVHDXState *s,
 inactive_header->log_guid = *log_guid;
 }
 
-/* the header checksum is not over just the packed size of VHDXHeader,
- * but rather over the entire 'reserved' range for the header, which is
- * 4KB (VHDX_HEADER_SIZE). */
-
-buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE);
-/* we can't assume the extra reserved bytes are 0 */
-ret = bdrv_pread(bs->file, header_offset, buffer, VHDX_HEADER_SIZE);
-if (ret < 0) {
-goto exit;
-}
-/* overwrite the actual VHDXHeader portion */
-memcpy(buffer, inactive_header, sizeof(VHDXHeader));
-inactive_header->checksum =
-vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
- offsetof(VHDXHeader, checksum));
-vhdx_header_le_export(inactive_header, &header_le);
-ret = bdrv_pwrite_sync(bs->file, header_offset, &header_le,
-   sizeof(VHDXHeader));
+vhdx_write_header(bs->file, inactive_header, header_offset, true);
 if (ret < 0) {
 goto exit;
 }
 s->curr_header = hdr_idx;
 
 exit:
-qemu_vfree(buffer);
 return ret;
 }
 
@@ -773,10 +805,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, 
BDRVVHDXState *s)
 goto exit;
 }
 
-s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
-s->sectors_per_block_bits =   31 - clz32(s->sectors_per_block);
-s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio);
-s->block_size_bits =  31 - clz32(s->block_size);
+vhdx_set_shift_bits(s);
 
 ret = 0;
 
@@ -785,6 +814,31 @@ exit:
 return ret;
 }
 
+/*
+ * Calculate the number of BAT entries, including sector
+ * bitmap entries.
+ */
+static void vhdx_calc_bat_entries(BDRVVHDXState *s)
+{
+uint32_t data_blocks_cnt, bitmap_blocks_cnt;
+
+data_blocks_cnt = s->virtua

  1   2   >