from:"Tao Xu"

[PATCH v2] net/colo: check vnet_hdr_support flag when using virtio-net

2021-08-18 Thread Tao Xu

When COLO uses only one vnet_hdr_support parameter between the
COLO network filters (filter-mirror, filter-redirector or
filter-rewriter) and colo-compare, packets will not be parsed
correctly. Acquire the network driver related to COLO; if it is
virtio-net, check the vnet_hdr_support flag of the COLO network
filters and colo-compare.

Signed-off-by: Tao Xu 
Signed-off-by: Zhang Chen 
---

Changelog:
v2:
 Detect virtio-net driver and apply vnet_hdr_support
 automatically. (Jason)
---
 net/colo-compare.c| 57 +++
 net/colo.c| 20 +++
 net/colo.h|  4 +++
 net/filter-mirror.c   | 21 
 net/filter-rewriter.c | 10 
 qapi/qom.json |  6 +
 qemu-options.hx   |  6 +++--
 7 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index b100e7b51f..870bd05a41 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -110,6 +110,7 @@ struct CompareState {
 char *sec_indev;
 char *outdev;
 char *notify_dev;
+char *netdev;
 CharBackend chr_pri_in;
 CharBackend chr_sec_in;
 CharBackend chr_out;
@@ -838,6 +839,28 @@ static int compare_chr_can_read(void *opaque)
 return COMPARE_READ_LEN_MAX;
 }
 
+static int colo_set_default_netdev(void *opaque, QemuOpts *opts, Error **errp)
+{
+const char *colo_obj_type, *netdev_from_filter;
+char **netdev = (char **)opaque;
+
+colo_obj_type = qemu_opt_get(opts, "qom-type");
+
+if (colo_obj_type &&
+(strcmp(colo_obj_type, "filter-mirror") == 0 ||
+ strcmp(colo_obj_type, "filter-redirector") == 0 ||
+ strcmp(colo_obj_type, "filter-rewriter") == 0)) {
+netdev_from_filter = qemu_opt_get(opts, "netdev");
+if (*netdev == NULL) {
+*netdev = g_strdup(netdev_from_filter);
+} else if (strcmp(*netdev, netdev_from_filter) != 0) {
+warn_report("%s is using a different netdev from other COLO "
+"component", colo_obj_type);
+}
+}
+return 0;
+}
+
 /*
  * Called from the main thread on the primary for packets
  * arriving over the socket from the primary.
@@ -1050,6 +1073,21 @@ static void compare_set_vnet_hdr(Object *obj,
 s->vnet_hdr = value;
 }
 
+static char *compare_get_netdev(Object *obj, Error **errp)
+{
+CompareState *s = COLO_COMPARE(obj);
+
+return g_strdup(s->netdev);
+}
+
+static void compare_set_netdev(Object *obj, const char *value, Error **errp)
+{
+CompareState *s = COLO_COMPARE(obj);
+
+g_free(s->netdev);
+s->netdev = g_strdup(value);
+}
+
 static char *compare_get_notify_dev(Object *obj, Error **errp)
 {
 CompareState *s = COLO_COMPARE(obj);
@@ -1274,6 +1312,12 @@ static void colo_compare_complete(UserCreatable *uc, 
Error **errp)
 max_queue_size = MAX_QUEUE_SIZE;
 }
 
+if (!s->netdev) {
+/* Set default netdev as the first colo netfilter found */
+qemu_opts_foreach(qemu_find_opts("object"),
+  colo_set_default_netdev, >netdev, NULL);
+}
+
 if (find_and_check_chardev(, s->pri_indev, errp) ||
 !qemu_chr_fe_init(>chr_pri_in, chr, errp)) {
 return;
@@ -1289,6 +1333,16 @@ static void colo_compare_complete(UserCreatable *uc, 
Error **errp)
 return;
 }
 
+if (!s->vnet_hdr &&
+qemu_opts_foreach(qemu_find_opts("device"),
+  vnet_driver_check, s->netdev, NULL)) {
+/*
+ * colo compare needs 'vnet_hdr_support' when it works on virtio-net,
+ * add 'vnet_hdr_support' automatically
+ */
+s->vnet_hdr = true;
+}
+
 net_socket_rs_init(>pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
 net_socket_rs_init(>sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
 
@@ -1400,6 +1454,9 @@ static void colo_compare_init(Object *obj)
 s->vnet_hdr = false;
 object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr,
  compare_set_vnet_hdr);
+/* colo compare can't varify that netdev is correct */
+object_property_add_str(obj, "netdev", compare_get_netdev,
+compare_set_netdev);
 }
 
 void colo_compare_cleanup(void)
diff --git a/net/colo.c b/net/colo.c
index 3a3e6e89a0..4a03780f45 100644
--- a/net/colo.c
+++ b/net/colo.c
@@ -243,3 +243,23 @@ bool connection_has_tracked(GHashTable 
*connection_track_table,
 
 return conn ? true : false;
 }
+
+/* check the network driver related to COLO, return 1 if it is virtio-net */
+int vnet_driver_check(void *opaque, QemuOpts *opts, Error **errp)
+{
+const char *driver_type, *netdev_from_driver;
+char *netdev_from_filter = (char *)opaque;
+
+driver_type = qemu_opt_get(opts, "driver");
+netde

Re: [PATCH] net/colo: check vnet_hdr_support flag when using virtio-net

2021-08-17 Thread Tao Xu





On 8/17/2021 2:01 PM, Tao Xu wrote:


On 8/16/2021 10:58 AM, Jason Wang wrote:


在 2021/8/6 下午2:08, Tao Xu 写道:

When COLO uses only one vnet_hdr_support parameter between the
COLO network filters (filter-mirror, filter-redirector or
filter-rewriter) and colo-compare, packets will not be parsed
correctly. Acquire the network driver related to COLO; if it is
virtio-net, check the vnet_hdr_support flag of the COLO network
filters and colo-compare.

Signed-off-by: Tao Xu 
Signed-off-by: Zhang Chen 
---
   net/colo-compare.c    | 25 +
   net/colo.c    | 20 
   net/colo.h    |  4 
   net/filter-mirror.c   | 17 +
   net/filter-rewriter.c |  9 +
   5 files changed, 75 insertions(+)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index b100e7b51f..bc1cc951c0 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -838,6 +838,23 @@ static int compare_chr_can_read(void *opaque)
   return COMPARE_READ_LEN_MAX;
   }
+/* check vnet_hdr_support flag through COLO filter modules */
+static int colo_vnet_driver_check(void *opaque, QemuOpts *opts, 
Error **errp)

+{
+    const char *colo_obj_type;
+
+    colo_obj_type = qemu_opt_get(opts, "qom-type");
+
+    if (strcmp(colo_obj_type, "filter-mirror") == 0 ||
+    strcmp(colo_obj_type, "filter-redirector") == 0 ||
+    strcmp(colo_obj_type, "filter-rewriter") == 0) {
+    if (qemu_opt_get(opts, "vnet_hdr_support")) {
+    return 1;
+    }
+    }
+    return 0;
+}
+
   /*
    * Called from the main thread on the primary for packets
    * arriving over the socket from the primary.
@@ -1289,6 +1306,14 @@ static void 
colo_compare_complete(UserCreatable *uc, Error **errp)

   return;
   }
+    if (!s->vnet_hdr &&
+    qemu_opts_foreach(qemu_find_opts("object"),
+  colo_vnet_driver_check, NULL, NULL)) {
+    error_setg(errp, "colo compare needs 'vnet_hdr_support' "
+   "when colo filter modules work on virtio-net");
+    return;
+    }



I wonder if we can detect virtio-net and apply vnet_hdr automatically.

Thanks

For filter-mirror, filter-redirector and filter-rewriter, we can detect
it and add it automatically, because these netfilters are attached to a
netdev, for example:


     if (!s->vnet_hdr &&
     qemu_opts_foreach(qemu_find_opts("device"),
  vnet_driver_check, nf->netdev_id, NULL)) {
     s->vnet_hdr = true.
     }


But colo-compare isn't attached to a netdev; it can only inspect the COLO
netfilters to check vnet_hdr_support. In this situation, if
vnet_hdr_support is missing from all netfilters, colo_vnet_driver_check()
will return 0, so it can't detect that vnet_hdr_support is missing.


So can we apply vnet_hdr automatically for filter-mirror,
filter-redirector and filter-rewriter, and keep reporting an error for
colo-compare?


Sorry, I found a solution for colo-compare to apply vnet_hdr
automatically; I will submit V2 later.

Re: [PATCH] net/colo: check vnet_hdr_support flag when using virtio-net

2021-08-17 Thread Tao Xu




On 8/16/2021 10:58 AM, Jason Wang wrote:


在 2021/8/6 下午2:08, Tao Xu 写道:

When COLO uses only one vnet_hdr_support parameter between the
COLO network filters (filter-mirror, filter-redirector or
filter-rewriter) and colo-compare, packets will not be parsed
correctly. Acquire the network driver related to COLO; if it is
virtio-net, check the vnet_hdr_support flag of the COLO network
filters and colo-compare.

Signed-off-by: Tao Xu 
Signed-off-by: Zhang Chen 
---
   net/colo-compare.c| 25 +
   net/colo.c| 20 
   net/colo.h|  4 
   net/filter-mirror.c   | 17 +
   net/filter-rewriter.c |  9 +
   5 files changed, 75 insertions(+)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index b100e7b51f..bc1cc951c0 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -838,6 +838,23 @@ static int compare_chr_can_read(void *opaque)
   return COMPARE_READ_LEN_MAX;
   }
   
+/* check vnet_hdr_support flag through COLO filter modules */

+static int colo_vnet_driver_check(void *opaque, QemuOpts *opts, Error **errp)
+{
+const char *colo_obj_type;
+
+colo_obj_type = qemu_opt_get(opts, "qom-type");
+
+if (strcmp(colo_obj_type, "filter-mirror") == 0 ||
+strcmp(colo_obj_type, "filter-redirector") == 0 ||
+strcmp(colo_obj_type, "filter-rewriter") == 0) {
+if (qemu_opt_get(opts, "vnet_hdr_support")) {
+return 1;
+}
+}
+return 0;
+}
+
   /*
* Called from the main thread on the primary for packets
* arriving over the socket from the primary.
@@ -1289,6 +1306,14 @@ static void colo_compare_complete(UserCreatable *uc, 
Error **errp)
   return;
   }
   
+if (!s->vnet_hdr &&

+qemu_opts_foreach(qemu_find_opts("object"),
+  colo_vnet_driver_check, NULL, NULL)) {
+error_setg(errp, "colo compare needs 'vnet_hdr_support' "
+   "when colo filter modules work on virtio-net");
+return;
+}



I wonder if we can detect virtio-net and apply vnet_hdr automatically.

Thanks

For filter-mirror, filter-redirector and filter-rewriter, we can detect
it and add it automatically, because these netfilters are attached to a
netdev, for example:


if (!s->vnet_hdr &&
qemu_opts_foreach(qemu_find_opts("device"),
 vnet_driver_check, nf->netdev_id, NULL)) {
s->vnet_hdr = true.
}


But colo-compare isn't attached to a netdev; it can only inspect the COLO
netfilters to check vnet_hdr_support. In this situation, if
vnet_hdr_support is missing from all netfilters, colo_vnet_driver_check()
will return 0, so it can't detect that vnet_hdr_support is missing.


So can we apply vnet_hdr automatically for filter-mirror,
filter-redirector and filter-rewriter, and keep reporting an error for
colo-compare?

Re: [PATCH] net/colo: check vnet_hdr_support flag when using virtio-net

2021-08-12 Thread Tao Xu


Hi Jason,

Do you have any comments on this patch?

Thank you!

On 8/6/2021 2:08 PM, Xu, Tao3 wrote:

When COLO uses only one vnet_hdr_support parameter between the
COLO network filters (filter-mirror, filter-redirector or
filter-rewriter) and colo-compare, packets will not be parsed
correctly. Acquire the network driver related to COLO; if it is
virtio-net, check the vnet_hdr_support flag of the COLO network
filters and colo-compare.

Signed-off-by: Tao Xu 
Signed-off-by: Zhang Chen 
---
  net/colo-compare.c| 25 +
  net/colo.c| 20 
  net/colo.h|  4 
  net/filter-mirror.c   | 17 +
  net/filter-rewriter.c |  9 +
  5 files changed, 75 insertions(+)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index b100e7b51f..bc1cc951c0 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -838,6 +838,23 @@ static int compare_chr_can_read(void *opaque)
  return COMPARE_READ_LEN_MAX;
  }
  
+/* check vnet_hdr_support flag through COLO filter modules */

+static int colo_vnet_driver_check(void *opaque, QemuOpts *opts, Error **errp)
+{
+const char *colo_obj_type;
+
+colo_obj_type = qemu_opt_get(opts, "qom-type");
+
+if (strcmp(colo_obj_type, "filter-mirror") == 0 ||
+strcmp(colo_obj_type, "filter-redirector") == 0 ||
+strcmp(colo_obj_type, "filter-rewriter") == 0) {
+if (qemu_opt_get(opts, "vnet_hdr_support")) {
+return 1;
+}
+}
+return 0;
+}
+
  /*
   * Called from the main thread on the primary for packets
   * arriving over the socket from the primary.
@@ -1289,6 +1306,14 @@ static void colo_compare_complete(UserCreatable *uc, 
Error **errp)
  return;
  }
  
+if (!s->vnet_hdr &&

+qemu_opts_foreach(qemu_find_opts("object"),
+  colo_vnet_driver_check, NULL, NULL)) {
+error_setg(errp, "colo compare needs 'vnet_hdr_support' "
+   "when colo filter modules work on virtio-net");
+return;
+}
+
  net_socket_rs_init(>pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
  net_socket_rs_init(>sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
  
diff --git a/net/colo.c b/net/colo.c

index 3a3e6e89a0..4a03780f45 100644
--- a/net/colo.c
+++ b/net/colo.c
@@ -243,3 +243,23 @@ bool connection_has_tracked(GHashTable 
*connection_track_table,
  
  return conn ? true : false;

  }
+
+/* check the network driver related to COLO, return 1 if it is virtio-net */
+int vnet_driver_check(void *opaque, QemuOpts *opts, Error **errp)
+{
+const char *driver_type, *netdev_from_driver;
+char *netdev_from_filter = (char *)opaque;
+
+driver_type = qemu_opt_get(opts, "driver");
+netdev_from_driver = qemu_opt_get(opts, "netdev");
+
+if (!driver_type || !netdev_from_driver || !netdev_from_filter) {
+return 0;
+}
+
+if (g_str_has_prefix(driver_type, "virtio-net") &&
+strcmp(netdev_from_driver, netdev_from_filter) == 0) {
+return 1;
+}
+return 0;
+}
diff --git a/net/colo.h b/net/colo.h
index d91cd245c4..d401fc76b6 100644
--- a/net/colo.h
+++ b/net/colo.h
@@ -18,6 +18,9 @@
  #include "qemu/jhash.h"
  #include "qemu/timer.h"
  #include "net/eth.h"
+#include "qemu/option.h"
+#include "qemu/option_int.h"
+#include "qemu/config-file.h"
  
  #define HASHTABLE_MAX_SIZE 16384
  
@@ -104,5 +107,6 @@ Packet *packet_new(const void *data, int size, int vnet_hdr_len);

  Packet *packet_new_nocopy(void *data, int size, int vnet_hdr_len);
  void packet_destroy(void *opaque, void *user_data);
  void packet_destroy_partial(void *opaque, void *user_data);
+int vnet_driver_check(void *opaque, QemuOpts *opts, Error **errp);
  
  #endif /* NET_COLO_H */

diff --git a/net/filter-mirror.c b/net/filter-mirror.c
index f20240cc9f..b8b3f2fe1d 100644
--- a/net/filter-mirror.c
+++ b/net/filter-mirror.c
@@ -12,6 +12,7 @@
  #include "qemu/osdep.h"
  #include "net/filter.h"
  #include "net/net.h"
+#include "net/colo.h"
  #include "qapi/error.h"
  #include "qom/object.h"
  #include "qemu/main-loop.h"
@@ -224,6 +225,14 @@ static void filter_mirror_setup(NetFilterState *nf, Error 
**errp)
  return;
  }
  
+if (!s->vnet_hdr &&

+qemu_opts_foreach(qemu_find_opts("device"),
+ vnet_driver_check, nf->netdev_id, NULL)) {
+error_setg(errp, "filter mirror needs 'vnet_hdr_support' "
+   "when network driver is virtio-net");
+return;
+}
+
  qemu_chr_fe_init(>chr_out, chr, errp);
  }
  
@@ -252,6 +261,14 @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)

[PATCH] net/colo: check vnet_hdr_support flag when using virtio-net

2021-08-06 Thread Tao Xu

When COLO uses only one vnet_hdr_support parameter between the
COLO network filters (filter-mirror, filter-redirector or
filter-rewriter) and colo-compare, packets will not be parsed
correctly. Acquire the network driver related to COLO; if it is
virtio-net, check the vnet_hdr_support flag of the COLO network
filters and colo-compare.

Signed-off-by: Tao Xu 
Signed-off-by: Zhang Chen 
---
 net/colo-compare.c| 25 +
 net/colo.c| 20 
 net/colo.h|  4 
 net/filter-mirror.c   | 17 +
 net/filter-rewriter.c |  9 +
 5 files changed, 75 insertions(+)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index b100e7b51f..bc1cc951c0 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -838,6 +838,23 @@ static int compare_chr_can_read(void *opaque)
 return COMPARE_READ_LEN_MAX;
 }
 
+/* check vnet_hdr_support flag through COLO filter modules */
+static int colo_vnet_driver_check(void *opaque, QemuOpts *opts, Error **errp)
+{
+const char *colo_obj_type;
+
+colo_obj_type = qemu_opt_get(opts, "qom-type");
+
+if (strcmp(colo_obj_type, "filter-mirror") == 0 ||
+strcmp(colo_obj_type, "filter-redirector") == 0 ||
+strcmp(colo_obj_type, "filter-rewriter") == 0) {
+if (qemu_opt_get(opts, "vnet_hdr_support")) {
+return 1;
+}
+}
+return 0;
+}
+
 /*
  * Called from the main thread on the primary for packets
  * arriving over the socket from the primary.
@@ -1289,6 +1306,14 @@ static void colo_compare_complete(UserCreatable *uc, 
Error **errp)
 return;
 }
 
+if (!s->vnet_hdr &&
+qemu_opts_foreach(qemu_find_opts("object"),
+  colo_vnet_driver_check, NULL, NULL)) {
+error_setg(errp, "colo compare needs 'vnet_hdr_support' "
+   "when colo filter modules work on virtio-net");
+return;
+}
+
 net_socket_rs_init(>pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
 net_socket_rs_init(>sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
 
diff --git a/net/colo.c b/net/colo.c
index 3a3e6e89a0..4a03780f45 100644
--- a/net/colo.c
+++ b/net/colo.c
@@ -243,3 +243,23 @@ bool connection_has_tracked(GHashTable 
*connection_track_table,
 
 return conn ? true : false;
 }
+
+/* check the network driver related to COLO, return 1 if it is virtio-net */
+int vnet_driver_check(void *opaque, QemuOpts *opts, Error **errp)
+{
+const char *driver_type, *netdev_from_driver;
+char *netdev_from_filter = (char *)opaque;
+
+driver_type = qemu_opt_get(opts, "driver");
+netdev_from_driver = qemu_opt_get(opts, "netdev");
+
+if (!driver_type || !netdev_from_driver || !netdev_from_filter) {
+return 0;
+}
+
+if (g_str_has_prefix(driver_type, "virtio-net") &&
+strcmp(netdev_from_driver, netdev_from_filter) == 0) {
+return 1;
+}
+return 0;
+}
diff --git a/net/colo.h b/net/colo.h
index d91cd245c4..d401fc76b6 100644
--- a/net/colo.h
+++ b/net/colo.h
@@ -18,6 +18,9 @@
 #include "qemu/jhash.h"
 #include "qemu/timer.h"
 #include "net/eth.h"
+#include "qemu/option.h"
+#include "qemu/option_int.h"
+#include "qemu/config-file.h"
 
 #define HASHTABLE_MAX_SIZE 16384
 
@@ -104,5 +107,6 @@ Packet *packet_new(const void *data, int size, int 
vnet_hdr_len);
 Packet *packet_new_nocopy(void *data, int size, int vnet_hdr_len);
 void packet_destroy(void *opaque, void *user_data);
 void packet_destroy_partial(void *opaque, void *user_data);
+int vnet_driver_check(void *opaque, QemuOpts *opts, Error **errp);
 
 #endif /* NET_COLO_H */
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
index f20240cc9f..b8b3f2fe1d 100644
--- a/net/filter-mirror.c
+++ b/net/filter-mirror.c
@@ -12,6 +12,7 @@
 #include "qemu/osdep.h"
 #include "net/filter.h"
 #include "net/net.h"
+#include "net/colo.h"
 #include "qapi/error.h"
 #include "qom/object.h"
 #include "qemu/main-loop.h"
@@ -224,6 +225,14 @@ static void filter_mirror_setup(NetFilterState *nf, Error 
**errp)
 return;
 }
 
+if (!s->vnet_hdr &&
+qemu_opts_foreach(qemu_find_opts("device"),
+ vnet_driver_check, nf->netdev_id, NULL)) {
+error_setg(errp, "filter mirror needs 'vnet_hdr_support' "
+   "when network driver is virtio-net");
+return;
+}
+
 qemu_chr_fe_init(>chr_out, chr, errp);
 }
 
@@ -252,6 +261,14 @@ static void filter_redirector_setup(NetFilterState *nf, 
Error **errp)
 }
 }
 
+if (!s->vnet_hdr &&
+qemu_opts_foreach(qemu_find_opts("device"),
+ vnet_driver_check, nf->

[PATCH] iotests: Fix typo in iotest 051

2021-03-24 Thread Tao Xu

There is a typo in iotest 051; correct it.

Signed-off-by: Tao Xu 
---
 tests/qemu-iotests/051| 2 +-
 tests/qemu-iotests/051.pc.out | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051
index f92161d8ef..1595babe82 100755
--- a/tests/qemu-iotests/051
+++ b/tests/qemu-iotests/051
@@ -209,7 +209,7 @@ case "$QEMU_DEFAULT_MACHINE" in
 # virtio-blk enables the iothread only when the driver initialises the
 # device, so a second virtio-blk device can't be added even with the
 # same iothread. virtio-scsi allows this.
-run_qemu $iothread -device 
virtio-blk-pci,drive=disk,iohtread=iothread0,share-rw=on
+run_qemu $iothread -device 
virtio-blk-pci,drive=disk,iothread=iothread0,share-rw=on
 run_qemu $iothread -device 
virtio-scsi,id=virtio-scsi1,iothread=thread0 -device 
scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on
 ;;
  *)
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
index a28e3fc124..a43086bb41 100644
--- a/tests/qemu-iotests/051.pc.out
+++ b/tests/qemu-iotests/051.pc.out
@@ -183,9 +183,9 @@ Testing: -drive 
file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id
 QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) QEMU_PROG: -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on: 
Cannot change iothread of active block backend
 
-Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object 
iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 
-device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device 
virtio-blk-pci,drive=disk,iohtread=iothread0,share-rw=on
+Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object 
iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 
-device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device 
virtio-blk-pci,drive=disk,iothread=iothread0,share-rw=on
 QEMU X.Y.Z monitor - type 'help' for more information
-(qemu) QEMU_PROG: -device 
virtio-blk-pci,drive=disk,iohtread=iothread0,share-rw=on: Cannot change 
iothread of active block backend
+(qemu) QEMU_PROG: -device 
virtio-blk-pci,drive=disk,iothread=iothread0,share-rw=on: Cannot change 
iothread of active block backend
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object 
iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 
-device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device 
virtio-scsi,id=virtio-scsi1,iothread=thread0 -device 
scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on
 QEMU X.Y.Z monitor - type 'help' for more information
-- 
2.25.1

[Bug 1920871] [NEW] netperf UDP_STREAM high packet loss on QEMU tap network

2021-03-23 Thread Tao Xu

Public bug reported:

Hi, I boot a guest with "-netdev
tap,id=hn0,vhost=off,br=br0,helper=/usr/local/libexec/qemu-bridge-
helper" network option, and using "netperf -H IP -t UDP_STREAM" to test
guest UDP performance, I got the following output:

Socket  Message  Elapsed  Messages
SizeSize Time Okay Errors   Throughput
bytes   bytessecs#  #   10^6bits/sec

212992   65507   10.00  144710  07583.56
212992   10.00  32  1.68

We can see that most of the UDP packets are lost. But if I test on another host machine or
use "-netdev usr,x", I get:
Socket  Message  Elapsed  Messages
SizeSize Time Okay Errors   Throughput
bytes   bytessecs#  #   10^6bits/sec

212992   65507   10.00   18351  0 961.61
212992   10.00   18350961.56

Most of the UDP packets are received.

And if we check the tap device QEMU used, we can see:
ifconfig tap0
tap0: flags=4419  mtu 1500
inet6 fe80::ecc6:21ff:fe6f:b174  prefixlen 64  scopeid 0x20
ether ee:c6:21:6f:b1:74  txqueuelen 1000  (Ethernet)
RX packets 282  bytes 30097 (29.3 KiB)
RX errors 0  dropped 0  overruns 0  frame 0
TX packets 9086214  bytes 12731596673 (11.8 GiB)
TX errors 0  dropped 16349024 overruns 0  carrier 0  collisions 0
Lots of TX packets are dropped.

Results for other packet sizes:

➜  boot netperf -H 192.168.199.200 -t UDP_STREAM -- -m 1
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 
192.168.199.200 () port 0 AF_INET
Socket  Message  Elapsed  Messages
SizeSize Time Okay Errors   Throughput
bytes   bytessecs#  #   10^6bits/sec

212992   1   10.00 2297941  0   1.84
212992   10.00 1462024  1.17

➜  boot netperf -H 192.168.199.200 -t UDP_STREAM -- -m 128
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 
192.168.199.200 () port 0 AF_INET
Socket  Message  Elapsed  Messages
SizeSize Time Okay Errors   Throughput
bytes   bytessecs#  #   10^6bits/sec

212992 128   10.00 2311547  0 236.70
212992   10.00 1359834139.25

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1920871

Title:
  netperf UDP_STREAM high packet loss on QEMU tap network

Status in QEMU:
  New

Bug description:
  Hi, I boot a guest with "-netdev
  tap,id=hn0,vhost=off,br=br0,helper=/usr/local/libexec/qemu-bridge-
  helper" network option, and using "netperf -H IP -t UDP_STREAM" to
  test guest UDP performance, I got the following output:

  Socket  Message  Elapsed  Messages
  SizeSize Time Okay Errors   Throughput
  bytes   bytessecs#  #   10^6bits/sec

  212992   65507   10.00  144710  07583.56
  212992   10.00  32  1.68

  We can see that most of the UDP packets are lost. But if I test on another host machine or
use "-netdev usr,x", I get:
  Socket  Message  Elapsed  Messages
  SizeSize Time Okay Errors   Throughput
  bytes   bytessecs#  #   10^6bits/sec

  212992   65507   10.00   18351  0 961.61
  212992   10.00   18350961.56

  Most of the UDP packets are received.

  And if we check the tap device QEMU used, we can see:
  ifconfig tap0
  tap0: flags=4419  mtu 1500
  inet6 fe80::ecc6:21ff:fe6f:b174  prefixlen 64  scopeid 0x20
  ether ee:c6:21:6f:b1:74  txqueuelen 1000  (Ethernet)
  RX packets 282  bytes 30097 (29.3 KiB)
  RX errors 0  dropped 0  overruns 0  frame 0
  TX packets 9086214  bytes 12731596673 (11.8 GiB)
  TX errors 0  dropped 16349024 overruns 0  carrier 0  collisions 0
  Lots of TX packets are dropped.

  Results for other packet sizes:

  ➜  boot netperf -H 192.168.199.200 -t UDP_STREAM -- -m 1
  MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 
192.168.199.200 () port 0 AF_INET
  Socket  Message  Elapsed  Messages
  SizeSize Time Okay Errors   Throughput
  bytes   bytessecs#  #   10^6bits/sec

  212992   1   10.00 2297941  0   1.84
  212992   10.00 1462024  1.17

  ➜  boot netperf -H 192.168.199.200 -t UDP_STREAM -- -m 128
  MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 
192.168.199.200 () port 0 AF_INET
  Socket  Message  Elapsed  Messages
  SizeSize Time Okay Errors   Throughput
  bytes   bytessecs#  #   10^6bits/sec

  212992 128   10.00 2311547  0 236.70
  212992   10.00

Re: [PATCH] quorum: Implement bdrv_co_block_status()

2020-11-05 Thread Tao Xu

I tested this patch in COLO; it resolves the issue of the qcow2 image
becoming larger after drive-mirror. Thank you!


Tested-by: Tao Xu 

On 11/5/2020 2:04 AM, Alberto Garcia wrote:

The quorum driver does not implement bdrv_co_block_status() and
because of that it always reports to contain data even if all its
children are known to be empty.

One consequence of this is that if we for example create a quorum with
a size of 10GB and we mirror it to a new image the operation will
write 10GB of actual zeroes to the destination image wasting a lot of
time and disk space.

Since a quorum has an arbitrary number of children of potentially
different formats there is no way to report all possible allocation
status flags in a way that makes sense, so this implementation only
reports when a given region is known to contain zeroes
(BDRV_BLOCK_ZERO) or not (BDRV_BLOCK_DATA).

If all children agree that a region contains zeroes then we can return
BDRV_BLOCK_ZERO using the smallest size reported by the children
(because all agree that a region of at least that size contains
zeroes).

If at least one child disagrees we have to return BDRV_BLOCK_DATA.
In this case we use the largest of the sizes reported by the children
that didn't return BDRV_BLOCK_ZERO (because we know that there won't
be an agreement for at least that size).

Signed-off-by: Alberto Garcia 
---
  block/quorum.c |  49 
  tests/qemu-iotests/312 | 148 +
  tests/qemu-iotests/312.out |  67 +
  tests/qemu-iotests/group   |   1 +
  4 files changed, 265 insertions(+)
  create mode 100755 tests/qemu-iotests/312
  create mode 100644 tests/qemu-iotests/312.out

diff --git a/block/quorum.c b/block/quorum.c
index e846a7e892..29cee42705 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -18,6 +18,7 @@
  #include "qemu/module.h"
  #include "qemu/option.h"
  #include "block/block_int.h"
+#include "block/coroutines.h"
  #include "block/qdict.h"
  #include "qapi/error.h"
  #include "qapi/qapi-events-block.h"
@@ -1174,6 +1175,53 @@ static void quorum_child_perm(BlockDriverState *bs, 
BdrvChild *c,
   | DEFAULT_PERM_UNCHANGED;
  }
  
+/*

+ * Each one of the children can report different status flags even
+ * when they contain the same data, so what this function does is
+ * return BDRV_BLOCK_ZERO if *all* children agree that a certain
+ * region contains zeroes, and BDRV_BLOCK_DATA otherwise.
+ */
+static int coroutine_fn quorum_co_block_status(BlockDriverState *bs,
+   bool want_zero,
+   int64_t offset, int64_t count,
+   int64_t *pnum, int64_t *map,
+   BlockDriverState **file)
+{
+BDRVQuorumState *s = bs->opaque;
+int i, ret;
+int64_t pnum_zero = count;
+int64_t pnum_data = 0;
+
+for (i = 0; i < s->num_children; i++) {
+int64_t bytes;
+ret = bdrv_co_common_block_status_above(s->children[i]->bs, NULL, 
false,
+want_zero, offset, count,
+, NULL, NULL, NULL);
+if (ret < 0) {
+return ret;
+}
+/*
+ * Even if all children agree about whether there are zeroes
+ * or not at @offset they might disagree on the size, so use
+ * the smallest when reporting BDRV_BLOCK_ZERO and the largest
+ * when reporting BDRV_BLOCK_DATA.
+ */
+if (ret & BDRV_BLOCK_ZERO) {
+pnum_zero = MIN(pnum_zero, bytes);
+} else {
+pnum_data = MAX(pnum_data, bytes);
+}
+}
+
+if (pnum_data) {
+*pnum = pnum_data;
+return BDRV_BLOCK_DATA;
+} else {
+*pnum = pnum_zero;
+return BDRV_BLOCK_ZERO;
+}
+}
+
  static const char *const quorum_strong_runtime_opts[] = {
  QUORUM_OPT_VOTE_THRESHOLD,
  QUORUM_OPT_BLKVERIFY,
@@ -1192,6 +1240,7 @@ static BlockDriver bdrv_quorum = {
  .bdrv_close = quorum_close,
  .bdrv_gather_child_options  = quorum_gather_child_options,
  .bdrv_dirname   = quorum_dirname,
+.bdrv_co_block_status   = quorum_co_block_status,
  
  .bdrv_co_flush_to_disk  = quorum_co_flush,
  
diff --git a/tests/qemu-iotests/312 b/tests/qemu-iotests/312

new file mode 100755
index 00..1b08f1552f
--- /dev/null
+++ b/tests/qemu-iotests/312
@@ -0,0 +1,148 @@
+#!/usr/bin/env bash
+#
+# Test drive-mirror with quorum
+#
+# The goal of this test is to check how the quorum driver reports
+# regions that are known to read as zeroes (BDRV_BLOCK_ZERO). The idea
+# is that drive-mirror will try the efficient representation of zeroes
+# in the destination image inste

Re: [PATCH 3/3] numa: Initialize node initiator with respect to .has_cpu

2020-06-04 Thread Tao Xu


On 6/3/20 5:16 PM, Michal Privoznik wrote:

On 6/2/20 10:00 AM, Tao Xu wrote:


On 6/1/2020 4:10 PM, Michal Privoznik wrote:

On 5/29/20 5:09 PM, Igor Mammedov wrote:

On Fri, 29 May 2020 15:33:48 +0200
Michal Privoznik  wrote:


The initiator attribute of a NUMA node is documented as the 'NUMA
node that has best performance to given NUMA node'. If a NUMA
node has at least one CPU there can hardly be a different node
with better performance and thus all NUMA nodes which have a CPU
are initiators to themselves. Reflect this fact when initializing
the attribute.


It is not true in case of the node is memory-less


Are you saying that if there's a memory-less NUMA node, then it needs to
have initiator set too? Asking mostly out of curiosity because we don't
allow memory-less NUMA nodes in Libvirt just yet. Nor cpu-less, but my
patches that I'm referring to in cover letter will allow at least
cpu-less nodes. Should I allow both?

QEMU does not currently support memory-less NUMA nodes, but hardware may
support them, so we reserve this type of NUMA node for future use.
QEMU can currently support CPU-less NUMA nodes, for emulating some "slow"
memory (like some NVDIMMs).


Oh yeah, I understand that. But it doesn't explain why initiator needs
to be specified for NUMA nodes with cpus and memory, or does it? Maybe
I'm still misunderstanding what the initiator is.



Yes, the initiator of a NUMA node with CPUs and memory should be the node
itself. In the ACPI 6.3 spec, the initiator is defined as:


This field is valid only if the memory controller
responsible for satisfying the access to memory
belonging to the specified memory proximity
domain is directly attached to an initiator that
belongs to a proximity domain. In that case, this
field contains the integer that represents the
proximity domain to which the initiator (Generic
Initiator or Processor) belongs. This number shall
match the corresponding entry in the SRAT table’s
processor affinity structure (e.g., Processor Local
APIC/SAPIC Affinity Structure, Processor Local
x2APIC Affinity Structure, GICC Affinity Structure) if
the initiator is a processor, or the Generic Initiator
Affinity Structure if the initiator is a generic
initiator.
Note: this field provides additional information as
to the initiator node that is closest (as in directly
attached) to the memory address ranges within
the specified memory proximity domain, and
therefore should provide the best performance.

And if, in the future, there is a memory-less NUMA node: because the HMAT
describes the "Memory" Proximity Domain Attributes Structure, I think we
should not add memory-less NUMA nodes to the HMAT.






Also, can you shed more light into why machine_set_cpu_numa_node() did
not override the .initiator?


And this one is still unanswered too. Because from user's perspective,
initiator has to be set on all NUMA nodes (if HMAT is enabled) and it
seems like this auto assignment code is not run/not working.

Michal



So we check the HMAT configuration in hw/core/machine.c, in
numa_validate_initiator(NumaState *numa_state), because the initiator of a
NUMA node with CPUs and memory should be the node itself. In
machine_set_cpu_numa_node() we do not assign the initiator automatically; we
just use the user's setting from the command line (although there is only one
right choice for NUMA nodes with CPUs and memory). But I don't know whether it
is appropriate to auto-assign the initiator for NUMA nodes with CPUs and memory.

Re: [PATCH 3/3] numa: Initialize node initiator with respect to .has_cpu

2020-06-02 Thread Tao Xu




On 6/1/2020 4:10 PM, Michal Privoznik wrote:

On 5/29/20 5:09 PM, Igor Mammedov wrote:

On Fri, 29 May 2020 15:33:48 +0200
Michal Privoznik  wrote:


The initiator attribute of a NUMA node is documented as the 'NUMA
node that has best performance to given NUMA node'. If a NUMA
node has at least one CPU there can hardly be a different node
with better performance and thus all NUMA nodes which have a CPU
are initiators to themselves. Reflect this fact when initializing
the attribute.


It is not true in case the node is memory-less


Are you saying that if there's a memory-less NUMA node, then it needs to
have initiator set too? Asking mostly out of curiosity because we don't
allow memory-less NUMA nodes in Libvirt just yet. Nor cpu-less, but my
patches that I'm referring to in cover letter will allow at least
cpu-less nodes. Should I allow both?
QEMU does not currently support memory-less NUMA nodes, but hardware may
support them. So we reserve this type of NUMA node for future use. QEMU
does currently support cpu-less NUMA nodes, for emulating some "slow"
memory (like some NVDIMMs).




Also, can you shed more light into why machine_set_cpu_numa_node() did
not override the .initiator?

Thanks,
Michal

Re: [PATCH v4] target/i386: Add notes for versioned CPU models

2020-05-21 Thread Tao Xu


Hi Eduardo

Could you review this patch?

Tao Xu

On 3/24/2020 1:10 PM, Xu, Tao3 wrote:

Add which features are added or removed in this version.

Signed-off-by: Tao Xu 
---

The output is as follows:
qemu-system-x86_64 -cpu help | grep "\["
x86 Cascadelake-Server-v2  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES]
x86 Cascadelake-Server-v3  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES, no TSX]
x86 Denverton-v2  Intel Atom Processor (Denverton) [no MPX, no MONITOR]
x86 Icelake-Client-v2 Intel Core Processor (Icelake) [no TSX]
x86 Icelake-Server-v2 Intel Xeon Processor (Icelake) [no TSX]

Changes in v3:
 - Keep the existing custom model-id (Eduardo)

Changes in v2:
 - correct the note of Cascadelake v3 (Xiaoyao)
---
  target/i386/cpu.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 34b511f078..1c7690baa0 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3192,6 +3192,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
  .versions = (X86CPUVersionDefinition[]) {
  { .version = 1 },
  { .version = 2,
+  .note = "ARCH_CAPABILITIES",
.props = (PropValue[]) {
{ "arch-capabilities", "on" },
{ "rdctl-no", "on" },
@@ -3203,6 +3204,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
  },
  { .version = 3,
.alias = "Cascadelake-Server-noTSX",
+  .note = "ARCH_CAPABILITIES, no TSX",
.props = (PropValue[]) {
{ "hle", "off" },
{ "rtm", "off" },
@@ -3424,6 +3426,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
  { .version = 1 },
  {
  .version = 2,
+.note = "no TSX",
  .alias = "Icelake-Client-noTSX",
  .props = (PropValue[]) {
  { "hle", "off" },
@@ -3541,6 +3544,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
  { .version = 1 },
  {
  .version = 2,
+.note = "no TSX",
  .alias = "Icelake-Server-noTSX",
  .props = (PropValue[]) {
  { "hle", "off" },
@@ -3648,6 +3652,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
  { .version = 1 },
  {
  .version = 2,
+.note = "no MPX, no MONITOR",
  .props = (PropValue[]) {
  { "monitor", "off" },
  { "mpx", "off" },

Re: Migration with ``drive-mirror`` + NBD will let quorum qcow2 image become larger

2020-05-20 Thread Tao Xu


On 5/19/2020 10:49 PM, Alberto Garcia wrote:

On Tue 19 May 2020 11:15:44 AM CEST, Kevin Wolf wrote:

But maybe it could return a limited set of flags at least so that the
mirror job can get the BDRV_BLOCK_ZERO information if the quorum
children agree on it.


Yeah, maybe it is possible to implement a conservative version of that
function and fall back to BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED (or
something like that) in the cases where there's no clear alternative.

Berto



Thank you Kevin and Berto for your suggestion.

Migration with ``drive-mirror`` + NBD will let quorum qcow2 image become larger

2020-05-19 Thread Tao Xu


Hi,

I am using ``drive-mirror`` + NBD for live storage migration. But I find
that if I use a qcow2 image (virtual size: 10 GiB, disk size: 1.8 GiB) as
a child of quorum, then the destination image becomes larger (virtual
size: 10 GiB, disk size: 10 GiB). However, if I use a qcow2 image
directly, then the destination image (virtual size: 10 GiB, disk size:
1.8 GiB) is the same size as the source.


So I am wondering if my usage is wrong or it is expected with 
quorum+drive-mirror?


P.S. Detail:

1) [On *destination* Host]: qemu-img create -f qcow2 fedora32.qcow2 10G
Formatting 'fedora32.qcow2', fmt=qcow2 size=10737418240 
cluster_size=65536 lazy_refcounts=off refcount_bits=16


qemu-img info fedora32.qcow2
image: fedora32.qcow2
file format: qcow2
virtual size: 10 GiB (10737418240 bytes)
disk size: 196 KiB
cluster_size: 65536
Format specific information:
compat: 1.1
lazy refcounts: false
refcount bits: 16
corrupt: false

Boot the QEMU using:

disk_path=fedora32.qcow2
net_param="-netdev 
tap,id=hn0,vhost=off,br=br0,helper=/usr/local/libexec/qemu-bridge-helper 
-device rtl8139,id=e0,netdev=hn0"

cmdline="qemu-system-x86_64 \
-enable-kvm \
-m 2G -smp 4 -qmp stdio -bios OVMF.fd \
-monitor telnet:127.0.0.1:,nowait,server -vnc :7 -rtc base=utc \
-cpu host -device cirrus-vga,id=video0,bus=pci.0,addr=0x2 \
-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
-device usb-tablet,id=input0,bus=usb.0,port=1 $net_param \
-drive if=none,id=parent0,file.filename=$disk_path,driver=qcow2 \
-incoming tcp:0:"
exec $cmdline

[On *destination* QEMU]:
{'execute':'qmp_capabilities'}
{'execute': 'nbd-server-start', 'arguments': {'addr': {'type': 'inet', 
'data': {'host': '192.168.0.33', 'port': '8889'} } } }
{'execute': 'nbd-server-add', 'arguments': {'device': 'parent0', 
'writable': true } }


2) [On *source* Host]:

Boot the QEMU using:

disk_path=fedora32.qcow2
net_param="-netdev 
tap,id=hn0,vhost=off,br=br0,helper=/usr/local/libexec/qemu-bridge-helper 
-device rtl8139,id=e0,netdev=hn0"

cmdline="qemu-system-x86_64 \
-enable-kvm \
-m 2G -smp 4 -qmp stdio -bios OVMF.fd \
-monitor telnet:127.0.0.1:,nowait,server -vnc :7 -rtc base=utc \
-cpu host -device cirrus-vga,id=video0,bus=pci.0,addr=0x2 \
-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
-device usb-tablet,id=input0,bus=usb.0,port=1 $net_param \
-drive 
if=virtio,id=colo-disk0,driver=quorum,vote-threshold=1,children.0.file.filename=$disk_path,children.0.driver=qcow2"

exec $cmdline

[On *source* QEMU]:

{'execute':'qmp_capabilities'}
{'execute': 'drive-mirror', 'arguments':{ 'device': 'colo-disk0', 
'job-id': 'resync', 'target': 'nbd://192.168.0.33:8889/parent0', 'mode': 
'existing', 'format': 'nbd', 'sync': 'full'} }


{"timestamp": {"seconds": 1589902560, "microseconds": 107418}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "created", "id": "resync"}}
{"timestamp": {"seconds": 1589902560, "microseconds": 107487}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "running", "id": "resync"}}

{"return": {}}
{"timestamp": {"seconds": 1589902721, "microseconds": 439095}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "resync"}}
{"timestamp": {"seconds": 1589902721, "microseconds": 439194}, "event": 
"BLOCK_JOB_READY", "data": {"device": "resync", "len": 10739253248, 
"offset": 10739253248, "speed": 0, "type": "mirror"}}


3)[On *destination* Host]:
qemu-img info fedora32.qcow2
image: fedora32.qcow2
file format: qcow2
virtual size: 10 GiB (10737418240 bytes)
disk size: 10 GiB
cluster_size: 65536
Format specific information:
compat: 1.1
lazy refcounts: false
refcount bits: 16
corrupt: false
4)But if [On *source* Host] boot qemu using:

disk_path=fedora32.qcow2
net_param="-netdev 
tap,id=hn0,vhost=off,br=br0,helper=/usr/local/libexec/qemu-bridge-helper 
-device rtl8139,id=e0,netdev=hn0"

cmdline="qemu-system-x86_64 \
-enable-kvm \
-m 2G -smp 4 -qmp stdio -bios OVMF.fd \
-monitor telnet:127.0.0.1:,nowait,server -vnc :7 -rtc base=utc \
-cpu host -device cirrus-vga,id=video0,bus=pci.0,addr=0x2 \
-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
-device usb-tablet,id=input0,bus=usb.0,port=1 $net_param \
-drive if=virtio,id=parent0,file.filename=$disk_path,driver=qcow2"
exec $cmdline

Then [On *destination* Host]:

qemu-img info fedora32.qcow2
image: fedora32.qcow2
file format: qcow2
virtual size: 10 GiB (10737418240 bytes)
disk size: 1.8 GiB
cluster_size: 65536
Format specific information:
compat: 1.1
lazy refcounts: false
refcount bits: 16
corrupt: false

Re: [PATCH v4] target/i386: Add notes for versioned CPU models

2020-04-07 Thread Tao Xu


Ping for comments

On 3/24/2020 1:10 PM, Xu, Tao3 wrote:

Add which features are added or removed in this version.

Signed-off-by: Tao Xu 
---

The output is as follows:
qemu-system-x86_64 -cpu help | grep "\["
x86 Cascadelake-Server-v2  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES]
x86 Cascadelake-Server-v3  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES, no TSX]
x86 Denverton-v2  Intel Atom Processor (Denverton) [no MPX, no MONITOR]
x86 Icelake-Client-v2 Intel Core Processor (Icelake) [no TSX]
x86 Icelake-Server-v2 Intel Xeon Processor (Icelake) [no TSX]

Changes in v3:
 - Keep the existing custom model-id (Eduardo)

Changes in v2:
 - correct the note of Cascadelake v3 (Xiaoyao)
---
  target/i386/cpu.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 34b511f078..1c7690baa0 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3192,6 +3192,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
  .versions = (X86CPUVersionDefinition[]) {
  { .version = 1 },
  { .version = 2,
+  .note = "ARCH_CAPABILITIES",
.props = (PropValue[]) {
{ "arch-capabilities", "on" },
{ "rdctl-no", "on" },
@@ -3203,6 +3204,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
  },
  { .version = 3,
.alias = "Cascadelake-Server-noTSX",
+  .note = "ARCH_CAPABILITIES, no TSX",
.props = (PropValue[]) {
{ "hle", "off" },
{ "rtm", "off" },
@@ -3424,6 +3426,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
  { .version = 1 },
  {
  .version = 2,
+.note = "no TSX",
  .alias = "Icelake-Client-noTSX",
  .props = (PropValue[]) {
  { "hle", "off" },
@@ -3541,6 +3544,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
  { .version = 1 },
  {
  .version = 2,
+.note = "no TSX",
  .alias = "Icelake-Server-noTSX",
  .props = (PropValue[]) {
  { "hle", "off" },
@@ -3648,6 +3652,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
  { .version = 1 },
  {
  .version = 2,
+.note = "no MPX, no MONITOR",
  .props = (PropValue[]) {
  { "monitor", "off" },
  { "mpx", "off" },

[PATCH v4] target/i386: Add notes for versioned CPU models

2020-03-23 Thread Tao Xu

Add which features are added or removed in this version.

Signed-off-by: Tao Xu 
---

The output is as follows:
qemu-system-x86_64 -cpu help | grep "\["
x86 Cascadelake-Server-v2  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES]
x86 Cascadelake-Server-v3  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES, no TSX]
x86 Denverton-v2  Intel Atom Processor (Denverton) [no MPX, no MONITOR]
x86 Icelake-Client-v2 Intel Core Processor (Icelake) [no TSX]
x86 Icelake-Server-v2 Intel Xeon Processor (Icelake) [no TSX]

Changes in v3:
- Keep the existing custom model-id (Eduardo)

Changes in v2:
- correct the note of Cascadelake v3 (Xiaoyao)
---
 target/i386/cpu.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 34b511f078..1c7690baa0 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3192,6 +3192,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
 .versions = (X86CPUVersionDefinition[]) {
 { .version = 1 },
 { .version = 2,
+  .note = "ARCH_CAPABILITIES",
   .props = (PropValue[]) {
   { "arch-capabilities", "on" },
   { "rdctl-no", "on" },
@@ -3203,6 +3204,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
 },
 { .version = 3,
   .alias = "Cascadelake-Server-noTSX",
+  .note = "ARCH_CAPABILITIES, no TSX",
   .props = (PropValue[]) {
   { "hle", "off" },
   { "rtm", "off" },
@@ -3424,6 +3426,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
 { .version = 1 },
 {
 .version = 2,
+.note = "no TSX",
 .alias = "Icelake-Client-noTSX",
 .props = (PropValue[]) {
 { "hle", "off" },
@@ -3541,6 +3544,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
 { .version = 1 },
 {
 .version = 2,
+.note = "no TSX",
 .alias = "Icelake-Server-noTSX",
 .props = (PropValue[]) {
 { "hle", "off" },
@@ -3648,6 +3652,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
 { .version = 1 },
 {
 .version = 2,
+.note = "no MPX, no MONITOR",
 .props = (PropValue[]) {
 { "monitor", "off" },
 { "mpx", "off" },
-- 
2.20.1

Re: [PATCH v2] target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model

2020-03-23 Thread Tao Xu




On 3/24/2020 2:39 AM, Eduardo Habkost wrote:

On Mon, Mar 23, 2020 at 10:58:16AM +0800, Xiaoyao Li wrote:

On 3/23/2020 10:32 AM, Tao Xu wrote:

Hi Xiaoyao,

Maybe you can add .note for this new version.

for example:

+    .version = 3,
+    .note = "ARCH_CAPABILITIES",
+    .props = (PropValue[]) {


Hi Paolo and Eduardo,

Need I spin a new version to add the .note ?
Maybe you can add it when queue?


Please send a follow up patch so we don't hold a bug fix because
of something that's just cosmetic.  I will queue this patch.  We
still need a new version of "target/i386: Add notes for versioned
CPU models"[1], don't we?

[1] https://lore.kernel.org/qemu-devel/20200228215253.gb494...@habkost.net/

I am sorry for misunderstanding your comments in that patch[1]. I will 
submit a new version of this patch.

Re: [PATCH v2] target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model

2020-03-22 Thread Tao Xu


Hi Xiaoyao,

Maybe you can add .note for this new version.

for example:

+.version = 3,
+.note = "ARCH_CAPABILITIES",
+.props = (PropValue[]) {

On 3/16/2020 5:56 PM, Xiaoyao Li wrote:

Current Icelake-Server CPU model lacks all the features enumerated by
MSR_IA32_ARCH_CAPABILITIES.

Add them, so that guest of "Icelake-Server" can see all of them.

Signed-off-by: Xiaoyao Li 
---
v2:
  - Add it as a new version.
---
  target/i386/cpu.c | 13 +
  1 file changed, 13 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 92fafa265914..5fba6a2ad6b3 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3496,6 +3496,19 @@ static X86CPUDefinition builtin_x86_defs[] = {
  { /* end of list */ }
  },
  },
+{
+.version = 3,
+.props = (PropValue[]) {
+{ "arch-capabilities", "on" },
+{ "rdctl-no", "on" },
+{ "ibrs-all", "on" },
+{ "skip-l1dfl-vmentry", "on" },
+{ "mds-no", "on" },
+{ "pschange-mc-no", "on" },
+{ "taa-no", "on" },
+{ /* end of list */ }
+},
+},
  { /* end of list */ }
  }
  },
--
2.20.1

Re: [PATCH v3 2/4] target/i386: Remove monitor from some CPU models

2020-03-02 Thread Tao Xu


On 3/3/2020 1:19 AM, Eduardo Habkost wrote:

On Mon, Mar 02, 2020 at 07:47:28PM +0800, Tao Xu wrote:

On 2/29/2020 5:39 AM, Eduardo Habkost wrote:

On Wed, Feb 12, 2020 at 04:13:26PM +0800, Tao Xu wrote:

Add new version of Snowridge, Denverton, Opteron_G3, EPYC, and Dhyana
CPU model to remove MONITOR/MWAIT feature.

After QEMU/KVM use "-overcommit cpu-pm=on" to expose MONITOR/MWAIT
(commit id 6f131f13e68d648a8e4f083c667ab1acd88ce4cd), the MONITOR/MWAIT
feature in these CPU model is unused.

Signed-off-by: Tao Xu 


What exactly is the problem you are trying to fix?

No CPU model will ever have monitor=on set by default with KVM,
because kvm_default_props has a monitor=off element.



Maybe it is not a fix. For example, when we boot a guest with the Denverton
CPU model, the guest cannot detect MONITOR/MWAIT and boots with no warning,
because of "monitor=off" by default. The MONITOR/MWAIT feature in these CPU
models is unused, but does no harm. I am wondering if we should remove it from
existing CPU models.


As monitor=off is on kvm_default_props, changing the CPU model
table will only affect other accelerators (e.g. TCG, where
MONITOR/MWAIT support is advertised as supported).

We shouldn't be dictating policy for other accelerators just
because KVM doesn't support it.  Removing the feature on
kvm_default_props is sufficient.


I understand, thanks.

Re: [PATCH v3 4/4] target/i386: Add notes for versioned CPU models

2020-03-02 Thread Tao Xu


On 2/29/2020 5:52 AM, Eduardo Habkost wrote:

On Wed, Feb 12, 2020 at 04:13:28PM +0800, Tao Xu wrote:

Add which features are added or removed in this version. Remove the
changed model-id in versioned CPU models, to keep the model name
unchanged at /proc/cpuinfo inside the VM.

Signed-off-by: Tao Xu 
---

Changes in v2:
 - correct the note of Cascadelake v3 (Xiaoyao)
---
  target/i386/cpu.c | 54 ++-
  1 file changed, 25 insertions(+), 29 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 81a039beb6..739ef4ce91 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -2278,10 +2278,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
  {
  .version = 2,
  .alias = "Nehalem-IBRS",
+.note = "IBRS",
  .props = (PropValue[]) {
  { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Core i7 9xx (Nehalem Core i7, IBRS update)" },
  { /* end of list */ }


Changing model-id is guest-visible, so we can't do this.  The
same applies to the other models where model-id is being removed.

I suggest using the .note property only on the CPU model versions
that don't have custom model-id set yet, or when existing
information on model-id is incomplete.

For future CPU model versions, we can start using only .note and
stop changing model-id.



Got it, thanks!

Re: [PATCH v3 2/4] target/i386: Remove monitor from some CPU models

2020-03-02 Thread Tao Xu


On 2/29/2020 5:39 AM, Eduardo Habkost wrote:

On Wed, Feb 12, 2020 at 04:13:26PM +0800, Tao Xu wrote:

Add new version of Snowridge, Denverton, Opteron_G3, EPYC, and Dhyana
CPU model to remove MONITOR/MWAIT feature.

After QEMU/KVM use "-overcommit cpu-pm=on" to expose MONITOR/MWAIT
(commit id 6f131f13e68d648a8e4f083c667ab1acd88ce4cd), the MONITOR/MWAIT
feature in these CPU model is unused.

Signed-off-by: Tao Xu 


What exactly is the problem you are trying to fix?

No CPU model will ever have monitor=on set by default with KVM,
because kvm_default_props has a monitor=off element.



Maybe it is not a fix. For example, when we boot a guest with the Denverton
CPU model, the guest cannot detect MONITOR/MWAIT and boots with no warning,
because of "monitor=off" by default. The MONITOR/MWAIT feature in these
CPU models is unused, but does no harm. I am wondering if we should remove it
from existing CPU models.

Re: [PATCH v3 4/4] target/i386: Add notes for versioned CPU models

2020-02-12 Thread Tao Xu


On 2/12/2020 5:00 PM, Igor Mammedov wrote:

On Wed, 12 Feb 2020 16:13:28 +0800
Tao Xu  wrote:


Add which features are added or removed in this version. Remove the
changed model-id in versioned CPU models, to keep the model name
unchanged at /proc/cpuinfo inside the VM.

Signed-off-by: Tao Xu 
---

Changes in v2:
 - correct the note of Cascadelake v3 (Xiaoyao)
---
  target/i386/cpu.c | 54 ++-
  1 file changed, 25 insertions(+), 29 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 81a039beb6..739ef4ce91 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c

[...]

@@ -3142,6 +3130,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
  .versions = (X86CPUVersionDefinition[]) {
  { .version = 1 },
  { .version = 2,
+  .note = "ARCH_CAPABILITIES",


what's ARCH_CAPABILITIES?



These are some features exposed by MSR_IA32_ARCH_CAPABILITIES. For
Cascadelake, these are "rdctl-no", "ibrs-all", "skip-l1dfl-vmentry" and
"mds-no".

[PATCH v3 4/4] target/i386: Add notes for versioned CPU models

2020-02-12 Thread Tao Xu

Add which features are added or removed in this version. Remove the
changed model-id in versioned CPU models, to keep the model name
unchanged at /proc/cpuinfo inside the VM.

Signed-off-by: Tao Xu 
---

Changes in v2:
- correct the note of Cascadelake v3 (Xiaoyao)
---
 target/i386/cpu.c | 54 ++-
 1 file changed, 25 insertions(+), 29 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 81a039beb6..739ef4ce91 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -2278,10 +2278,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Nehalem-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Core i7 9xx (Nehalem Core i7, IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2359,10 +2358,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Westmere-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Westmere E56xx/L56xx/X56xx (IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2445,10 +2443,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "SandyBridge-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Xeon E312xx (Sandy Bridge, IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2537,10 +2534,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "IvyBridge-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Xeon E3-12xx v2 (Ivy Bridge, IBRS)" },
 { /* end of list */ }
 }
 },
@@ -2634,17 +2630,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Haswell-noTSX",
+.note = "no TSX",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
 { "stepping", "1" },
-{ "model-id", "Intel Core Processor (Haswell, no TSX)", },
 { /* end of list */ }
 },
 },
 {
 .version = 3,
 .alias = "Haswell-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 /* Restore TSX features removed by -v2 above */
 { "hle", "on" },
@@ -2655,21 +2652,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
  */
 { "stepping", "4" },
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Core Processor (Haswell, IBRS)" },
 { /* end of list */ }
 }
 },
 {
 .version = 4,
 .alias = "Haswell-noTSX-IBRS",
+.note = "no TSX, IBRS",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
 /* spec-ctrl was already enabled by -v3 above */
 { "stepping", "1" },
-{ "model-id",
-  "Intel Core Processor (Haswell, no TSX, IBRS)" },
 { /* end of list */ }
 }
 },
@@ -2765,35 +2759,33 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Broadwell-noTSX",
+.note = "no TSX",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },

[PATCH v3 1/4] target/i386: Add Denverton-v2 (no MPX) CPU model

2020-02-12 Thread Tao Xu

Because MPX is being removed from the linux kernel, remove MPX feature
from Denverton.

Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 32efa46852..848c992cd3 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3592,6 +3592,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
 .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING,
 .xlevel = 0x8008,
 .model_id = "Intel Atom Processor (Denverton)",
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ "mpx", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 {
 .name = "Snowridge",
-- 
2.20.1

[PATCH v3 0/4] Add extra information to versioned CPU models

2020-02-12 Thread Tao Xu

This series of patches will remove MPX from Denverton and remove
monitor from some CPU models. It also adds additional information to -cpu help
to indicate the changes in each version of a CPU model.

The output is as follows:
./x86_64-softmmu/qemu-system-x86_64 -cpu help | grep "\["
x86 Broadwell-v2  Intel Core Processor (Broadwell) [no TSX] 

x86 Broadwell-v3  Intel Core Processor (Broadwell) [IBRS]   

x86 Broadwell-v4  Intel Core Processor (Broadwell) [no TSX, IBRS]   

x86 Cascadelake-Server-v2  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES]
x86 Cascadelake-Server-v3  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES, no TSX]
x86 Denverton-v2  Intel Atom Processor (Denverton) [no MPX, no MONITOR] 

x86 Dhyana-v2 Hygon Dhyana Processor [no MONITOR]   

x86 EPYC-v2   AMD EPYC Processor [IBPB] 

x86 EPYC-v3   AMD EPYC Processor [IBPB, no MONITOR] 

x86 Haswell-v2Intel Core Processor (Haswell) [no TSX]   

x86 Haswell-v3Intel Core Processor (Haswell) [IBRS] 

x86 Haswell-v4Intel Core Processor (Haswell) [no TSX, IBRS] 

x86 Icelake-Client-v2 Intel Core Processor (Icelake) [no TSX]   

x86 Icelake-Server-v2 Intel Xeon Processor (Icelake) [no TSX]   

x86 IvyBridge-v2  Intel Xeon E3-12xx v2 (Ivy Bridge) [IBRS] 

x86 Nehalem-v2Intel Core i7 9xx (Nehalem Class Core i7) [IBRS]  

x86 Opteron_G3-v2 AMD Opteron 23xx (Gen 3 Class Opteron) [no MONITOR]   

x86 SandyBridge-v2Intel Xeon E312xx (Sandy Bridge) [IBRS]   

x86 Skylake-Client-v2 Intel Core Processor (Skylake) [IBRS] 

x86 Skylake-Client-v3 Intel Core Processor (Skylake) [no TSX, IBRS] 

x86 Skylake-Server-v2 Intel Xeon Processor (Skylake) [IBRS] 

x86 Skylake-Server-v3 Intel Xeon Processor (Skylake) [no TSX, IBRS] 

x86 Snowridge-v2  Intel Atom Processor (SnowRidge) [no MPX] 

x86 Snowridge-v3  Intel Atom Processor (SnowRidge) [no MPX, no MONITOR] 

x86 Westmere-v2   Westmere E56xx/L56xx/X56xx (Nehalem-C) [IBRS]

Changes in v2:
- Rebase
- correct the note of Cascadelake v3 (Xiaoyao)

Tao Xu (4):
  target/i386: Add Denverton-v2 (no MPX) CPU model
  target/i386: Remove monitor from some CPU models
  target/i386: Add new property note to versioned CPU models
  target/i386: Add notes for versioned CPU models

 target/i386/cpu.c | 115 +-
 1 file changed, 84 insertions(+), 31 deletions(-)

-- 
2.20.1

[PATCH v3 3/4] target/i386: Add new property note to versioned CPU models

2020-02-12 Thread Tao Xu

Add additional information for -cpu help to indicate the changes in this
version of CPU model.

Suggested-by: Eduardo Habkost 
Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 6905e4eabd..81a039beb6 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1690,6 +1690,7 @@ typedef struct PropValue {
 typedef struct X86CPUVersionDefinition {
 X86CPUVersion version;
 const char *alias;
+const char *note;
 PropValue *props;
 } X86CPUVersionDefinition;
 
@@ -1720,6 +1721,7 @@ struct X86CPUModel {
 X86CPUDefinition *cpudef;
 /* CPU model version */
 X86CPUVersion version;
+const char *note;
 /*
  * If true, this is an alias CPU model.
  * This matters only for "-cpu help" and query-cpu-definitions
@@ -4899,6 +4901,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer 
user_data)
 g_autofree char *name = x86_cpu_class_get_model_name(cc);
 g_autofree char *desc = g_strdup(cc->model_description);
 g_autofree char *alias_of = x86_cpu_class_get_alias_of(cc);
+g_autofree char *model_id = x86_cpu_class_get_model_id(cc);
 
 if (!desc && alias_of) {
 if (cc->model && cc->model->version == CPU_VERSION_AUTO) {
@@ -4907,11 +4910,14 @@ static void x86_cpu_list_entry(gpointer data, gpointer 
user_data)
 desc = g_strdup_printf("(alias of %s)", alias_of);
 }
 }
+if (!desc && cc->model && cc->model->note) {
+desc = g_strdup_printf("%s [%s]", model_id, cc->model->note);
+}
 if (!desc) {
-desc = x86_cpu_class_get_model_id(cc);
+desc = g_strdup_printf("%s", model_id);
 }
 
-qemu_printf("x86 %-20s  %-48s\n", name, desc);
+qemu_printf("x86 %-20s  %-58s\n", name, desc);
 }
 
 /* list available CPU models and flags */
@@ -5388,6 +5394,7 @@ static void x86_register_cpudef_types(X86CPUDefinition 
*def)
 x86_cpu_versioned_model_name(def, vdef->version);
 m->cpudef = def;
 m->version = vdef->version;
+m->note = vdef->note;
 x86_register_cpu_model_type(name, m);
 
 if (vdef->alias) {
-- 
2.20.1

[PATCH v3 2/4] target/i386: Remove monitor from some CPU models

2020-02-12 Thread Tao Xu

Add new version of Snowridge, Denverton, Opteron_G3, EPYC, and Dhyana
CPU model to remove MONITOR/MWAIT feature.

After QEMU/KVM use "-overcommit cpu-pm=on" to expose MONITOR/MWAIT
(commit id 6f131f13e68d648a8e4f083c667ab1acd88ce4cd), the MONITOR/MWAIT
feature in these CPU model is unused.

Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 848c992cd3..6905e4eabd 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3731,6 +3731,14 @@ static X86CPUDefinition builtin_x86_defs[] = {
 { /* end of list */ },
 },
 },
+{
+.version = 3,
+.props = (PropValue[]) {
+/* mpx was already removed by -v2 above */
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
 { /* end of list */ },
 },
 },
@@ -3842,6 +3850,17 @@ static X86CPUDefinition builtin_x86_defs[] = {
 CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
 .xlevel = 0x8008,
 .model_id = "AMD Opteron 23xx (Gen 3 Class Opteron)",
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 {
 .name = "Opteron_G4",
@@ -3966,6 +3985,14 @@ static X86CPUDefinition builtin_x86_defs[] = {
 { /* end of list */ }
 }
 },
+{
+.version = 3,
+.props = (PropValue[]) {
+/* ibpb was already enabled by -v2 above */
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
 { /* end of list */ }
 }
 },
@@ -4018,6 +4045,17 @@ static X86CPUDefinition builtin_x86_defs[] = {
 .xlevel = 0x801E,
 .model_id = "Hygon Dhyana Processor",
 .cache_info = _cache_info,
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 };
 
-- 
2.20.1

[PATCH RESEND v2 4/4] target/i386: Add notes for versioned CPU models

2020-01-07 Thread Tao Xu

Add which features are added or removed in this version. Remove the
changed model-id in versioned CPU models.

Signed-off-by: Tao Xu 
---

Changes in v2:
- correct the note of Cascadelake v3 (Xiaoyao)
---
 target/i386/cpu.c | 50 +++
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 736b4c7326..4daa153bfa 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -2278,10 +2278,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Nehalem-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Core i7 9xx (Nehalem Core i7, IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2359,10 +2358,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Westmere-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Westmere E56xx/L56xx/X56xx (IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2445,10 +2443,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "SandyBridge-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Xeon E312xx (Sandy Bridge, IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2537,10 +2534,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "IvyBridge-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Xeon E3-12xx v2 (Ivy Bridge, IBRS)" },
 { /* end of list */ }
 }
 },
@@ -2634,17 +2630,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Haswell-noTSX",
+.note = "no TSX",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
 { "stepping", "1" },
-{ "model-id", "Intel Core Processor (Haswell, no TSX)", },
 { /* end of list */ }
 },
 },
 {
 .version = 3,
 .alias = "Haswell-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 /* Restore TSX features removed by -v2 above */
 { "hle", "on" },
@@ -2655,21 +2652,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
  */
 { "stepping", "4" },
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Core Processor (Haswell, IBRS)" },
 { /* end of list */ }
 }
 },
 {
 .version = 4,
 .alias = "Haswell-noTSX-IBRS",
+.note = "no TSX, IBRS",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
 /* spec-ctrl was already enabled by -v3 above */
 { "stepping", "1" },
-{ "model-id",
-  "Intel Core Processor (Haswell, no TSX, IBRS)" },
 { /* end of list */ }
 }
 },
@@ -2765,35 +2759,33 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Broadwell-noTSX",
+.note = "no TSX",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
-{ "model-id", "Inte

[PATCH RESEND v2 0/4] Add extra information to versioned CPU models

2020-01-07 Thread Tao Xu

This series of patches removes MPX from Denverton and removes
MONITOR from some CPU models. It also adds additional information to -cpu help
to indicate what changed in each version of a CPU model.

The output is as follows:
./x86_64-softmmu/qemu-system-x86_64 -cpu help | grep "\["
x86 Broadwell-v2  Intel Core Processor (Broadwell) [no TSX] 

x86 Broadwell-v3  Intel Core Processor (Broadwell) [IBRS]   

x86 Broadwell-v4  Intel Core Processor (Broadwell) [no TSX, IBRS]   

x86 Cascadelake-Server-v2  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES]
x86 Cascadelake-Server-v3  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES, no TSX]
x86 Denverton-v2  Intel Atom Processor (Denverton) [no MPX, no MONITOR] 

x86 Dhyana-v2 Hygon Dhyana Processor [no MONITOR]   

x86 EPYC-v2   AMD EPYC Processor [IBPB] 

x86 EPYC-v3   AMD EPYC Processor [IBPB, no MONITOR] 

x86 Haswell-v2Intel Core Processor (Haswell) [no TSX]   

x86 Haswell-v3Intel Core Processor (Haswell) [IBRS] 

x86 Haswell-v4Intel Core Processor (Haswell) [no TSX, IBRS] 

x86 Icelake-Client-v2 Intel Core Processor (Icelake) [no TSX]   

x86 Icelake-Server-v2 Intel Xeon Processor (Icelake) [no TSX]   

x86 IvyBridge-v2  Intel Xeon E3-12xx v2 (Ivy Bridge) [IBRS] 

x86 Nehalem-v2Intel Core i7 9xx (Nehalem Class Core i7) [IBRS]  

x86 Opteron_G3-v2 AMD Opteron 23xx (Gen 3 Class Opteron) [no MONITOR]   

x86 SandyBridge-v2Intel Xeon E312xx (Sandy Bridge) [IBRS]   

x86 Skylake-Client-v2 Intel Core Processor (Skylake) [IBRS] 

x86 Skylake-Client-v3 Intel Core Processor (Skylake) [no TSX, IBRS] 

x86 Skylake-Server-v2 Intel Xeon Processor (Skylake) [IBRS] 

x86 Skylake-Server-v3 Intel Xeon Processor (Skylake) [no TSX, IBRS] 

x86 Snowridge-v2  Intel Atom Processor (SnowRidge) [no MPX] 

x86 Snowridge-v3  Intel Atom Processor (SnowRidge) [no MPX, no MONITOR] 

x86 Westmere-v2   Westmere E56xx/L56xx/X56xx (Nehalem-C) [IBRS]

Changes in v2:
- Rebase
- correct the note of Cascadelake v3 (Xiaoyao)

Tao Xu (4):
  target/i386: Add Denverton-v2 (no MPX) CPU model
  target/i386: Remove monitor from some CPU models
  target/i386: Add new property note to versioned CPU models
  target/i386: Add notes for versioned CPU models

 target/i386/cpu.c | 111 +++---
 1 file changed, 84 insertions(+), 27 deletions(-)

-- 
2.20.1

[PATCH RESEND v2 3/4] target/i386: Add new property note to versioned CPU models

2020-01-07 Thread Tao Xu

Add additional information for -cpu help to indicate the changes in this
version of CPU model.

Suggested-by: Eduardo Habkost 
Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index a6eb1b81fd..736b4c7326 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1690,6 +1690,7 @@ typedef struct PropValue {
 typedef struct X86CPUVersionDefinition {
 X86CPUVersion version;
 const char *alias;
+const char *note;
 PropValue *props;
 } X86CPUVersionDefinition;
 
@@ -1720,6 +1721,7 @@ struct X86CPUModel {
 X86CPUDefinition *cpudef;
 /* CPU model version */
 X86CPUVersion version;
+const char *note;
 /*
  * If true, this is an alias CPU model.
  * This matters only for "-cpu help" and query-cpu-definitions
@@ -4846,6 +4848,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer 
user_data)
 g_autofree char *name = x86_cpu_class_get_model_name(cc);
 g_autofree char *desc = g_strdup(cc->model_description);
 g_autofree char *alias_of = x86_cpu_class_get_alias_of(cc);
+g_autofree char *model_id = x86_cpu_class_get_model_id(cc);
 
 if (!desc && alias_of) {
 if (cc->model && cc->model->version == CPU_VERSION_AUTO) {
@@ -4854,11 +4857,14 @@ static void x86_cpu_list_entry(gpointer data, gpointer 
user_data)
 desc = g_strdup_printf("(alias of %s)", alias_of);
 }
 }
+if (!desc && cc->model && cc->model->note) {
+desc = g_strdup_printf("%s [%s]", model_id, cc->model->note);
+}
 if (!desc) {
-desc = x86_cpu_class_get_model_id(cc);
+desc = g_strdup_printf("%s", model_id);
 }
 
-qemu_printf("x86 %-20s  %-48s\n", name, desc);
+qemu_printf("x86 %-20s  %-58s\n", name, desc);
 }
 
 /* list available CPU models and flags */
@@ -5335,6 +5341,7 @@ static void x86_register_cpudef_types(X86CPUDefinition 
*def)
 x86_cpu_versioned_model_name(def, vdef->version);
 m->cpudef = def;
 m->version = vdef->version;
+m->note = vdef->note;
 x86_register_cpu_model_type(name, m);
 
 if (vdef->alias) {
-- 
2.20.1

[PATCH RESEND v2 1/4] target/i386: Add Denverton-v2 (no MPX) CPU model

2020-01-07 Thread Tao Xu

Because MPX is being removed from the Linux kernel, remove the MPX feature
from Denverton.

Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 31556b7ec4..6981aa2a34 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3539,6 +3539,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
 .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING,
 .xlevel = 0x8008,
 .model_id = "Intel Atom Processor (Denverton)",
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ "mpx", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 {
 .name = "Snowridge",
-- 
2.20.1

[PATCH RESEND v2 2/4] target/i386: Remove monitor from some CPU models

2020-01-07 Thread Tao Xu

Add new versions of the Snowridge, Denverton, Opteron_G3, EPYC, and Dhyana
CPU models that remove the MONITOR/MWAIT feature.

Since QEMU/KVM uses "-overcommit cpu-pm=on" to expose MONITOR/MWAIT
(commit id 6f131f13e68d648a8e4f083c667ab1acd88ce4cd), the MONITOR/MWAIT
feature in these CPU models is unused.

Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 6981aa2a34..a6eb1b81fd 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3678,6 +3678,14 @@ static X86CPUDefinition builtin_x86_defs[] = {
 { /* end of list */ },
 },
 },
+{
+.version = 3,
+.props = (PropValue[]) {
+/* mpx was already removed by -v2 above */
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
 { /* end of list */ },
 },
 },
@@ -3789,6 +3797,17 @@ static X86CPUDefinition builtin_x86_defs[] = {
 CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
 .xlevel = 0x8008,
 .model_id = "AMD Opteron 23xx (Gen 3 Class Opteron)",
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 {
 .name = "Opteron_G4",
@@ -3913,6 +3932,14 @@ static X86CPUDefinition builtin_x86_defs[] = {
 { /* end of list */ }
 }
 },
+{
+.version = 3,
+.props = (PropValue[]) {
+/* ibpb was already enabled by -v2 above */
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
 { /* end of list */ }
 }
 },
@@ -3965,6 +3992,17 @@ static X86CPUDefinition builtin_x86_defs[] = {
 .xlevel = 0x801E,
 .model_id = "Hygon Dhyana Processor",
 .cache_info = _cache_info,
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 };
 
-- 
2.20.1

Re: [PATCH] util/cutils: Expand do_strtosz parsing precision to 64 bits

2019-12-18 Thread Tao Xu


On 12/19/2019 2:26 AM, Markus Armbruster wrote:

Tao Xu  writes:


On 12/18/2019 9:33 AM, Tao Xu wrote:

On 12/17/2019 6:25 PM, Markus Armbruster wrote:

[...]

Also fun: for "0123", we use uint64_t 83, not double 123.0.  But for
"0123.", we use 123.0, not 83.

Do we really want to accept octal and hexadecimal integers?



Thank you for reminding me. Octal and hexadecimal may bring more
confusion. I will use qemu_strtou64(nptr, &suffixu, 10, &valu) and
add a test for input like "0123".



Hi Markus,

After I switched to qemu_strtou64(nptr, &suffixu, 10, &valu), it raised
another question. Because qemu_strtod_finite() supports hexadecimal input,
in this situation the input will be parsed as a double. That also causes
large hexadecimal integers to be rounded. So there may be two solutions:

1: use qemu_strtou64(nptr, &suffixu, 0, &valu) and parse octal as
decimal. This will keep hexadecimal valid as it is now.

"0123" --> 123; "0x123" --> 291


How would you make qemu_strtou64() parse octal as decimal?


How about this solution: make @base a variable. If we detect a
hexadecimal prefix, we use base 0, so the hexadecimal input can be parsed
as u64; otherwise we use base 10, so octal is parsed as decimal, because
the 0 prefix is ignored by qemu_strtou64(nptr, &suffixu, 10, &valu):


const char *p = nptr;
while (qemu_isspace(*p)) {
   p++;
}
if (*p == '0' && (qemu_toupper(*(p+1)) == 'X' ||) {
base = 0;
} else {
base = 10;
}

retd = qemu_strtod_finite(nptr, , );
retu = qemu_strtou64(nptr, , base, );
use_strtod = strlen(suffixd) < strlen(suffixu);

if (use_strtod) {
endptr = suffixd;
retval = retd;
} else {
endptr = suffixu;
retval = retu;
}



2: use qemu_strtou64(nptr, &suffixu, 10, &valu) and reject octal and
hexadecimal.

"0123" --> Error; "0x123" --> Error


How would you reject the 0x prefix?

How about checking whether the first two characters are '0' and 'x', and
then returning -EINVAL?

Re: [PATCH RESEND v20 0/8] Build ACPI Heterogeneous Memory Attribute Table (HMAT)

2019-12-18 Thread Tao Xu


On 12/13/2019 6:06 PM, Michael S. Tsirkin wrote:

On Fri, Dec 13, 2019 at 09:19:21AM +0800, Tao Xu wrote:

This series of patches will build Heterogeneous Memory Attribute Table (HMAT)
according to the command line. The ACPI HMAT describes the memory attributes,
such as memory side cache attributes and bandwidth and latency details,
related to the Memory Proximity Domain.
The software is expected to use HMAT information as hint for optimization.

In the linux kernel, the codes in drivers/acpi/hmat/hmat.c parse and report
the platform's HMAT tables.

The V19 patches link:
https://patchwork.kernel.org/cover/11265525/


Looks good to me, I'll queue it for merge after the release. If possible
please ping me after the release to help make sure it didn't get
dropped.



Hi Michael,

I am wondering whether these patches can be merged this week, because the
QEMU 5.0 development tree is open and next week may be the holidays.


Thank you very much!

Tao Xu

Re: [PATCH] util/cutils: Expand do_strtosz parsing precision to 64 bits

2019-12-17 Thread Tao Xu


On 12/18/2019 9:33 AM, Tao Xu wrote:

On 12/17/2019 6:25 PM, Markus Armbruster wrote:

Tao Xu  writes:


On 12/5/19 11:29 PM, Markus Armbruster wrote:

Tao Xu  writes:


Parse input string both as a double and as a uint64_t, then use the
method which consumes more characters. Update the related test cases.

Signed-off-by: Tao Xu 
---

[...]

diff --git a/util/cutils.c b/util/cutils.c
index 77acadc70a..b08058c57c 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -212,24 +212,43 @@ static int do_strtosz(const char *nptr, const 
char **end,

 const char default_suffix, int64_t unit,
 uint64_t *result)
   {
-    int retval;
-    const char *endptr;
+    int retval, retd, retu;
+    const char *suffix, *suffixd, *suffixu;
   unsigned char c;
   int mul_required = 0;
-    double val, mul, integral, fraction;
+    bool use_strtod;
+    uint64_t valu;
+    double vald, mul, integral, fraction;


Note for later: @mul is double.


+
+    retd = qemu_strtod_finite(nptr, , );
+    retu = qemu_strtou64(nptr, , 0, );


Note for later: passing 0 to base accepts octal and hexadecimal
integers.


+    use_strtod = strlen(suffixd) < strlen(suffixu);
+
+    /*
+ * Parse @nptr both as a double and as a uint64_t, then use 
the method

+ * which consumes more characters.
+ */


The comment is in a funny place.  I'd put it right before the
qemu_strtod_finite() line.


+    if (use_strtod) {
+    suffix = suffixd;
+    retval = retd;
+    } else {
+    suffix = suffixu;
+    retval = retu;
+    }
   -    retval = qemu_strtod_finite(nptr, , );
   if (retval) {
   goto out;
   }


This is even more subtle than it looks.

A close reading of the function contracts leads to three cases for each
conversion:

* parse error (including infinity and NaN)

    @retu / @retd is -EINVAL
    @valu / @vald is uninitialized
    @suffixu / @suffixd is @nptr

* range error

    @retu / @retd is -ERANGE
    @valu / @vald is our best approximation of the conversion result
    @suffixu / @suffixd points to the first character not consumed 
by the

    conversion.

    Sub-cases:

    - uint64_t overflow

  We know the conversion result exceeds UINT64_MAX.

    - double overflow

  we know the conversion result's magnitude exceeds the largest
  representable finite double DBL_MAX.

    - double underflow

  we know the conversion result is close to zero (closer than 
DBL_MIN,

  the smallest normalized positive double).

* success

    @retu / @retd is 0
    @valu / @vald is the conversion result
    @suffixu / @suffixd points to the first character not consumed 
by the

    conversion.

This leads to a matrix (parse error, uint64_t overflow, success) x
(parse error, double overflow, double underflow, success).  We need to
check the code does what we want for each element of this matrix, and
document any behavior that's not perfectly obvious.

(success, success): we pick uint64_t if qemu_strtou64() consumed more
characters than qemu_strtod_finite(), else double.  "More" is important
here; when they consume the same characters, we *need* to use the
uint64_t result.  Example: for "18446744073709551615", we need to use
uint64_t 18446744073709551615, not double 18446744073709551616.0.  But
for "18446744073709551616.", we need to use the double.  Good.


Also fun: for "0123", we use uint64_t 83, not double 123.0.  But for
"0123.", we use 123.0, not 83.

Do we really want to accept octal and hexadecimal integers?



Thank you for reminding me. Octal and hexadecimal may bring more
confusion. I will use qemu_strtou64(nptr, &suffixu, 10, &valu) and add a
test for input like "0123".




Hi Markus,

After I switched to qemu_strtou64(nptr, &suffixu, 10, &valu), it raised
another question. Because qemu_strtod_finite() supports hexadecimal input,
in this situation the input will be parsed as a double. That also causes
large hexadecimal integers to be rounded. So there may be two solutions:


1: use qemu_strtou64(nptr, &suffixu, 0, &valu) and parse octal as
decimal. This will keep hexadecimal valid as it is now.


"0123" --> 123; "0x123" --> 291

2: use qemu_strtou64(nptr, &suffixu, 10, &valu) and reject octal and
hexadecimal.


"0123" --> Error; "0x123" --> Error

Re: [PATCH] util/cutils: Expand do_strtosz parsing precision to 64 bits

2019-12-17 Thread Tao Xu


On 12/17/2019 11:01 PM, Markus Armbruster wrote:

Christophe de Dinechin  writes:


On 17 Dec 2019, at 15:08, Markus Armbruster  wrote:

Christophe de Dinechin  writes:


On 5 Dec 2019, at 16:29, Markus Armbruster  wrote:

Tao Xu  writes:


Parse input string both as a double and as a uint64_t, then use the
method which consumes more characters. Update the related test cases.

Signed-off-by: Tao Xu 
---

[...]

diff --git a/util/cutils.c b/util/cutils.c
index 77acadc70a..b08058c57c 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -212,24 +212,43 @@ static int do_strtosz(const char *nptr, const char **end,
  const char default_suffix, int64_t unit,
  uint64_t *result)
{
-int retval;
-const char *endptr;
+int retval, retd, retu;
+const char *suffix, *suffixd, *suffixu;
unsigned char c;
int mul_required = 0;
-double val, mul, integral, fraction;
+bool use_strtod;
+uint64_t valu;
+double vald, mul, integral, fraction;


Note for later: @mul is double.


+
+retd = qemu_strtod_finite(nptr, , );
+retu = qemu_strtou64(nptr, , 0, );
+use_strtod = strlen(suffixd) < strlen(suffixu);
+
+/*
+ * Parse @nptr both as a double and as a uint64_t, then use the method
+ * which consumes more characters.
+ */


The comment is in a funny place.  I'd put it right before the
qemu_strtod_finite() line.


+if (use_strtod) {
+suffix = suffixd;
+retval = retd;
+} else {
+suffix = suffixu;
+retval = retu;
+}

-retval = qemu_strtod_finite(nptr, , );
if (retval) {
goto out;
}


This is even more subtle than it looks.


But why it is even necessary?

The “contract” for the function used to be that it returned rounded values
beyond 2^53, which in itself is curious.

But now it’s a 6-dimensional matrix of hell with NaNs and barfnots, when the
name implies it’s simply doing a text to u64 conversion…

There is certainly a reason, but I’m really curious what it is :-)


It all goes back to commit 9f9b17a4f0 "Introduce strtosz() library
function to convert a string to a byte count.".  To support "convenient"
usage like "1.5G", it parses the number part with strtod().  This limits
us to 53 bits of precision.  Larger sizes get rounded.

I guess the excuse for this was that when you're dealing with sizes that
large (petabytes!), your least significant bits are zero anyway.

Regardless, the interface is *awful*.  We should've forced the author to
spell it out in all its glory in a proper function contract.  That tends
to cool the enthusiasm for "convenient" syntax amazingly fast.

The awful interface has been confusing people for close to a decade now.

What to do?


I see. Thanks for the rationale. I knew it had to make sense :-)


For a value of "sense"...


I’d probably avoid strtod even with the convenient syntax above.
Do you want 1.33e-6M to be allowed? Do we want to ever
accept or generate NaN or Inf values?


NaN or Inf definitely not.  That's why we use qemu_strtod_finite()
before and after the patch.

No sane person should ever use 1.33e-6M.  Or even 1.1k (which yields
1126, rounded silently from machine number 1126.40001, which
approximates the true value 1126.4).

Certain fractions are actually sane.  1.5k denotes a perfectly fine
integer, which the code manages not to screw up.  I'd recommend against
using fractions regardless.

What usage are we prepared to break?  What kind of confusion are we
willing to bear?  Those are the questions.


Tao Xu's patch tries to make the function do what its users expect,
namely parse a bleepin' 64 bit integer, without breaking any of the
"convenience" syntax.  Turns out that's amazingly subtle.  Are we making
things less confusing or more?


Thanks for your explanation. I think another reason is that the built-in
'size' type is very commonly used. Someone may use '-m 1.5G' to boot QEMU,
or write it to a config file.

Re: [PATCH RESEND v2] util/cutils: Expand do_strtosz parsing precision to 64 bits

2019-12-17 Thread Tao Xu


On 12/17/2019 7:44 PM, Christophe de Dinechin wrote:




On 9 Dec 2019, at 09:30, Tao Xu  wrote:

Parse input string both as a double and as a uint64_t, then use the
method which consumes more characters. Update the related test cases.

Signed-off-by: Tao Xu 
---

Changes in v2:
- Resend to use a double smaller than DBL_MIN
- Add more test case for double overflow and underflow.
- Set mul as int64_t (Markus)
- Restore endptr (Markus)
---
tests/test-cutils.c| 37 +++
tests/test-keyval.c| 47 +
tests/test-qemu-opts.c | 39 +---
util/cutils.c  | 67 +++---
4 files changed, 75 insertions(+), 115 deletions(-)


[...]

+/*
+ * Parse @nptr both as a double and as a uint64_t, then use the method
+ * which consumes more characters.
+ */


Why do ever need to parse as double if you have uint64?



Because we want to keep do_strtosz() compatible with double input (such as
1.5k).

+retd = qemu_strtod_finite(nptr, , );
+retu = qemu_strtou64(nptr, , 0, );
+use_strtod = strlen(suffixd) < strlen(suffixu);


You could simply compare suffixd and suffixu:

use_strtod = suffixd > suffixu;



Thank you for your suggestion.

+
+if (use_strtod) {
+endptr = suffixd;
+retval = retd;
+} else {
+endptr = suffixu;
+retval = retu;
+}

-retval = qemu_strtod_finite(nptr, , );
 if (retval) {
 goto out;
 }
-fraction = modf(val, );
-if (fraction != 0) {
-mul_required = 1;
+if (use_strtod) {
+fraction = modf(vald, );
+if (fraction != 0) {
+mul_required = 1;
+}
 }
 c = *endptr;
 mul = suffix_mul(c, unit);
@@ -238,17 +258,30 @@ static int do_strtosz(const char *nptr, const char **end,
 retval = -EINVAL;
 goto out;
 }
-/*
- * Values near UINT64_MAX overflow to 2**64 when converting to double
- * precision.  Compare against the maximum representable double precision
- * value below 2**64, computed as "the next value after 2**64 (0x1p64) in
- * the direction of 0".
- */
-if ((val * mul > nextafter(0x1p64, 0)) || val < 0) {
-retval = -ERANGE;
-goto out;
+
+if (use_strtod) {
+/*
+ * Values near UINT64_MAX overflow to 2**64 when converting to double
+ * precision. Compare against the maximum representable double 
precision
+ * value below 2**64, computed as "the next value after 2**64 (0x1p64)
+ * in the direction of 0".
+ */
+if ((vald * mul > nextafter(0x1p64, 0)) || vald < 0) {
+retval = -ERANGE;
+goto out;
+}
+*result = vald * mul;
+} else {
+/* Reject negative input and overflow output */
+while (qemu_isspace(*nptr)) {
+nptr++;
+}
+if (*nptr == '-' || UINT64_MAX / mul < valu) {
+retval = -ERANGE;
+goto out;
+}
+*result = valu * mul;
 }
-*result = val * mul;
 retval = 0;

out:
--
2.20.1

Re: [PATCH] util/cutils: Expand do_strtosz parsing precision to 64 bits

2019-12-17 Thread Tao Xu


On 12/17/2019 6:25 PM, Markus Armbruster wrote:

Tao Xu  writes:


On 12/5/19 11:29 PM, Markus Armbruster wrote:

Tao Xu  writes:


Parse input string both as a double and as a uint64_t, then use the
method which consumes more characters. Update the related test cases.

Signed-off-by: Tao Xu 
---

[...]

diff --git a/util/cutils.c b/util/cutils.c
index 77acadc70a..b08058c57c 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -212,24 +212,43 @@ static int do_strtosz(const char *nptr, const char **end,
 const char default_suffix, int64_t unit,
 uint64_t *result)
   {
-int retval;
-const char *endptr;
+int retval, retd, retu;
+const char *suffix, *suffixd, *suffixu;
   unsigned char c;
   int mul_required = 0;
-double val, mul, integral, fraction;
+bool use_strtod;
+uint64_t valu;
+double vald, mul, integral, fraction;


Note for later: @mul is double.


+
+retd = qemu_strtod_finite(nptr, , );
+retu = qemu_strtou64(nptr, , 0, );


Note for later: passing 0 to base accepts octal and hexadecimal
integers.


+use_strtod = strlen(suffixd) < strlen(suffixu);
+
+/*
+ * Parse @nptr both as a double and as a uint64_t, then use the method
+ * which consumes more characters.
+ */


The comment is in a funny place.  I'd put it right before the
qemu_strtod_finite() line.


+if (use_strtod) {
+suffix = suffixd;
+retval = retd;
+} else {
+suffix = suffixu;
+retval = retu;
+}
   -retval = qemu_strtod_finite(nptr, , );
   if (retval) {
   goto out;
   }


This is even more subtle than it looks.

A close reading of the function contracts leads to three cases for each
conversion:

* parse error (including infinity and NaN)

@retu / @retd is -EINVAL
@valu / @vald is uninitialized
@suffixu / @suffixd is @nptr

* range error

@retu / @retd is -ERANGE
@valu / @vald is our best approximation of the conversion result
@suffixu / @suffixd points to the first character not consumed by the
conversion.

Sub-cases:

- uint64_t overflow

  We know the conversion result exceeds UINT64_MAX.

- double overflow

  we know the conversion result's magnitude exceeds the largest
  representable finite double DBL_MAX.

- double underflow

  we know the conversion result is close to zero (closer than DBL_MIN,
  the smallest normalized positive double).

* success

@retu / @retd is 0
@valu / @vald is the conversion result
@suffixu / @suffixd points to the first character not consumed by the
conversion.

This leads to a matrix (parse error, uint64_t overflow, success) x
(parse error, double overflow, double underflow, success).  We need to
check the code does what we want for each element of this matrix, and
document any behavior that's not perfectly obvious.

(success, success): we pick uint64_t if qemu_strtou64() consumed more
characters than qemu_strtod_finite(), else double.  "More" is important
here; when they consume the same characters, we *need* to use the
uint64_t result.  Example: for "18446744073709551615", we need to use
uint64_t 18446744073709551615, not double 18446744073709551616.0.  But
for "18446744073709551616.", we need to use the double.  Good.


Also fun: for "0123", we use uint64_t 83, not double 123.0.  But for
"0123.", we use 123.0, not 83.

Do we really want to accept octal and hexadecimal integers?



Thank you for reminding me. Octal and hexadecimal may bring more
confusion. I will use qemu_strtou64(nptr, &suffixu, 10, &valu) and add a
test for input like "0123".



(success, parse error) and (parse error, success): we pick the one that
succeeds, because success consumes characters, and failure to parse does
not.  Good.

(parse error, parse error): neither consumes characters, so we pick
uint64_t.  Good.

(parse error, double overflow), (parse error, double underflow) and
(uint64_t overflow, parse error): we pick the range error, because it
consumes characters.  Good.

These are the simple combinations.  The remainder are hairier: (success,
double overflow), (success, double underflow), (uint64_t overflow,
success).  I lack the time to analyze them today.  Must be done before
we take this patch.  Any takers?


(success, double overflow), (success, double underflow): we pick the double
range error and return -ERANGE, because it consumes more
characters. Example: for "1.79769e+309", qemu_strtou64() consumes "1"
and parses it as uint64_t, but qemu_strtod_finite() returns -ERANGE and
consumes all characters. That is OK.


The only way to have double overflow when uint64_t succeeds is an
exponent.  Double consumes the characters making up the exponent,
uint64_t does not.  We use double.

The only way to have double underflow is with an exponent or a decimal
point.  Double consumes their characters, uint64_t does not.  We

Re: [PATCH RESEND v2] util/cutils: Expand do_strtosz parsing precision to 64 bits

2019-12-16 Thread Tao Xu


Gentle ping.

On 12/9/2019 4:30 PM, Xu, Tao3 wrote:

Parse input string both as a double and as a uint64_t, then use the
method which consumes more characters. Update the related test cases.

Signed-off-by: Tao Xu 
---

Changes in v2:
 - Resend to use a double smaller than DBL_MIN
 - Add more test case for double overflow and underflow.
 - Set mul as int64_t (Markus)
 - Restore endptr (Markus)
---
  tests/test-cutils.c| 37 +++
  tests/test-keyval.c| 47 +
  tests/test-qemu-opts.c | 39 +---
  util/cutils.c  | 67 +++---
  4 files changed, 75 insertions(+), 115 deletions(-)

diff --git a/tests/test-cutils.c b/tests/test-cutils.c
index 1aa8351520..49e495b8ba 100644
--- a/tests/test-cutils.c
+++ b/tests/test-cutils.c
@@ -1970,40 +1970,25 @@ static void test_qemu_strtosz_simple(void)
  g_assert_cmpint(err, ==, 0);
  g_assert_cmpint(res, ==, 12345);
  
-/* Note: precision is 53 bits since we're parsing with strtod() */

-
-str = "9007199254740991"; /* 2^53-1 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x1f);
-g_assert(endptr == str + 16);
-
-str = "9007199254740992"; /* 2^53 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x20);
-g_assert(endptr == str + 16);
+/* Note: precision is 64 bits (UINT64_MAX) */
  
  str = "9007199254740993"; /* 2^53+1 */

  err = qemu_strtosz(str, , );
  g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x20); /* rounded to 53 bits */
+g_assert_cmpint(res, ==, 0x21);
  g_assert(endptr == str + 16);
  
-str = "18446744073709549568"; /* 0xf800 (53 msbs set) */

+str = "18446744073709550591"; /* 0xfbff */
  err = qemu_strtosz(str, , );
  g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0xf800);
+g_assert_cmpint(res, ==, 0xfbff);
  g_assert(endptr == str + 20);
  
-str = "18446744073709550591"; /* 0xfbff */

+str = "18446744073709551615"; /* 2^64-1 (UINT64_MAX) */
  err = qemu_strtosz(str, , );
  g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0xf800); /* rounded to 53 bits */
+g_assert_cmpint(res, ==, 0x);
  g_assert(endptr == str + 20);
-
-/* 0x7e00..0x7fff get rounded to
- * 0x8000, thus -ERANGE; see test_qemu_strtosz_erange() */
  }
  
  static void test_qemu_strtosz_units(void)

@@ -2145,20 +2130,20 @@ static void test_qemu_strtosz_erange(void)
  g_assert_cmpint(err, ==, -ERANGE);
  g_assert(endptr == str + 2);
  
-str = "18446744073709550592"; /* 0xfc00 */

+str = "18446744073709551616"; /* 2^64 */
  err = qemu_strtosz(str, , );
  g_assert_cmpint(err, ==, -ERANGE);
  g_assert(endptr == str + 20);
  
-str = "18446744073709551615"; /* 2^64-1 */

+str = "1.7976931348623158e+308"; /* DBL_MAX, double overflows */
  err = qemu_strtosz(str, , );
  g_assert_cmpint(err, ==, -ERANGE);
-g_assert(endptr == str + 20);
+g_assert(endptr == str + 23);
  
-str = "18446744073709551616"; /* 2^64 */

+str = "2.225e-308"; /* Small than DBL_MIN, double underflows */
  err = qemu_strtosz(str, , );
  g_assert_cmpint(err, ==, -ERANGE);
-g_assert(endptr == str + 20);
+g_assert(endptr == str + 10);
  
  str = "20E";

  err = qemu_strtosz(str, , );
diff --git a/tests/test-keyval.c b/tests/test-keyval.c
index 09b0ae3c68..fad941fcb8 100644
--- a/tests/test-keyval.c
+++ b/tests/test-keyval.c
@@ -383,59 +383,26 @@ static void test_keyval_visit_size(void)
  visit_end_struct(v, NULL);
  visit_free(v);
  
-/* Note: precision is 53 bits since we're parsing with strtod() */

+/* Note: precision is 64 bits (UINT64_MAX) */
  
-/* Around limit of precision: 2^53-1, 2^53, 2^53+1 */

-qdict = keyval_parse("sz1=9007199254740991,"
- "sz2=9007199254740992,"
- "sz3=9007199254740993",
+/* Around limit of precision: UINT64_MAX - 1, UINT64_MAX */
+qdict = keyval_parse("sz1=18446744073709551614,"
+ "sz2=18446744073709551615",
   NULL, &error_abort);
  v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
  qobject_unref(qdict);
  visit_start_struct(v, NULL, NULL, 0, &error_abort);
  visit_type_size(v, "sz1", &sz, &error_abort);
-g_assert_cmphex(sz, ==, 0x1fffffffffffff);
+g_assert_cmphex(sz, ==, 0xfffffffffffffffe);
  visit_type_size(v, "sz2", , _a

Re: [PATCH v2 0/4] Add extra information to versioned CPU models

2019-12-16 Thread Tao Xu


Ping for comments.

On 12/9/2019 3:12 PM, Tao Xu wrote:

This series of patches removes MPX from Denverton and removes MONITOR
from some CPU models. It also adds additional information to -cpu help
to indicate the changes in each version of a CPU model.

The output is as follows:
./x86_64-softmmu/qemu-system-x86_64 -cpu help | grep "\["
x86 Broadwell-v2  Intel Core Processor (Broadwell) [no TSX]
x86 Broadwell-v3  Intel Core Processor (Broadwell) [IBRS]
x86 Broadwell-v4  Intel Core Processor (Broadwell) [no TSX, IBRS]
x86 Cascadelake-Server-v2  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES]
x86 Cascadelake-Server-v3  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES, no TSX]
x86 Denverton-v2  Intel Atom Processor (Denverton) [no MPX, no MONITOR]
x86 Dhyana-v2 Hygon Dhyana Processor [no MONITOR]
x86 EPYC-v2   AMD EPYC Processor [IBPB]
x86 EPYC-v3   AMD EPYC Processor [IBPB, no MONITOR]
x86 Haswell-v2Intel Core Processor (Haswell) [no TSX]
x86 Haswell-v3Intel Core Processor (Haswell) [IBRS]
x86 Haswell-v4Intel Core Processor (Haswell) [no TSX, IBRS]
x86 Icelake-Client-v2 Intel Core Processor (Icelake) [no TSX]
x86 Icelake-Server-v2 Intel Xeon Processor (Icelake) [no TSX]
x86 IvyBridge-v2  Intel Xeon E3-12xx v2 (Ivy Bridge) [IBRS]
x86 Nehalem-v2Intel Core i7 9xx (Nehalem Class Core i7) [IBRS]
x86 Opteron_G3-v2 AMD Opteron 23xx (Gen 3 Class Opteron) [no MONITOR]
x86 SandyBridge-v2Intel Xeon E312xx (Sandy Bridge) [IBRS]
x86 Skylake-Client-v2 Intel Core Processor (Skylake) [IBRS]
x86 Skylake-Client-v3 Intel Core Processor (Skylake) [no TSX, IBRS]
x86 Skylake-Server-v2 Intel Xeon Processor (Skylake) [IBRS]
x86 Skylake-Server-v3 Intel Xeon Processor (Skylake) [no TSX, IBRS]
x86 Snowridge-v2  Intel Atom Processor (SnowRidge) [no MPX]
x86 Snowridge-v3  Intel Atom Processor (SnowRidge) [no MPX, no MONITOR]
x86 Westmere-v2   Westmere E56xx/L56xx/X56xx (Nehalem-C) [IBRS]

Changes in v2:
 - correct the note of Cascadelake v3 (Xiaoyao)

Tao Xu (4):
   target/i386: Add Denverton-v2 (no MPX) CPU model
   target/i386: Remove monitor from some CPU models
   target/i386: Add new property note to versioned CPU models
   target/i386: Add notes for versioned CPU models

  target/i386/cpu.c | 112 +++---
  1 file changed, 85 insertions(+), 27 deletions(-)

--
2.20.1

Re: [PATCH 2/2] numa: properly check if numa is supported

2019-12-15 Thread Tao Xu


On 12/13/2019 5:12 PM, Igor Mammedov wrote:

On Fri, 13 Dec 2019 09:33:10 +0800
Tao Xu  wrote:


On 12/12/2019 8:48 PM, Igor Mammedov wrote:

Commit aa57020774b, by mistake used MachineClass::numa_mem_supported
to check if NUMA is supported by machine and also as unrelated change
set it to true for sbsa-ref board.

Luckily change didn't break machines that support NUMA, as the field
is set to true for them.

But the field is not intended for checking if NUMA is supported and
will be flipped to false within this release for new machine types.

Fix it:
   - by using previously used condition
!mc->cpu_index_to_instance_props || !mc->get_default_cpu_node_id
 the first time and then use MachineState::numa_state down the road
 to check if NUMA is supported
   - dropping stray sbsa-ref chunk

Fixes: aa57020774b690a22be72453b8e91c9b5a68c516
Signed-off-by: Igor Mammedov 
---
CC: Radoslaw Biernacki 
CC: Peter Maydell 
CC: Leif Lindholm 
CC: qemu-...@nongnu.org
CC: qemu-sta...@nongnu.org


   hw/arm/sbsa-ref.c | 1 -
   hw/core/machine.c | 4 ++--
   2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index 27046cc..c6261d4 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -791,7 +791,6 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data)
   mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids;
   mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props;
   mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id;
-mc->numa_mem_supported = true;
   }
   
   static const TypeInfo sbsa_ref_info = {

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 1689ad3..aa63231 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -958,7 +958,7 @@ static void machine_initfn(Object *obj)
   NULL);
   }
   
-if (mc->numa_mem_supported) {

+if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) {
   ms->numa_state = g_new0(NumaState, 1);
   }


I am wondering if @numa_mem_supported is unused here, it is unused for
QEMU, because the only usage of @numa_mem_supported is to initialize
@numa_state. Or there is other usage? So should it be removed from
struct MachineClass?

You are wrong, it's not intended for numa_state initialization,
read doc comment for it in include/hw/boards.h
(for full story look at commit cd5ff8333a3)


I understand.

Re: [PATCH RESEND v20 0/8] Build ACPI Heterogeneous Memory Attribute Table (HMAT)

2019-12-15 Thread Tao Xu


On 12/13/2019 6:06 PM, Michael S. Tsirkin wrote:

On Fri, Dec 13, 2019 at 09:19:21AM +0800, Tao Xu wrote:

This series of patches will build Heterogeneous Memory Attribute Table (HMAT)
according to the command line. The ACPI HMAT describes the memory attributes,
such as memory side cache attributes and bandwidth and latency details,
related to the Memory Proximity Domain.
The software is expected to use HMAT information as hint for optimization.

In the linux kernel, the codes in drivers/acpi/hmat/hmat.c parse and report
the platform's HMAT tables.

The V19 patches link:
https://patchwork.kernel.org/cover/11265525/


Looks good to me, I'll queue it for merge after the release. If possible
please ping me after the release to help make sure it didn't get
dropped.



Thank you!




Changelog:
v20:
 - Resend to fix the wrong target in pc_hmat_erange_cfg()
 - Use g_assert_true and g_assert_false to replace g_assert
   (Thomas and Markus)
 - Rename assoc as associativity, update the QAPI description (Markus)
 - Disable cache level 0 in hmat-cache option (Igor)
 - Keep base and bitmap unchanged when latency or bandwidth
   out of range
 - Fix the broken CI case when user input latency or bandwidth
   less than required.
v19:
 - Add description about the machine property 'hmat' in commit
   message (Markus)
 - Update the QAPI comments
 - Add a check for no memory side cache
 - Add some fail cases for hmat-cache when level=0
v18:
 - Defer patches 01/14~06/14 of V17, use qapi type uint64 and
   only nanosecond for latency (Markus)
 - Rewrite the lines over 80 characters(Igor)
v17:
 - Add check when user input latency or bandwidth 0, the
   lb_info_provided should also be 0. Because in ACPI 6.3 5.2.27.4,
   0 means the corresponding latency or bandwidth information is
   not provided.
 - Fix the infinite loop when node->latency is 0.
 - Use NumaHmatCacheOptions to replace HMAT_Cache_Info (Igor)
 - Add check for unordered cache level input (Igor)
 - Add some fail test cases (Igor)
v16:
 - Add and use qemu_strtold_finite to parse size, support full
   64bit precision, modify related test cases (Eduardo and Markus)
 - Simplify struct HMAT_LB_Info and related code, unify latency
   and bandwidth (Igor)
 - Add cross check with hmat_lb data (Igor)
 - Fields in Cache Attributes are promoted to uint32_t before
   shifting (Igor)
 - Add case for QMP build HMAT (Igor)
v15:
 - Add a new patch to refactor do_strtosz() (Eduardo)
 - Make tests without breaking CI (Michael)
v14:
 - Reuse the codes of do_strtosz to build qemu_strtotime_ns
   (Eduardo)
 - Squash patch v13 01/12 and 02/12 together (Daniel and Eduardo)
 - Drop time unit picosecond (Eric)
 - Use qemu ctz64 and clz64 instead of builtin function
v13:
 - Modify some text description
 - Drop "initiator_valid" field in struct NodeInfo
 - Reuse Garray to store the raw bandwidth and bandwidth data
 - Calculate common base unit using range bitmap
 - Add a patch to calculate hmat latency and bandwidth entry list
 - Drop the total_levels option and use readable cache size
 - Remove the unnecessary head file
 - Use decimal notation with appropriate suffix for cache size

Liu Jingqi (5):
   numa: Extend CLI to provide memory latency and bandwidth information
   numa: Extend CLI to provide memory side cache information
   hmat acpi: Build Memory Proximity Domain Attributes Structure(s)
   hmat acpi: Build System Locality Latency and Bandwidth Information
 Structure(s)
   hmat acpi: Build Memory Side Cache Information Structure(s)

Tao Xu (3):
   numa: Extend CLI to provide initiator information for numa nodes
   tests/numa: Add case for QMP build HMAT
   tests/bios-tables-test: add test cases for ACPI HMAT

  hw/acpi/Kconfig   |   7 +-
  hw/acpi/Makefile.objs |   1 +
  hw/acpi/hmat.c| 268 +++
  hw/acpi/hmat.h|  42 
  hw/core/machine.c |  64 ++
  hw/core/numa.c| 297 ++
  hw/i386/acpi-build.c  |   5 +
  include/sysemu/numa.h |  63 ++
  qapi/machine.json | 180 +++-
  qemu-options.hx   |  95 +++-
  tests/bios-tables-test-allowed-diff.h |   8 +
  tests/bios-tables-test.c  |  44 
  tests/data/acpi/pc/APIC.acpihmat  |   0
  tests/data/acpi/pc/DSDT.acpihmat  |   0
  tests/data/acpi/pc/HMAT.acpihmat  |   0
  tests/data/acpi/pc/SRAT.acpihmat  |   0
  tests/data/acpi/q35/APIC.acpihmat |   0
  tests/data/acpi/q35/DSDT.acpihmat |   0
  tests/data/acpi/q35/HMAT.acpihmat |   0
  tests/data/acpi/q35/SRAT.acpihmat |   0
  tests/numa-test.c | 213 ++

Re: [PATCH 2/2] numa: properly check if numa is supported

2019-12-12 Thread Tao Xu


On 12/12/2019 8:48 PM, Igor Mammedov wrote:

Commit aa57020774b, by mistake used MachineClass::numa_mem_supported
to check if NUMA is supported by machine and also as unrelated change
set it to true for sbsa-ref board.

Luckily change didn't break machines that support NUMA, as the field
is set to true for them.

But the field is not intended for checking if NUMA is supported and
will be flipped to false within this release for new machine types.

Fix it:
  - by using previously used condition
   !mc->cpu_index_to_instance_props || !mc->get_default_cpu_node_id
the first time and then use MachineState::numa_state down the road
to check if NUMA is supported
  - dropping stray sbsa-ref chunk

Fixes: aa57020774b690a22be72453b8e91c9b5a68c516
Signed-off-by: Igor Mammedov 
---
CC: Radoslaw Biernacki 
CC: Peter Maydell 
CC: Leif Lindholm 
CC: qemu-...@nongnu.org
CC: qemu-sta...@nongnu.org


  hw/arm/sbsa-ref.c | 1 -
  hw/core/machine.c | 4 ++--
  2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index 27046cc..c6261d4 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -791,7 +791,6 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data)
  mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids;
  mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props;
  mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id;
-mc->numa_mem_supported = true;
  }
  
  static const TypeInfo sbsa_ref_info = {

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 1689ad3..aa63231 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -958,7 +958,7 @@ static void machine_initfn(Object *obj)
  NULL);
  }
  
-if (mc->numa_mem_supported) {

+if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) {
  ms->numa_state = g_new0(NumaState, 1);
  }


I am wondering if @numa_mem_supported is unused here, it is unused for 
QEMU, because the only usage of @numa_mem_supported is to initialize 
@numa_state. Or there is other usage? So should it be removed from 
struct MachineClass?

[PATCH RESEND v20 6/8] hmat acpi: Build Memory Side Cache Information Structure(s)

2019-12-12 Thread Tao Xu

From: Liu Jingqi 

This structure describes memory side cache information for memory
proximity domains if the memory side cache is present and the
physical device forms the memory side cache.
The software could use this information to effectively place
the data in memory to maximize the performance of the system
memory that uses the memory side cache.

Acked-by: Markus Armbruster 
Reviewed-by: Igor Mammedov 
Reviewed-by: Daniel Black 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

No changes in v20.

Changes in v16:
- Use checks and assert to replace masks (Igor)
- Fields in Cache Attributes are promoted to uint32_t before
  shifting (Igor)
- Drop cpu_to_le32() (Igor)

Changes in v13:
- rename level as cache_level
---
 hw/acpi/hmat.c | 69 +-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
index 4635d45dee..7c24bb5371 100644
--- a/hw/acpi/hmat.c
+++ b/hw/acpi/hmat.c
@@ -143,14 +143,62 @@ static void build_hmat_lb(GArray *table_data, 
HMAT_LB_Info *hmat_lb,
 g_free(entry_list);
 }
 
+/* ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure: Table 5-147 */
+static void build_hmat_cache(GArray *table_data, uint8_t total_levels,
+ NumaHmatCacheOptions *hmat_cache)
+{
+/*
+ * Cache Attributes: Bits [3:0] – Total Cache Levels
+ * for this Memory Proximity Domain
+ */
+uint32_t cache_attr = total_levels;
+
+/* Bits [7:4] : Cache Level described in this structure */
+cache_attr |= (uint32_t) hmat_cache->level << 4;
+
+/* Bits [11:8] - Cache Associativity */
+cache_attr |= (uint32_t) hmat_cache->associativity << 8;
+
+/* Bits [15:12] - Write Policy */
+cache_attr |= (uint32_t) hmat_cache->policy << 12;
+
+/* Bits [31:16] - Cache Line size in bytes */
+cache_attr |= (uint32_t) hmat_cache->line << 16;
+
+/* Type */
+build_append_int_noprefix(table_data, 2, 2);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Length */
+build_append_int_noprefix(table_data, 32, 4);
+/* Proximity Domain for the Memory */
+build_append_int_noprefix(table_data, hmat_cache->node_id, 4);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 4);
+/* Memory Side Cache Size */
+build_append_int_noprefix(table_data, hmat_cache->size, 8);
+/* Cache Attributes */
+build_append_int_noprefix(table_data, cache_attr, 4);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/*
+ * Number of SMBIOS handles (n)
+ * Linux kernel uses Memory Side Cache Information Structure
+ * without SMBIOS entries for now, so set Number of SMBIOS handles
+ * as 0.
+ */
+build_append_int_noprefix(table_data, 0, 2);
+}
+
 /* Build HMAT sub table structures */
 static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
 {
 uint16_t flags;
 uint32_t num_initiator = 0;
 uint32_t initiator_list[MAX_NODES];
-int i, hierarchy, type;
+int i, hierarchy, type, cache_level, total_levels;
 HMAT_LB_Info *hmat_lb;
+NumaHmatCacheOptions *hmat_cache;
 
 for (i = 0; i < numa_state->num_nodes; i++) {
 flags = 0;
@@ -184,6 +232,25 @@ static void hmat_build_table_structs(GArray *table_data, 
NumaState *numa_state)
 }
 }
 }
+
+/*
+ * ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure:
+ * Table 5-147
+ */
+for (i = 0; i < numa_state->num_nodes; i++) {
+total_levels = 0;
+for (cache_level = 1; cache_level < HMAT_LB_LEVELS; cache_level++) {
+if (numa_state->hmat_cache[i][cache_level]) {
+total_levels++;
+}
+}
+for (cache_level = 0; cache_level <= total_levels; cache_level++) {
+hmat_cache = numa_state->hmat_cache[i][cache_level];
+if (hmat_cache) {
+build_hmat_cache(table_data, total_levels, hmat_cache);
+}
+}
+}
 }
 
 void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state)
-- 
2.20.1

[PATCH RESEND v20 5/8] hmat acpi: Build System Locality Latency and Bandwidth Information Structure(s)

2019-12-12 Thread Tao Xu

From: Liu Jingqi 

This structure describes the memory access latency and bandwidth
information from various memory access initiator proximity domains.
The latency and bandwidth numbers represented in this structure
correspond to rated latency and bandwidth for the platform.
The software could use this information as hint for optimization.

Acked-by: Markus Armbruster 
Reviewed-by: Igor Mammedov 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

Changes in v20:
- Fix the broken CI case when user input latency or bandwidth
  less than required

Changes in v17:
- Remove unnecessary header file (Igor)

Changes in v16:
- Add more description for lb_length (Igor)
- Drop entry_list and calculate entries in this patch (Igor)

Changes in v13:
- Calculate the entries in a new patch.
---
 hw/acpi/hmat.c | 104 -
 1 file changed, 103 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
index 9ff79308a4..4635d45dee 100644
--- a/hw/acpi/hmat.c
+++ b/hw/acpi/hmat.c
@@ -25,6 +25,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "sysemu/numa.h"
 #include "hw/acpi/hmat.h"
 
@@ -67,11 +68,89 @@ static void build_hmat_mpda(GArray *table_data, uint16_t 
flags,
 build_append_int_noprefix(table_data, 0, 8);
 }
 
+/*
+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+ * Structure: Table 5-146
+ */
+static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
+  uint32_t num_initiator, uint32_t num_target,
+  uint32_t *initiator_list)
+{
+int i, index;
+HMAT_LB_Data *lb_data;
+uint16_t *entry_list;
+uint32_t base;
+/* Length in bytes for entire structure */
+uint32_t lb_length
+= 32 /* Table length upto and including Entry Base Unit */
++ 4 * num_initiator /* Initiator Proximity Domain List */
++ 4 * num_target /* Target Proximity Domain List */
++ 2 * num_initiator * num_target; /* Latency or Bandwidth Entries */
+
+/* Type */
+build_append_int_noprefix(table_data, 1, 2);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Length */
+build_append_int_noprefix(table_data, lb_length, 4);
+/* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
+assert(!(hmat_lb->hierarchy >> 4));
+build_append_int_noprefix(table_data, hmat_lb->hierarchy, 1);
+/* Data Type */
+build_append_int_noprefix(table_data, hmat_lb->data_type, 1);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Number of Initiator Proximity Domains (s) */
+build_append_int_noprefix(table_data, num_initiator, 4);
+/* Number of Target Proximity Domains (t) */
+build_append_int_noprefix(table_data, num_target, 4);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 4);
+
+/* Entry Base Unit */
+if (hmat_lb->data_type <= HMAT_LB_DATA_WRITE_LATENCY) {
+/* Convert latency base from nanoseconds to picosecond */
+base = hmat_lb->base * 1000;
+} else {
+/* Convert bandwidth base from Byte to Megabyte */
+base = hmat_lb->base / MiB;
+}
+build_append_int_noprefix(table_data, base, 8);
+
+/* Initiator Proximity Domain List */
+for (i = 0; i < num_initiator; i++) {
+build_append_int_noprefix(table_data, initiator_list[i], 4);
+}
+
+/* Target Proximity Domain List */
+for (i = 0; i < num_target; i++) {
+build_append_int_noprefix(table_data, i, 4);
+}
+
+/* Latency or Bandwidth Entries */
+entry_list = g_malloc0(num_initiator * num_target * sizeof(uint16_t));
+for (i = 0; i < hmat_lb->list->len; i++) {
+lb_data = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+index = lb_data->initiator * num_target + lb_data->target;
+
+entry_list[index] = (uint16_t)(lb_data->data / hmat_lb->base);
+}
+
+for (i = 0; i < num_initiator * num_target; i++) {
+build_append_int_noprefix(table_data, entry_list[i], 2);
+}
+
+g_free(entry_list);
+}
+
 /* Build HMAT sub table structures */
 static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
 {
 uint16_t flags;
-int i;
+uint32_t num_initiator = 0;
+uint32_t initiator_list[MAX_NODES];
+int i, hierarchy, type;
+HMAT_LB_Info *hmat_lb;
 
 for (i = 0; i < numa_state->num_nodes; i++) {
 flags = 0;
@@ -82,6 +161,29 @@ static void hmat_build_table_structs(GArray *table_data, 
NumaState *numa_state)
 
 build_hmat_mpda(table_data, flags, numa_state->nodes[i].initiator, i);
 }
+
+for (i = 0; i < numa_state->num_nodes; i++) {
+if (numa_state->nodes[i].has_cpu) {
+initiator_list[num_initiator++] = i;
+}
+}

[PATCH RESEND v20 7/8] tests/numa: Add case for QMP build HMAT

2019-12-12 Thread Tao Xu

Check configuring HMAT usecase

Acked-by: Markus Armbruster 
Suggested-by: Igor Mammedov 
Signed-off-by: Tao Xu 
---

Changes in v20:
- Fix the wrong target in pc_hmat_erange_cfg
- Use g_assert_true and g_assert_false to replace g_assert
  (Thomas and Markus)

Changes in v19:
- Add some fail cases for hmat-cache when level=0

Changes in v18:
- Rewrite the lines over 80 characters

Changes in v17:
- Add some fail test cases (Igor)
---
 tests/numa-test.c | 213 ++
 1 file changed, 213 insertions(+)

diff --git a/tests/numa-test.c b/tests/numa-test.c
index 8de8581231..17dd807d2a 100644
--- a/tests/numa-test.c
+++ b/tests/numa-test.c
@@ -327,6 +327,216 @@ static void pc_dynamic_cpu_cfg(const void *data)
 qtest_quit(qs);
 }
 
+static void pc_hmat_build_cfg(const void *data)
+{
+QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on "
+ "-smp 2,sockets=2 "
+ "-m 128M,slots=2,maxmem=1G "
+ "-object memory-backend-ram,size=64M,id=m0 "
+ "-object memory-backend-ram,size=64M,id=m1 "
+ "-numa node,nodeid=0,memdev=m0 "
+ "-numa node,nodeid=1,memdev=m1,initiator=0 "
+ "-numa cpu,node-id=0,socket-id=0 "
+ "-numa cpu,node-id=0,socket-id=1",
+ data ? (char *)data : "");
+
+/* Fail: Initiator should be less than the number of nodes */
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 2, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+/* Fail: Target should be less than the number of nodes */
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 2,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+/* Fail: Initiator should contain cpu */
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 1, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+/* Fail: Data-type mismatch */
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"write-latency\","
+" 'bandwidth': 524288000 } }")));
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"read-bandwidth\","
+" 'latency': 5 } }")));
+
+/* Fail: Bandwidth should be 1MB (1048576) aligned */
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+" 'bandwidth': 1048575 } }")));
+
+/* Configuring HMAT bandwidth and latency details */
+g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+" 'latency': 1 } }")));/* 1 ns */
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+" 'latency': 5 } }")));/* Fail: Duplicate configuration */
+g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+" 'bandwidth': 68717379584 } }")));/* 65534 MB/s */
+g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
+" 'hierarchy': \"memory\", 'data-type

[PATCH RESEND v20 3/8] numa: Extend CLI to provide memory side cache information

2019-12-12 Thread Tao Xu

From: Liu Jingqi 

Add -numa hmat-cache option to provide Memory Side Cache Information.
These memory attributes help to build Memory Side Cache Information
Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT).
Before using hmat-cache option, enable HMAT with -machine hmat=on.

Acked-by: Markus Armbruster 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

Changes in v20:
- Disable cache level 0 in hmat-cache option (Igor)
- Update the QAPI description (Markus)

Changes in v19:
- Add description about the machine property 'hmat' in commit
  message (Markus)
- Update the QAPI comments
- Add a check for no memory side cache

Changes in v18:
- Update the error message (Igor)

Changes in v17:
- Use NumaHmatCacheOptions to replace HMAT_Cache_Info (Igor)
- Add check for unordered cache level input (Igor)

Changes in v16:
- Add cross check with hmat_lb data (Igor)
- Drop total_levels in struct HMAT_Cache_Info (Igor)
- Correct the error table number (Igor)
---
 hw/core/numa.c| 80 ++
 include/sysemu/numa.h |  5 +++
 qapi/machine.json | 81 +--
 qemu-options.hx   | 17 +++--
 4 files changed, 179 insertions(+), 4 deletions(-)

diff --git a/hw/core/numa.c b/hw/core/numa.c
index 34eb413f5d..33fda31a4c 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -379,6 +379,73 @@ void parse_numa_hmat_lb(NumaState *numa_state, 
NumaHmatLBOptions *node,
 g_array_append_val(hmat_lb->list, lb_data);
 }
 
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
+   Error **errp)
+{
+int nb_numa_nodes = ms->numa_state->num_nodes;
+NodeInfo *numa_info = ms->numa_state->nodes;
+NumaHmatCacheOptions *hmat_cache = NULL;
+
+if (node->node_id >= nb_numa_nodes) {
+error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
+   "than %d", node->node_id, nb_numa_nodes);
+return;
+}
+
+if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) {
+error_setg(errp, "The latency and bandwidth information of "
+   "node-id=%" PRIu32 " should be provided before memory side "
+   "cache attributes", node->node_id);
+return;
+}
+
+if (node->level < 1 || node->level >= HMAT_LB_LEVELS) {
+error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 
"
+   "and less than or equal to %d", node->level,
+   HMAT_LB_LEVELS - 1);
+return;
+}
+
+assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX);
+assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);
+if (ms->numa_state->hmat_cache[node->node_id][node->level]) {
+error_setg(errp, "Duplicate configuration of the side cache for "
+   "node-id=%" PRIu32 " and level=%" PRIu8,
+   node->node_id, node->level);
+return;
+}
+
+if ((node->level > 1) &&
+ms->numa_state->hmat_cache[node->node_id][node->level - 1] &&
+(node->size >=
+ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) 
{
+error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+   " should be less than the size(%" PRIu64 ") of "
+   "level=%" PRIu8, node->size, node->level,
+   ms->numa_state->hmat_cache[node->node_id]
+ [node->level - 1]->size,
+   node->level - 1);
+return;
+}
+
+if ((node->level < HMAT_LB_LEVELS - 1) &&
+ms->numa_state->hmat_cache[node->node_id][node->level + 1] &&
+(node->size <=
+ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) 
{
+error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+   " should be larger than the size(%" PRIu64 ") of "
+   "level=%" PRIu8, node->size, node->level,
+   ms->numa_state->hmat_cache[node->node_id]
+ [node->level + 1]->size,
+   node->level + 1);
+return;
+}
+
+hmat_cache = g_malloc0(sizeof(*hmat_cache));
+memcpy(hmat_cache, node, sizeof(*hmat_cache));
+ms->numa_state->hmat_cache[node->node_id][node->level] = hmat_cache;
+}
+
 void set_numa_options(MachineState *ms, NumaOptions *ob

[PATCH RESEND v20 8/8] tests/bios-tables-test: add test cases for ACPI HMAT

2019-12-12 Thread Tao Xu

ACPI table HMAT has been introduced, QEMU now builds HMAT tables for
Heterogeneous Memory with boot option '-numa node'.

Add test cases on PC and Q35 machines with 2 numa nodes.
Because HMAT is generated when the system enables NUMA, the
following tables need to be added for this test:
tests/data/acpi/pc/APIC.acpihmat
tests/data/acpi/pc/SRAT.acpihmat
tests/data/acpi/pc/HMAT.acpihmat
tests/data/acpi/pc/DSDT.acpihmat
tests/data/acpi/q35/APIC.acpihmat
tests/data/acpi/q35/SRAT.acpihmat
tests/data/acpi/q35/HMAT.acpihmat
tests/data/acpi/q35/DSDT.acpihmat

Acked-by: Markus Armbruster 
Reviewed-by: Igor Mammedov 
Reviewed-by: Daniel Black 
Reviewed-by: Jingqi Liu 
Suggested-by: Igor Mammedov 
Signed-off-by: Tao Xu 
---

No changes in v20.

Changes in v18:
- Remove unit "ns".

Changes in v17:
- Update the latency and bandwidth

Changes in v15:
- Make tests without breaking CI (Michael)

Changes in v13:
- Use decimal notation with appropriate suffix for cache size
---
 tests/bios-tables-test-allowed-diff.h |  8 +
 tests/bios-tables-test.c  | 44 +++
 tests/data/acpi/pc/APIC.acpihmat  |  0
 tests/data/acpi/pc/DSDT.acpihmat  |  0
 tests/data/acpi/pc/HMAT.acpihmat  |  0
 tests/data/acpi/pc/SRAT.acpihmat  |  0
 tests/data/acpi/q35/APIC.acpihmat |  0
 tests/data/acpi/q35/DSDT.acpihmat |  0
 tests/data/acpi/q35/HMAT.acpihmat |  0
 tests/data/acpi/q35/SRAT.acpihmat |  0
 10 files changed, 52 insertions(+)
 create mode 100644 tests/data/acpi/pc/APIC.acpihmat
 create mode 100644 tests/data/acpi/pc/DSDT.acpihmat
 create mode 100644 tests/data/acpi/pc/HMAT.acpihmat
 create mode 100644 tests/data/acpi/pc/SRAT.acpihmat
 create mode 100644 tests/data/acpi/q35/APIC.acpihmat
 create mode 100644 tests/data/acpi/q35/DSDT.acpihmat
 create mode 100644 tests/data/acpi/q35/HMAT.acpihmat
 create mode 100644 tests/data/acpi/q35/SRAT.acpihmat

diff --git a/tests/bios-tables-test-allowed-diff.h 
b/tests/bios-tables-test-allowed-diff.h
index dfb8523c8b..3c9e0c979b 100644
--- a/tests/bios-tables-test-allowed-diff.h
+++ b/tests/bios-tables-test-allowed-diff.h
@@ -1 +1,9 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/pc/APIC.acpihmat",
+"tests/data/acpi/pc/SRAT.acpihmat",
+"tests/data/acpi/pc/HMAT.acpihmat",
+"tests/data/acpi/pc/DSDT.acpihmat",
+"tests/data/acpi/q35/APIC.acpihmat",
+"tests/data/acpi/q35/SRAT.acpihmat",
+"tests/data/acpi/q35/HMAT.acpihmat",
+"tests/data/acpi/q35/DSDT.acpihmat",
diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index 79f5da092f..9823820043 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -947,6 +947,48 @@ static void test_acpi_virt_tcg_numamem(void)
 
 }
 
+static void test_acpi_tcg_acpi_hmat(const char *machine)
+{
+test_data data;
+
+memset(&data, 0, sizeof(data));
+data.machine = machine;
+data.variant = ".acpihmat";
+test_acpi_one(" -machine hmat=on"
+  " -smp 2,sockets=2"
+  " -m 128M,slots=2,maxmem=1G"
+  " -object memory-backend-ram,size=64M,id=m0"
+  " -object memory-backend-ram,size=64M,id=m1"
+  " -numa node,nodeid=0,memdev=m0"
+  " -numa node,nodeid=1,memdev=m1,initiator=0"
+  " -numa cpu,node-id=0,socket-id=0"
+  " -numa cpu,node-id=0,socket-id=1"
+  " -numa hmat-lb,initiator=0,target=0,hierarchy=memory,"
+  "data-type=access-latency,latency=1"
+  " -numa hmat-lb,initiator=0,target=0,hierarchy=memory,"
+  "data-type=access-bandwidth,bandwidth=65534M"
+  " -numa hmat-lb,initiator=0,target=1,hierarchy=memory,"
+  "data-type=access-latency,latency=65534"
+  " -numa hmat-lb,initiator=0,target=1,hierarchy=memory,"
+  "data-type=access-bandwidth,bandwidth=32767M"
+  " -numa hmat-cache,node-id=0,size=10K,level=1,"
+  "associativity=direct,policy=write-back,line=8"
+  " -numa hmat-cache,node-id=1,size=10K,level=1,"
+  "associativity=direct,policy=write-back,line=8",
+  &data);
+free_test_data(&data);
+}
+
+static void test_acpi_q35_tcg_acpi_hmat(void)
+{
+test_acpi_tcg_acpi_hmat(MACHINE_Q35);
+}
+
+static void test_acpi_piix4_tcg_acpi_hmat(void)
+{
+test_acpi_tcg_acpi_hmat(MACHINE_PC);
+}
+
 static void test_acpi_virt_tcg(void)
 {
 test_data data = {
@@ -991,6 +1033,8 @@ int main(int argc, char *argv[])
 qtest_add_func("acpi/q35/numamem", test_acpi

[PATCH RESEND v20 1/8] numa: Extend CLI to provide initiator information for numa nodes

2019-12-12 Thread Tao Xu

In ACPI 6.3 chapter 5.2.27, Heterogeneous Memory Attribute Table (HMAT),
the initiator represents a processor which accesses memory. In 5.2.27.3,
Memory Proximity Domain Attributes Structure, the attached initiator is
defined as where the memory controller responsible for a memory proximity
domain is attached. With attached initiator information, the topology of
heterogeneous memory can be described. Add a new machine property 'hmat'
to enable all HMAT-specific options.

Extend the CLI of the "-numa node" option to indicate the initiator NUMA
node-id. In the Linux kernel, the code in drivers/acpi/hmat/hmat.c parses
and reports the platform's HMAT tables. Before using the initiator option,
enable HMAT with -machine hmat=on.

Acked-by: Markus Armbruster 
Reviewed-by: Igor Mammedov 
Reviewed-by: Jingqi Liu 
Suggested-by: Dan Williams 
Signed-off-by: Tao Xu 
---

No changes in v20.

Changes in v19:
- Add description about the machine property 'hmat' in commit
  message (Markus)

Changes in v15:
- Change the QAPI version tag to 5.0 (Eric)
---
 hw/core/machine.c | 64 +++
 hw/core/numa.c| 23 
 include/sysemu/numa.h |  5 
 qapi/machine.json | 10 ++-
 qemu-options.hx   | 35 +++
 5 files changed, 131 insertions(+), 6 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 1689ad3bf8..d7d2cfa66d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -518,6 +518,20 @@ static void machine_set_nvdimm(Object *obj, bool value, 
Error **errp)
 ms->nvdimms_state->is_enabled = value;
 }
 
+static bool machine_get_hmat(Object *obj, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+return ms->numa_state->hmat_enabled;
+}
+
+static void machine_set_hmat(Object *obj, bool value, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+ms->numa_state->hmat_enabled = value;
+}
+
 static char *machine_get_nvdimm_persistence(Object *obj, Error **errp)
 {
 MachineState *ms = MACHINE(obj);
@@ -645,6 +659,7 @@ void machine_set_cpu_numa_node(MachineState *machine,
const CpuInstanceProperties *props, Error 
**errp)
 {
 MachineClass *mc = MACHINE_GET_CLASS(machine);
+NodeInfo *numa_info = machine->numa_state->nodes;
 bool match = false;
 int i;
 
@@ -714,6 +729,17 @@ void machine_set_cpu_numa_node(MachineState *machine,
 match = true;
 slot->props.node_id = props->node_id;
 slot->props.has_node_id = props->has_node_id;
+
+if (machine->numa_state->hmat_enabled) {
+if ((numa_info[props->node_id].initiator < MAX_NODES) &&
+(props->node_id != numa_info[props->node_id].initiator)) {
+error_setg(errp, "The initiator of CPU NUMA node %" PRId64
+" should be itself", props->node_id);
+return;
+}
+numa_info[props->node_id].has_cpu = true;
+numa_info[props->node_id].initiator = props->node_id;
+}
 }
 
 if (!match) {
@@ -960,6 +986,13 @@ static void machine_initfn(Object *obj)
 
 if (mc->numa_mem_supported) {
 ms->numa_state = g_new0(NumaState, 1);
+object_property_add_bool(obj, "hmat",
+ machine_get_hmat, machine_set_hmat,
+ _abort);
+object_property_set_description(obj, "hmat",
+"Set on/off to enable/disable "
+"ACPI Heterogeneous Memory Attribute "
+"Table (HMAT)", NULL);
 }
 
 /* Register notifier when init is done for sysbus sanity checks */
@@ -1048,6 +1081,32 @@ static char *cpu_slot_to_string(const CPUArchId *cpu)
 return g_string_free(s, false);
 }
 
+static void numa_validate_initiator(NumaState *numa_state)
+{
+int i;
+NodeInfo *numa_info = numa_state->nodes;
+
+for (i = 0; i < numa_state->num_nodes; i++) {
+if (numa_info[i].initiator == MAX_NODES) {
+error_report("The initiator of NUMA node %d is missing, use "
+ "'-numa node,initiator' option to declare it", i);
+exit(1);
+}
+
+if (!numa_info[numa_info[i].initiator].present) {
+error_report("NUMA node %" PRIu16 " is missing, use "
+ "'-numa node' option to declare it first",
+ numa_info[i].initiator);
+exit(1);
+}
+
+if (!numa_info[numa_info[i].initiator].has_cpu) {
+error_report("The initiator of NUMA node %d is invalid", i);
+exit(1);
+}
+}
+}
+
 static void machine_numa_finish_cpu_init(Mach

[PATCH RESEND v20 2/8] numa: Extend CLI to provide memory latency and bandwidth information

2019-12-12 Thread Tao Xu

From: Liu Jingqi 

Add -numa hmat-lb option to provide System Locality Latency and
Bandwidth Information. These memory attributes help to build
System Locality Latency and Bandwidth Information Structure(s)
in ACPI Heterogeneous Memory Attribute Table (HMAT). Before using
hmat-lb option, enable HMAT with -machine hmat=on.

Acked-by: Markus Armbruster 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

Changes in v20:
- Update the QAPI description (Markus)
- Keep base and bitmap unchanged when latency or bandwidth
  out of range

Changes in v19:
- Add description about the machine property 'hmat' in commit
  message (Markus)

Changes in v18:
- Use qapi type uint64 and only nanosecond for latency (Markus)

Changes in v17:
- Add check when user input latency or bandwidth 0, the
  lb_info_provided should also be 0. Because in ACPI 6.3 5.2.27.4,
  0 means the corresponding latency or bandwidth information is
  not provided.
- Fix the infinite loop when node->latency is 0.
---
 hw/core/numa.c| 194 ++
 include/sysemu/numa.h |  53 
 qapi/machine.json |  93 +++-
 qemu-options.hx   |  47 +-
 4 files changed, 384 insertions(+), 3 deletions(-)

diff --git a/hw/core/numa.c b/hw/core/numa.c
index e60da99293..34eb413f5d 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "sysemu/hostmem.h"
 #include "sysemu/numa.h"
 #include "sysemu/sysemu.h"
@@ -198,6 +199,186 @@ void parse_numa_distance(MachineState *ms, 
NumaDistOptions *dist, Error **errp)
 ms->numa_state->have_numa_distance = true;
 }
 
+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
+Error **errp)
+{
+int i, first_bit, last_bit;
+uint64_t max_entry, temp_base, bitmap_copy;
+NodeInfo *numa_info = numa_state->nodes;
+HMAT_LB_Info *hmat_lb =
+numa_state->hmat_lb[node->hierarchy][node->data_type];
+HMAT_LB_Data lb_data = {};
+HMAT_LB_Data *lb_temp;
+
+/* Error checking */
+if (node->initiator > numa_state->num_nodes) {
+error_setg(errp, "Invalid initiator=%d, it should be less than %d",
+   node->initiator, numa_state->num_nodes);
+return;
+}
+if (node->target > numa_state->num_nodes) {
+error_setg(errp, "Invalid target=%d, it should be less than %d",
+   node->target, numa_state->num_nodes);
+return;
+}
+if (!numa_info[node->initiator].has_cpu) {
+error_setg(errp, "Invalid initiator=%d, it isn't an "
+   "initiator proximity domain", node->initiator);
+return;
+}
+if (!numa_info[node->target].present) {
+error_setg(errp, "The target=%d should point to an existing node",
+   node->target);
+return;
+}
+
+if (!hmat_lb) {
+hmat_lb = g_malloc0(sizeof(*hmat_lb));
+numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb;
+hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data));
+}
+hmat_lb->hierarchy = node->hierarchy;
+hmat_lb->data_type = node->data_type;
+lb_data.initiator = node->initiator;
+lb_data.target = node->target;
+
+if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) {
+/* Input latency data */
+
+if (!node->has_latency) {
+error_setg(errp, "Missing 'latency' option");
+return;
+}
+if (node->has_bandwidth) {
+error_setg(errp, "Invalid option 'bandwidth' since "
+   "the data type is latency");
+return;
+}
+
+/* Detect duplicate configuration */
+for (i = 0; i < hmat_lb->list->len; i++) {
+lb_temp = _array_index(hmat_lb->list, HMAT_LB_Data, i);
+
+if (node->initiator == lb_temp->initiator &&
+node->target == lb_temp->target) {
+error_setg(errp, "Duplicate configuration of the latency for "
+"initiator=%d and target=%d", node->initiator,
+node->target);
+return;
+}
+}
+
+hmat_lb->base = hmat_lb->base ? hmat_lb->base : UINT64_MAX;
+
+if (node->latency) {
+/* Calculate the temporary base and compressed latency */
+max_entry = node->latency;
+temp_base = 1;
+while (QEMU_IS_ALIGNED(max_entry, 10)) {
+max_entry /= 10;
+temp_base *= 10;
+}
+
+/* C

[PATCH RESEND v20 4/8] hmat acpi: Build Memory Proximity Domain Attributes Structure(s)

2019-12-12 Thread Tao Xu

From: Liu Jingqi 

HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
(HMAT). The specification is available at the link below:
http://www.uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf

It describes the memory attributes, such as memory side cache
attributes and bandwidth and latency details, related to the
Memory Proximity Domain. The software is
expected to use this information as hint for optimization.

This structure describes Memory Proximity Domain Attributes by memory
subsystem and its associativity with processor proximity domain as well as
hint for memory usage.

In the Linux kernel, the code in drivers/acpi/hmat/hmat.c parses and reports
the platform's HMAT tables.

Acked-by: Markus Armbruster 
Reviewed-by: Igor Mammedov 
Reviewed-by: Daniel Black 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

No changes in v20.

Changes in v16:
- Use uint32_t for initiator and mem_node

Changes in v13:
- Remove the unnecessary header file.
---
 hw/acpi/Kconfig   |  7 ++-
 hw/acpi/Makefile.objs |  1 +
 hw/acpi/hmat.c| 99 +++
 hw/acpi/hmat.h| 42 ++
 hw/i386/acpi-build.c  |  5 +++
 5 files changed, 152 insertions(+), 2 deletions(-)
 create mode 100644 hw/acpi/hmat.c
 create mode 100644 hw/acpi/hmat.h

diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 12e3f1e86e..54209c6f2f 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -7,6 +7,7 @@ config ACPI_X86
 select ACPI_NVDIMM
 select ACPI_CPU_HOTPLUG
 select ACPI_MEMORY_HOTPLUG
+select ACPI_HMAT
 
 config ACPI_X86_ICH
 bool
@@ -23,6 +24,10 @@ config ACPI_NVDIMM
 bool
 depends on ACPI
 
+config ACPI_HMAT
+bool
+depends on ACPI
+
 config ACPI_PCI
 bool
 depends on ACPI && PCI
@@ -33,5 +38,3 @@ config ACPI_VMGENID
 depends on PC
 
 config ACPI_HW_REDUCED
-bool
-depends on ACPI
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 655a9c1973..517bd88704 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -7,6 +7,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
 common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
 common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
 common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o
+common-obj-$(CONFIG_ACPI_HMAT) += hmat.o
 common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
 
 common-obj-y += acpi_interface.o
diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
new file mode 100644
index 00..9ff79308a4
--- /dev/null
+++ b/hw/acpi/hmat.c
@@ -0,0 +1,99 @@
+/*
+ * HMAT ACPI Implementation
+ *
+ * Copyright(C) 2019 Intel Corporation.
+ *
+ * Author:
+ *  Liu jingqi 
+ *  Tao Xu 
+ *
+ * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
+ * (HMAT)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/numa.h"
+#include "hw/acpi/hmat.h"
+
+/*
+ * ACPI 6.3:
+ * 5.2.27.3 Memory Proximity Domain Attributes Structure: Table 5-145
+ */
+static void build_hmat_mpda(GArray *table_data, uint16_t flags,
+uint32_t initiator, uint32_t mem_node)
+{
+
+/* Memory Proximity Domain Attributes Structure */
+/* Type */
+build_append_int_noprefix(table_data, 0, 2);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Length */
+build_append_int_noprefix(table_data, 40, 4);
+/* Flags */
+build_append_int_noprefix(table_data, flags, 2);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Proximity Domain for the Attached Initiator */
+build_append_int_noprefix(table_data, initiator, 4);
+/* Proximity Domain for the Memory */
+build_append_int_noprefix(table_data, mem_node, 4);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 4);
+/*
+ * Reserved:
+ * Previously defined as the Start Address of the System Physical
+ * Address Range. Deprecated since ACPI Spec 6.3.
+ */
+build_append_int_noprefix(table_data, 0, 8);
+/*
+ * Reserved:
+ * Previously defined as the Range Length of the region in bytes.
+ * Deprecated since ACPI Spec 6.3.
+ */
+build_append_int_noprefix(table_data, 0, 8);
+}
+
+/* Bui

[PATCH RESEND v20 0/8] Build ACPI Heterogeneous Memory Attribute Table (HMAT)

2019-12-12 Thread Tao Xu

This series of patches will build Heterogeneous Memory Attribute Table (HMAT)
according to the command line. The ACPI HMAT describes the memory attributes,
such as memory side cache attributes and bandwidth and latency details,
related to the Memory Proximity Domain.
The software is expected to use HMAT information as hint for optimization.

In the Linux kernel, the code in drivers/acpi/hmat/hmat.c parses and reports
the platform's HMAT tables.

The V19 patches link:
https://patchwork.kernel.org/cover/11265525/

Changelog:
v20:
- Resend to fix the wrong target in pc_hmat_erange_cfg()
- Use g_assert_true and g_assert_false to replace g_assert
  (Thomas and Markus)
- Rename assoc as associativity, update the QAPI description (Markus)
- Disable cache level 0 in hmat-cache option (Igor)
- Keep base and bitmap unchanged when latency or bandwidth
  out of range
- Fix the broken CI case when user input latency or bandwidth
  less than required.
v19:
- Add description about the machine property 'hmat' in commit
  message (Markus)
- Update the QAPI comments
- Add a check for no memory side cache
- Add some fail cases for hmat-cache when level=0
v18:
- Defer patches 01/14~06/14 of V17, use qapi type uint64 and
  only nanosecond for latency (Markus)
- Rewrite the lines over 80 characters(Igor)
v17:
- Add check when user input latency or bandwidth 0, the
  lb_info_provided should also be 0. Because in ACPI 6.3 5.2.27.4,
  0 means the corresponding latency or bandwidth information is
  not provided.
- Fix the infinite loop when node->latency is 0.
- Use NumaHmatCacheOptions to replace HMAT_Cache_Info (Igor)
- Add check for unordered cache level input (Igor)
- Add some fail test cases (Igor)
v16:
- Add and use qemu_strtold_finite to parse size, support full
  64bit precision, modify related test cases (Eduardo and Markus)
- Simplify struct HMAT_LB_Info and related code, unify latency
  and bandwidth (Igor)
- Add cross check with hmat_lb data (Igor)
- Fields in Cache Attributes are promoted to uint32_t before
  shifting (Igor)
- Add case for QMP build HMAT (Igor)
v15:
- Add a new patch to refactor do_strtosz() (Eduardo)
- Make tests without breaking CI (Michael)
v14:
- Reuse the codes of do_strtosz to build qemu_strtotime_ns
  (Eduardo)
- Squash patch v13 01/12 and 02/12 together (Daniel and Eduardo)
- Drop time unit picosecond (Eric)
- Use qemu ctz64 and clz64 instead of builtin function
v13:
- Modify some text description
- Drop "initiator_valid" field in struct NodeInfo
- Reuse Garray to store the raw bandwidth and bandwidth data
- Calculate common base unit using range bitmap
- Add a patch to calculate hmat latency and bandwidth entry list
- Drop the total_levels option and use readable cache size
- Remove the unnecessary header file
- Use decimal notation with appropriate suffix for cache size

Liu Jingqi (5):
  numa: Extend CLI to provide memory latency and bandwidth information
  numa: Extend CLI to provide memory side cache information
  hmat acpi: Build Memory Proximity Domain Attributes Structure(s)
  hmat acpi: Build System Locality Latency and Bandwidth Information
Structure(s)
  hmat acpi: Build Memory Side Cache Information Structure(s)

Tao Xu (3):
  numa: Extend CLI to provide initiator information for numa nodes
  tests/numa: Add case for QMP build HMAT
  tests/bios-tables-test: add test cases for ACPI HMAT

 hw/acpi/Kconfig   |   7 +-
 hw/acpi/Makefile.objs |   1 +
 hw/acpi/hmat.c| 268 +++
 hw/acpi/hmat.h|  42 
 hw/core/machine.c |  64 ++
 hw/core/numa.c| 297 ++
 hw/i386/acpi-build.c  |   5 +
 include/sysemu/numa.h |  63 ++
 qapi/machine.json | 180 +++-
 qemu-options.hx   |  95 +++-
 tests/bios-tables-test-allowed-diff.h |   8 +
 tests/bios-tables-test.c  |  44 
 tests/data/acpi/pc/APIC.acpihmat  |   0
 tests/data/acpi/pc/DSDT.acpihmat  |   0
 tests/data/acpi/pc/HMAT.acpihmat  |   0
 tests/data/acpi/pc/SRAT.acpihmat  |   0
 tests/data/acpi/q35/APIC.acpihmat |   0
 tests/data/acpi/q35/DSDT.acpihmat |   0
 tests/data/acpi/q35/HMAT.acpihmat |   0
 tests/data/acpi/q35/SRAT.acpihmat |   0
 tests/numa-test.c | 213 ++
 21 files changed, 1276 insertions(+), 11 deletions(-)
 create mode 100644 hw/acpi/hmat.c
 create mode 100644 hw/acpi/hmat.h
 create mode 100644 tests/data/acpi/pc/APIC.acpihmat
 create mode 100644 tests/data/acpi/pc/DSDT.acpihmat
 create mode 100644 tests/data/acpi/pc/HMAT.acpihmat
 create mode 100644 tests/data/acpi/pc/SRAT.acpihmat
 creat

[PATCH RESEND v2] util/cutils: Expand do_strtosz parsing precision to 64 bits

2019-12-09 Thread Tao Xu

Parse input string both as a double and as a uint64_t, then use the
method which consumes more characters. Update the related test cases.

Signed-off-by: Tao Xu 
---

Changes in v2:
- Resend to use a double smaller than DBL_MIN
- Add more test case for double overflow and underflow.
- Set mul as int64_t (Markus)
- Restore endptr (Markus)
---
 tests/test-cutils.c| 37 +++
 tests/test-keyval.c| 47 +
 tests/test-qemu-opts.c | 39 +---
 util/cutils.c  | 67 +++---
 4 files changed, 75 insertions(+), 115 deletions(-)

diff --git a/tests/test-cutils.c b/tests/test-cutils.c
index 1aa8351520..49e495b8ba 100644
--- a/tests/test-cutils.c
+++ b/tests/test-cutils.c
@@ -1970,40 +1970,25 @@ static void test_qemu_strtosz_simple(void)
 g_assert_cmpint(err, ==, 0);
 g_assert_cmpint(res, ==, 12345);
 
-/* Note: precision is 53 bits since we're parsing with strtod() */
-
-str = "9007199254740991"; /* 2^53-1 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x1f);
-g_assert(endptr == str + 16);
-
-str = "9007199254740992"; /* 2^53 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x20);
-g_assert(endptr == str + 16);
+/* Note: precision is 64 bits (UINT64_MAX) */
 
 str = "9007199254740993"; /* 2^53+1 */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x20); /* rounded to 53 bits */
+g_assert_cmpint(res, ==, 0x21);
 g_assert(endptr == str + 16);
 
-str = "18446744073709549568"; /* 0xf800 (53 msbs set) */
+str = "18446744073709550591"; /* 0xfbff */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0xf800);
+g_assert_cmpint(res, ==, 0xfbff);
 g_assert(endptr == str + 20);
 
-str = "18446744073709550591"; /* 0xfbff */
+str = "18446744073709551615"; /* 2^64-1 (UINT64_MAX) */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0xf800); /* rounded to 53 bits */
+g_assert_cmpint(res, ==, 0x);
 g_assert(endptr == str + 20);
-
-/* 0x7e00..0x7fff get rounded to
- * 0x8000, thus -ERANGE; see test_qemu_strtosz_erange() */
 }
 
 static void test_qemu_strtosz_units(void)
@@ -2145,20 +2130,20 @@ static void test_qemu_strtosz_erange(void)
 g_assert_cmpint(err, ==, -ERANGE);
 g_assert(endptr == str + 2);
 
-str = "18446744073709550592"; /* 0xfc00 */
+str = "18446744073709551616"; /* 2^64 */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, -ERANGE);
 g_assert(endptr == str + 20);
 
-str = "18446744073709551615"; /* 2^64-1 */
+str = "1.7976931348623158e+308"; /* DBL_MAX, double overflows */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, -ERANGE);
-g_assert(endptr == str + 20);
+g_assert(endptr == str + 23);
 
-str = "18446744073709551616"; /* 2^64 */
+str = "2.225e-308"; /* Small than DBL_MIN, double underflows */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, -ERANGE);
-g_assert(endptr == str + 20);
+g_assert(endptr == str + 10);
 
 str = "20E";
 err = qemu_strtosz(str, , );
diff --git a/tests/test-keyval.c b/tests/test-keyval.c
index 09b0ae3c68..fad941fcb8 100644
--- a/tests/test-keyval.c
+++ b/tests/test-keyval.c
@@ -383,59 +383,26 @@ static void test_keyval_visit_size(void)
 visit_end_struct(v, NULL);
 visit_free(v);
 
-/* Note: precision is 53 bits since we're parsing with strtod() */
+/* Note: precision is 64 bits (UINT64_MAX) */
 
-/* Around limit of precision: 2^53-1, 2^53, 2^53+1 */
-qdict = keyval_parse("sz1=9007199254740991,"
- "sz2=9007199254740992,"
- "sz3=9007199254740993",
+/* Around limit of precision: UINT64_MAX - 1, UINT64_MAX */
+qdict = keyval_parse("sz1=18446744073709551614,"
+ "sz2=18446744073709551615",
  NULL, _abort);
 v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
 qobject_unref(qdict);
 visit_start_struct(v, NULL, NULL, 0, _abort);
 visit_type_size(v, "sz1", , _abort);
-g_assert_cmphex(sz, ==, 0x1f);
+g_assert_cmphex(sz, ==, 0xfffe);
 visit_type_size(v, "sz2", , _abort);
-g_assert_cmphex(sz, ==, 0x20);
-visit_type_size(v, "sz3", , _abort);
-g_assert_cmphex

[PATCH v2 4/4] target/i386: Add notes for versioned CPU models

2019-12-08 Thread Tao Xu

Add notes describing which features are added or removed in each version.
Remove the changed model-id from versioned CPU models.

Signed-off-by: Tao Xu 
---

Changes in v2:
- correct the note of Cascadelake v3 (Xiaoyao)
---
 target/i386/cpu.c | 50 +++
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7b3bd6d4db..4717862cee 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -2281,10 +2281,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Nehalem-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Core i7 9xx (Nehalem Core i7, IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2362,10 +2361,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Westmere-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Westmere E56xx/L56xx/X56xx (IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2448,10 +2446,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "SandyBridge-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Xeon E312xx (Sandy Bridge, IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2540,10 +2537,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "IvyBridge-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Xeon E3-12xx v2 (Ivy Bridge, IBRS)" },
 { /* end of list */ }
 }
 },
@@ -2637,17 +2633,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Haswell-noTSX",
+.note = "no TSX",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
 { "stepping", "1" },
-{ "model-id", "Intel Core Processor (Haswell, no TSX)", },
 { /* end of list */ }
 },
 },
 {
 .version = 3,
 .alias = "Haswell-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 /* Restore TSX features removed by -v2 above */
 { "hle", "on" },
@@ -2658,21 +2655,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
  */
 { "stepping", "4" },
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Core Processor (Haswell, IBRS)" },
 { /* end of list */ }
 }
 },
 {
 .version = 4,
 .alias = "Haswell-noTSX-IBRS",
+.note = "no TSX, IBRS",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
 /* spec-ctrl was already enabled by -v3 above */
 { "stepping", "1" },
-{ "model-id",
-  "Intel Core Processor (Haswell, no TSX, IBRS)" },
 { /* end of list */ }
 }
 },
@@ -2768,35 +2762,33 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Broadwell-noTSX",
+.note = "no TSX",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
-{ "model-id", "Inte

[PATCH v2 1/4] target/i386: Add Denverton-v2 (no MPX) CPU model

2019-12-08 Thread Tao Xu

Because MPX is being removed from the Linux kernel, remove the MPX feature
from Denverton.

Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 69f518a21a..06a3077f95 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3482,6 +3482,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
 .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING,
 .xlevel = 0x8008,
 .model_id = "Intel Atom Processor (Denverton)",
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ "mpx", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 {
 .name = "Snowridge",
-- 
2.20.1

[PATCH v2 0/4] Add extra information to versioned CPU models

2019-12-08 Thread Tao Xu

This series of patches will remove MPX from Denverton and remove
monitor from some CPU models. It also adds additional information to
-cpu help to indicate the changes in each version of a CPU model.

The output is as follows:
./x86_64-softmmu/qemu-system-x86_64 -cpu help | grep "\["
x86 Broadwell-v2  Intel Core Processor (Broadwell) [no TSX] 

x86 Broadwell-v3  Intel Core Processor (Broadwell) [IBRS]   

x86 Broadwell-v4  Intel Core Processor (Broadwell) [no TSX, IBRS]   

x86 Cascadelake-Server-v2  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES]
x86 Cascadelake-Server-v3  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES, no TSX]
x86 Denverton-v2  Intel Atom Processor (Denverton) [no MPX, no MONITOR] 

x86 Dhyana-v2 Hygon Dhyana Processor [no MONITOR]   

x86 EPYC-v2   AMD EPYC Processor [IBPB] 

x86 EPYC-v3   AMD EPYC Processor [IBPB, no MONITOR] 

x86 Haswell-v2Intel Core Processor (Haswell) [no TSX]   

x86 Haswell-v3Intel Core Processor (Haswell) [IBRS] 

x86 Haswell-v4Intel Core Processor (Haswell) [no TSX, IBRS] 

x86 Icelake-Client-v2 Intel Core Processor (Icelake) [no TSX]   

x86 Icelake-Server-v2 Intel Xeon Processor (Icelake) [no TSX]   

x86 IvyBridge-v2  Intel Xeon E3-12xx v2 (Ivy Bridge) [IBRS] 

x86 Nehalem-v2Intel Core i7 9xx (Nehalem Class Core i7) [IBRS]  

x86 Opteron_G3-v2 AMD Opteron 23xx (Gen 3 Class Opteron) [no MONITOR]   

x86 SandyBridge-v2Intel Xeon E312xx (Sandy Bridge) [IBRS]   

x86 Skylake-Client-v2 Intel Core Processor (Skylake) [IBRS] 

x86 Skylake-Client-v3 Intel Core Processor (Skylake) [no TSX, IBRS] 

x86 Skylake-Server-v2 Intel Xeon Processor (Skylake) [IBRS] 

x86 Skylake-Server-v3 Intel Xeon Processor (Skylake) [no TSX, IBRS] 

x86 Snowridge-v2  Intel Atom Processor (SnowRidge) [no MPX] 

x86 Snowridge-v3  Intel Atom Processor (SnowRidge) [no MPX, no MONITOR] 

x86 Westmere-v2   Westmere E56xx/L56xx/X56xx (Nehalem-C) [IBRS]

Changes in v2:
- correct the note of Cascadelake v3 (Xiaoyao)

Tao Xu (4):
  target/i386: Add Denverton-v2 (no MPX) CPU model
  target/i386: Remove monitor from some CPU models
  target/i386: Add new property note to versioned CPU models
  target/i386: Add notes for versioned CPU models

 target/i386/cpu.c | 112 +++---
 1 file changed, 85 insertions(+), 27 deletions(-)

-- 
2.20.1

[PATCH v2 2/4] target/i386: Remove monitor from some CPU models

2019-12-08 Thread Tao Xu

Add new versions of the Snowridge, Denverton, Opteron_G3, EPYC, and Dhyana
CPU models to remove the MONITOR/MWAIT feature.

Since QEMU/KVM uses "-overcommit cpu-pm=on" to expose MONITOR/MWAIT
(commit id 6f131f13e68d648a8e4f083c667ab1acd88ce4cd), the MONITOR/MWAIT
feature in these CPU models is unused.

Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 06a3077f95..b09ac38409 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3621,6 +3621,14 @@ static X86CPUDefinition builtin_x86_defs[] = {
 { /* end of list */ },
 },
 },
+{
+.version = 3,
+.props = (PropValue[]) {
+/* mpx was already removed by -v2 above */
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
 { /* end of list */ },
 },
 },
@@ -3732,6 +3740,17 @@ static X86CPUDefinition builtin_x86_defs[] = {
 CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
 .xlevel = 0x8008,
 .model_id = "AMD Opteron 23xx (Gen 3 Class Opteron)",
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 {
 .name = "Opteron_G4",
@@ -3856,6 +3875,14 @@ static X86CPUDefinition builtin_x86_defs[] = {
 { /* end of list */ }
 }
 },
+{
+.version = 3,
+.props = (PropValue[]) {
+/* ibpb was already enabled by -v2 above */
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
 { /* end of list */ }
 }
 },
@@ -3908,6 +3935,17 @@ static X86CPUDefinition builtin_x86_defs[] = {
 .xlevel = 0x801E,
 .model_id = "Hygon Dhyana Processor",
 .cache_info = _cache_info,
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 };
 
-- 
2.20.1

[PATCH v2 3/4] target/i386: Add new property note to versioned CPU models

2019-12-08 Thread Tao Xu

Add additional information for -cpu help to indicate the changes in this
version of CPU model.

Suggested-by: Eduardo Habkost 
Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index b09ac38409..7b3bd6d4db 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1693,6 +1693,7 @@ typedef struct PropValue {
 typedef struct X86CPUVersionDefinition {
 X86CPUVersion version;
 const char *alias;
+const char *note;
 PropValue *props;
 } X86CPUVersionDefinition;
 
@@ -1723,6 +1724,7 @@ struct X86CPUModel {
 X86CPUDefinition *cpudef;
 /* CPU model version */
 X86CPUVersion version;
+const char *note;
 /*
  * If true, this is an alias CPU model.
  * This matters only for "-cpu help" and query-cpu-definitions
@@ -4788,6 +4790,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer 
user_data)
 char *name = x86_cpu_class_get_model_name(cc);
 char *desc = g_strdup(cc->model_description);
 char *alias_of = x86_cpu_class_get_alias_of(cc);
+char *model_id = x86_cpu_class_get_model_id(cc);
 
 if (!desc && alias_of) {
 if (cc->model && cc->model->version == CPU_VERSION_AUTO) {
@@ -4796,14 +4799,18 @@ static void x86_cpu_list_entry(gpointer data, gpointer 
user_data)
 desc = g_strdup_printf("(alias of %s)", alias_of);
 }
 }
+if (!desc && cc->model && cc->model->note) {
+desc = g_strdup_printf("%s [%s]", model_id, cc->model->note);
+}
 if (!desc) {
-desc = x86_cpu_class_get_model_id(cc);
+desc = g_strdup_printf("%s", model_id);
 }
 
-qemu_printf("x86 %-20s  %-48s\n", name, desc);
+qemu_printf("x86 %-20s  %-58s\n", name, desc);
 g_free(name);
 g_free(desc);
 g_free(alias_of);
+g_free(model_id);
 }
 
 /* list available CPU models and flags */
@@ -5280,6 +5287,7 @@ static void x86_register_cpudef_types(X86CPUDefinition 
*def)
 X86CPUModel *m = g_new0(X86CPUModel, 1);
 m->cpudef = def;
 m->version = vdef->version;
+m->note = vdef->note;
 name = x86_cpu_versioned_model_name(def, vdef->version);
 x86_register_cpu_model_type(name, m);
 g_free(name);
-- 
2.20.1

[PATCH v2] util/cutils: Expand do_strtosz parsing precision to 64 bits

2019-12-08 Thread Tao Xu

Parse input string both as a double and as a uint64_t, then use the
method which consumes more characters. Update the related test cases.

Signed-off-by: Tao Xu 
---

Changes in v2:
- Add more test cases for double overflow and underflow.
- Set mul as int64_t (Markus)
- Restore endptr (Markus)
---
 tests/test-cutils.c| 37 +++
 tests/test-keyval.c| 47 +
 tests/test-qemu-opts.c | 39 +---
 util/cutils.c  | 67 +++---
 4 files changed, 75 insertions(+), 115 deletions(-)

diff --git a/tests/test-cutils.c b/tests/test-cutils.c
index 1aa8351520..6fa9f88488 100644
--- a/tests/test-cutils.c
+++ b/tests/test-cutils.c
@@ -1970,40 +1970,25 @@ static void test_qemu_strtosz_simple(void)
 g_assert_cmpint(err, ==, 0);
 g_assert_cmpint(res, ==, 12345);
 
-/* Note: precision is 53 bits since we're parsing with strtod() */
-
-str = "9007199254740991"; /* 2^53-1 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x1f);
-g_assert(endptr == str + 16);
-
-str = "9007199254740992"; /* 2^53 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x20);
-g_assert(endptr == str + 16);
+/* Note: precision is 64 bits (UINT64_MAX) */
 
 str = "9007199254740993"; /* 2^53+1 */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x20); /* rounded to 53 bits */
+g_assert_cmpint(res, ==, 0x21);
 g_assert(endptr == str + 16);
 
-str = "18446744073709549568"; /* 0xf800 (53 msbs set) */
+str = "18446744073709550591"; /* 0xfbff */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0xf800);
+g_assert_cmpint(res, ==, 0xfbff);
 g_assert(endptr == str + 20);
 
-str = "18446744073709550591"; /* 0xfbff */
+str = "18446744073709551615"; /* 2^64-1 (UINT64_MAX) */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0xf800); /* rounded to 53 bits */
+g_assert_cmpint(res, ==, 0x);
 g_assert(endptr == str + 20);
-
-/* 0x7e00..0x7fff get rounded to
- * 0x8000, thus -ERANGE; see test_qemu_strtosz_erange() */
 }
 
 static void test_qemu_strtosz_units(void)
@@ -2145,20 +2130,20 @@ static void test_qemu_strtosz_erange(void)
 g_assert_cmpint(err, ==, -ERANGE);
 g_assert(endptr == str + 2);
 
-str = "18446744073709550592"; /* 0xfc00 */
+str = "18446744073709551616"; /* 2^64 */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, -ERANGE);
 g_assert(endptr == str + 20);
 
-str = "18446744073709551615"; /* 2^64-1 */
+str = "1.7976931348623158e+308"; /* DBL_MAX */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, -ERANGE);
-g_assert(endptr == str + 20);
+g_assert(endptr == str + 23);
 
-str = "18446744073709551616"; /* 2^64 */
+str = "2.2250738585072014e-308"; /* DBL_MIN */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, -ERANGE);
-g_assert(endptr == str + 20);
+g_assert(endptr == str + 23);
 
 str = "20E";
 err = qemu_strtosz(str, , );
diff --git a/tests/test-keyval.c b/tests/test-keyval.c
index 09b0ae3c68..fad941fcb8 100644
--- a/tests/test-keyval.c
+++ b/tests/test-keyval.c
@@ -383,59 +383,26 @@ static void test_keyval_visit_size(void)
 visit_end_struct(v, NULL);
 visit_free(v);
 
-/* Note: precision is 53 bits since we're parsing with strtod() */
+/* Note: precision is 64 bits (UINT64_MAX) */
 
-/* Around limit of precision: 2^53-1, 2^53, 2^53+1 */
-qdict = keyval_parse("sz1=9007199254740991,"
- "sz2=9007199254740992,"
- "sz3=9007199254740993",
+/* Around limit of precision: UINT64_MAX - 1, UINT64_MAX */
+qdict = keyval_parse("sz1=18446744073709551614,"
+ "sz2=18446744073709551615",
  NULL, _abort);
 v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
 qobject_unref(qdict);
 visit_start_struct(v, NULL, NULL, 0, _abort);
 visit_type_size(v, "sz1", , _abort);
-g_assert_cmphex(sz, ==, 0x1f);
+g_assert_cmphex(sz, ==, 0xfffe);
 visit_type_size(v, "sz2", , _abort);
-g_assert_cmphex(sz, ==, 0x20);
-visit_type_size(v, "sz3", , _abort);
-g_assert_cmphex(sz, ==, 0x20);
-visit_check_struct(v, _abort);
-visit_e

Re: [PATCH] util/cutils: Expand do_strtosz parsing precision to 64 bits

2019-12-08 Thread Tao Xu





On 12/5/19 11:29 PM, Markus Armbruster wrote:

Tao Xu  writes:


Parse input string both as a double and as a uint64_t, then use the
method which consumes more characters. Update the related test cases.

Signed-off-by: Tao Xu 
---

[...]

diff --git a/util/cutils.c b/util/cutils.c
index 77acadc70a..b08058c57c 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -212,24 +212,43 @@ static int do_strtosz(const char *nptr, const char **end,
const char default_suffix, int64_t unit,
uint64_t *result)
  {
-int retval;
-const char *endptr;
+int retval, retd, retu;
+const char *suffix, *suffixd, *suffixu;
  unsigned char c;
  int mul_required = 0;
-double val, mul, integral, fraction;
+bool use_strtod;
+uint64_t valu;
+double vald, mul, integral, fraction;


Note for later: @mul is double.


+
+retd = qemu_strtod_finite(nptr, , );
+retu = qemu_strtou64(nptr, , 0, );
+use_strtod = strlen(suffixd) < strlen(suffixu);
+
+/*
+ * Parse @nptr both as a double and as a uint64_t, then use the method
+ * which consumes more characters.
+ */


The comment is in a funny place.  I'd put it right before the
qemu_strtod_finite() line.


+if (use_strtod) {
+suffix = suffixd;
+retval = retd;
+} else {
+suffix = suffixu;
+retval = retu;
+}
  
-retval = qemu_strtod_finite(nptr, , );

  if (retval) {
  goto out;
  }


This is even more subtle than it looks.

A close reading of the function contracts leads to three cases for each
conversion:

* parse error (including infinity and NaN)

   @retu / @retd is -EINVAL
   @valu / @vald is uninitialized
   @suffixu / @suffixd is @nptr

* range error

   @retu / @retd is -ERANGE
   @valu / @vald is our best approximation of the conversion result
   @suffixu / @suffixd points to the first character not consumed by the
   conversion.

   Sub-cases:

   - uint64_t overflow

 We know the conversion result exceeds UINT64_MAX.

   - double overflow

 we know the conversion result's magnitude exceeds the largest
 representable finite double DBL_MAX.

   - double underflow

 we know the conversion result is close to zero (closer than DBL_MIN,
 the smallest normalized positive double).

* success

   @retu / @retd is 0
   @valu / @vald is the conversion result
   @suffixu / @suffixd points to the first character not consumed by the
   conversion.

This leads to a matrix (parse error, uint64_t overflow, success) x
(parse error, double overflow, double underflow, success).  We need to
check the code does what we want for each element of this matrix, and
document any behavior that's not perfectly obvious.

(success, success): we pick uint64_t if qemu_strtou64() consumed more
characters than qemu_strtod_finite(), else double.  "More" is important
here; when they consume the same characters, we *need* to use the
uint64_t result.  Example: for "18446744073709551615", we need to use
uint64_t 18446744073709551615, not double 18446744073709551616.0.  But
for "18446744073709551616.", we need to use the double.  Good.

(success, parse error) and (parse error, success): we pick the one that
succeeds, because success consumes characters, and failure to parse does
not.  Good.

(parse error, parse error): neither consumes characters, so we pick
uint64_t.  Good.

(parse error, double overflow), (parse error, double underflow) and
(uint64_t overflow, parse error): we pick the range error, because it
consumes characters.  Good.

These are the simple combinations.  The remainder are hairier: (success,
double overflow), (success, double underflow), (uint64_t overflow,
success).  I lack the time to analyze them today.  Must be done before
we take this patch.  Any takers?


(success, double overflow), (success, double underflow): we pick the double
range error and return -ERANGE, because it consumes more characters. Example:
for "1.79769e+309", qemu_strtou64 consumes only "1" and parses it as uint64_t,
but qemu_strtod_finite returns -ERANGE and consumes all characters. It is OK.


(uint64_t overflow, success): both consume the same characters, so we use the
uint64_t result and return -ERANGE. Note that even though qemu_strtod_finite
can parse cases such as "18446744073709551617", the result has to be a
uint64_t, so we also need to return -ERANGE. It is OK.


Thank you for your analysis and suggestion. I will add more test cases
to cover some of these combinations.



-fraction = modf(val, );
-if (fraction != 0) {
-mul_required = 1;
+if (use_strtod) {
+fraction = modf(vald, );
+if (fraction != 0) {
+mul_required = 1;
+}
  }


Here, @suffix points to the suffix character, if any.


-c = *endptr;
+c = *suffix;
  mul = suffix_mul(c, unit);
  if (mul >= 0) {
-endptr++;
+suffix++;


Now @suffix points to the first

Re: [PATCH RESEND 4/4] target/i386: Add notes for versioned CPU models

2019-12-08 Thread Tao Xu


On 12/5/2019 4:44 PM, Xiaoyao Li wrote:

On 12/2/2019 2:32 PM, Tao Xu wrote:

Add which features are added or removed in this version. Remove the
changed model-id in versioned CPU models.

Signed-off-by: Tao Xu 
---
   target/i386/cpu.c | 50 +++
   1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7b3bd6d4db..c82fbfd02e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c


[...]


@@ -3141,6 +3133,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
   .versions = (X86CPUVersionDefinition[]) {
   { .version = 1 },
   { .version = 2,
+  .note = "ARCH_CAPABILITIES",


Here ARCH_CAPABILITIES doesn't tell what bits in
MSR_IA32_ARCH_CAPABILITIES this version has, which makes it meaningless.

Maybe
 .note = "ARCH_CAPABILITIES(rdctl-no, ibrs-all, skip-l1dfl-vmentry, 
mds-no)",

is better?



But that is too long for -cpu help; it would break the info into 2 lines.

Re: [PATCH RESEND 0/4] Add extra information to versioned CPU models

2019-12-08 Thread Tao Xu


On 12/5/2019 4:55 PM, Xiaoyao Li wrote:

On 12/2/2019 2:32 PM, Tao Xu wrote:

This series of patches will remove MPX from Denverton and remove
monitor from some CPU models. It also adds additional information to -cpu help
to indicate the changes in each version of a CPU model.

The output is as follows:
x86_64-softmmu/qemu-system-x86_64 -cpu help | grep "\["
x86 Broadwell-v2  Intel Core Processor (Broadwell) [no TSX]
x86 Broadwell-v3  Intel Core Processor (Broadwell) [IBRS]
x86 Broadwell-v4  Intel Core Processor (Broadwell) [no TSX, IBRS]


Above the changes of each Broadwell-v{2,3,4} are based on Broadwell-v1.


x86 Cascadelake-Server-v2  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES]
x86 Cascadelake-Server-v3  Intel Xeon Processor (Cascadelake) [no TSX]


But in the code, Cascadelake-Server-v3 inherits all the features in
Cascadelake-Server-v2 and removes TSX related hle & rtm.

So if we keep the same rule based on v1, it should be

   x86 Cascadelake-Server-v3  Intel Xeon Processor (Cascadelake)
[ARCH_CAPABILITIES, no TSX]


Thank you for your suggestion. I will correct this.

[PATCH] util/cutils: Expand do_strtosz parsing precision to 64 bits

2019-12-04 Thread Tao Xu

Parse input string both as a double and as a uint64_t, then use the
method which consumes more characters. Update the related test cases.

Signed-off-by: Tao Xu 
---
 tests/test-cutils.c| 37 -
 tests/test-keyval.c| 47 ---
 tests/test-qemu-opts.c | 39 --
 util/cutils.c  | 74 ++
 4 files changed, 73 insertions(+), 124 deletions(-)

diff --git a/tests/test-cutils.c b/tests/test-cutils.c
index 1aa8351520..4a7030c611 100644
--- a/tests/test-cutils.c
+++ b/tests/test-cutils.c
@@ -1970,40 +1970,25 @@ static void test_qemu_strtosz_simple(void)
 g_assert_cmpint(err, ==, 0);
 g_assert_cmpint(res, ==, 12345);
 
-/* Note: precision is 53 bits since we're parsing with strtod() */
-
-str = "9007199254740991"; /* 2^53-1 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x1f);
-g_assert(endptr == str + 16);
-
-str = "9007199254740992"; /* 2^53 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x20);
-g_assert(endptr == str + 16);
+/* Note: precision is 64 bits (UINT64_MAX) */
 
 str = "9007199254740993"; /* 2^53+1 */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x20); /* rounded to 53 bits */
+g_assert_cmpint(res, ==, 0x21);
 g_assert(endptr == str + 16);
 
-str = "18446744073709549568"; /* 0xf800 (53 msbs set) */
+str = "18446744073709550591"; /* 0xfbff */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0xf800);
+g_assert_cmpint(res, ==, 0xfbff);
 g_assert(endptr == str + 20);
 
-str = "18446744073709550591"; /* 0xfbff */
+str = "18446744073709551615"; /* 2^64-1 (UINT64_MAX) */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0xf800); /* rounded to 53 bits */
+g_assert_cmpint(res, ==, 0x);
 g_assert(endptr == str + 20);
-
-/* 0x7e00..0x7fff get rounded to
- * 0x8000, thus -ERANGE; see test_qemu_strtosz_erange() */
 }
 
 static void test_qemu_strtosz_units(void)
@@ -2145,16 +2130,6 @@ static void test_qemu_strtosz_erange(void)
 g_assert_cmpint(err, ==, -ERANGE);
 g_assert(endptr == str + 2);
 
-str = "18446744073709550592"; /* 0xfc00 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, -ERANGE);
-g_assert(endptr == str + 20);
-
-str = "18446744073709551615"; /* 2^64-1 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, -ERANGE);
-g_assert(endptr == str + 20);
-
 str = "18446744073709551616"; /* 2^64 */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, -ERANGE);
diff --git a/tests/test-keyval.c b/tests/test-keyval.c
index 09b0ae3c68..fad941fcb8 100644
--- a/tests/test-keyval.c
+++ b/tests/test-keyval.c
@@ -383,59 +383,26 @@ static void test_keyval_visit_size(void)
 visit_end_struct(v, NULL);
 visit_free(v);
 
-/* Note: precision is 53 bits since we're parsing with strtod() */
+/* Note: precision is 64 bits (UINT64_MAX) */
 
-/* Around limit of precision: 2^53-1, 2^53, 2^53+1 */
-qdict = keyval_parse("sz1=9007199254740991,"
- "sz2=9007199254740992,"
- "sz3=9007199254740993",
+/* Around limit of precision: UINT64_MAX - 1, UINT64_MAX */
+qdict = keyval_parse("sz1=18446744073709551614,"
+ "sz2=18446744073709551615",
  NULL, _abort);
 v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
 qobject_unref(qdict);
 visit_start_struct(v, NULL, NULL, 0, _abort);
 visit_type_size(v, "sz1", , _abort);
-g_assert_cmphex(sz, ==, 0x1f);
+g_assert_cmphex(sz, ==, 0xfffe);
 visit_type_size(v, "sz2", , _abort);
-g_assert_cmphex(sz, ==, 0x20);
-visit_type_size(v, "sz3", , _abort);
-g_assert_cmphex(sz, ==, 0x20);
-visit_check_struct(v, _abort);
-visit_end_struct(v, NULL);
-visit_free(v);
-
-/* Close to signed upper limit 0x7c00 (53 msbs set) */
-qdict = keyval_parse("sz1=9223372036854774784," /* 7c00 */
- "sz2=9223372036854775295", /* 7dff */
- NULL, _abort);
-v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
-qobject_unref(qdict);
-visit_start_struct(v, NULL, NULL, 0, _abort);
-visit_type_size(v, "sz1",

Re: [PATCH v20 0/8] Build ACPI Heterogeneous Memory Attribute Table (HMAT)

2019-12-02 Thread Tao Xu


On 12/3/2019 2:25 PM, Michael S. Tsirkin wrote:

On Tue, Dec 03, 2019 at 07:00:53AM +0100, Markus Armbruster wrote:

"Michael S. Tsirkin"  writes:


On Tue, Dec 03, 2019 at 08:53:30AM +0800, Tao Xu wrote:

Hi Michael,

Could this patch series be queued?
Thank you very much!

Tao


QEMU is in freeze, so not yet. Please ping after the release.


Just to avoid confusion: it's Michael's personal preference not to
process patches for the next version during freeze.  Other maintainers
do, and that's actually the project's policy:

Subject: QEMU Summit 2017: minutes
Message-ID: 
https://lists.nongnu.org/archive/html/qemu-devel/2017-11/msg04453.html

 qemu-next:
  * Problem 1: Contributors cannot get patches merged during freeze
(bad experience)
  [...]
  * Markus Armbruster: Problem 1 is solved if maintainers keep their own
-next trees
  * Paolo Bonzini: Maintaining -next could slow down or create work for
-freeze (e.g. who does backports)
  * Action: Maintainers mustn't tell submitters to go away just because
we're in a release freeze (it's up to them whether they prefer to
maintain a "-next" tree for their subsystem with patches queued for
the following release, or track which patches they've accepted
some other way)
  * We're not going to have an official project-wide "-next" tree, though

Michael, would queuing up patches in a -next branch really be too much
trouble for you?


Thanks for pointing this out!

I stopped asking for re-post since awhile ago.  I don't queue patches in
a public tree but I do review and do keep track of pending patches.

I tend to ask contributors to also ping because sometimes there's a
problem with rebase, I drop the patch but forget to tell the
contributor, and it tends to happen more with big patchsets posted during
freeze as there's a rush to merge changes right after that.
I usually don't bother people with this for small patches though.

I'll try to be clearer in my communication so contributors don't feel
stressed.

Would something like:

"I'll queue it for merge after the release. If possible please ping me
after the release to help make sure it didn't get dropped."

be clearer?

Hopefully windows CI efforts will soon bear fruit to the point where
they stress PCI enough to make maintaining next worth the effort.

I see. Thanks to Markus and Michael for the kind responses. I feel happy
rather than stressed in the QEMU community :)

Re: [PATCH v20 0/8] Build ACPI Heterogeneous Memory Attribute Table (HMAT)

2019-12-02 Thread Tao Xu


On 12/3/2019 1:35 PM, Michael S. Tsirkin wrote:

On Tue, Dec 03, 2019 at 08:53:30AM +0800, Tao Xu wrote:

Hi Michael,

Could this patch series be queued?
Thank you very much!

Tao


QEMU is in freeze, so not yet. Please ping after the release.


OK, Thank you!

Re: [PATCH v20 0/8] Build ACPI Heterogeneous Memory Attribute Table (HMAT)

2019-12-02 Thread Tao Xu


Hi Michael,

Could this patch series be queued?
Thank you very much!

Tao

On 11/29/2019 3:56 PM, Xu, Tao3 wrote:

This series of patches will build Heterogeneous Memory Attribute Table (HMAT)
according to the command line. The ACPI HMAT describes the memory attributes,
such as memory side cache attributes and bandwidth and latency details,
related to the Memory Proximity Domain.
The software is expected to use HMAT information as hint for optimization.

In the linux kernel, the codes in drivers/acpi/hmat/hmat.c parse and report
the platform's HMAT tables.

The V19 patches link:
https://patchwork.kernel.org/cover/11265525/

Changelog:
v20:
 - Use g_assert_true and g_assert_false to replace g_assert
   (Thomas and Markus)
 - Rename assoc as associativity, update the QAPI description (Markus)
 - Disable cache level 0 in hmat-cache option (Igor)
 - Keep base and bitmap unchanged when latency or bandwidth
   out of range
 - Fix the broken CI case when user input latency or bandwidth
   less than required.
v19:
 - Add description about the machine property 'hmat' in commit
   message (Markus)
 - Update the QAPI comments
 - Add a check for no memory side cache
 - Add some fail cases for hmat-cache when level=0
v18:
 - Defer patches 01/14~06/14 of V17, use qapi type uint64 and
   only nanosecond for latency (Markus)
 - Rewrite the lines over 80 characters(Igor)
v17:
 - Add check when user input latency or bandwidth 0, the
   lb_info_provided should also be 0. Because in ACPI 6.3 5.2.27.4,
   0 means the corresponding latency or bandwidth information is
   not provided.
 - Fix the infinite loop when node->latency is 0.
 - Use NumaHmatCacheOptions to replace HMAT_Cache_Info (Igor)
 - Add check for unordered cache level input (Igor)
 - Add some fail test cases (Igor)
v16:
 - Add and use qemu_strtold_finite to parse size, support full
   64bit precision, modify related test cases (Eduardo and Markus)
 - Simplify struct HMAT_LB_Info and related code, unify latency
   and bandwidth (Igor)
 - Add cross check with hmat_lb data (Igor)
 - Fields in Cache Attributes are promoted to uint32_t before
   shifting (Igor)
 - Add case for QMP build HMAT (Igor)
v15:
 - Add a new patch to refactor do_strtosz() (Eduardo)
 - Make tests without breaking CI (Michael)
v14:
 - Reuse the codes of do_strtosz to build qemu_strtotime_ns
   (Eduardo)
 - Squash patch v13 01/12 and 02/12 together (Daniel and Eduardo)
 - Drop time unit picosecond (Eric)
 - Use qemu ctz64 and clz64 instead of builtin function
v13:
 - Modify some text description
 - Drop "initiator_valid" field in struct NodeInfo
 - Reuse Garray to store the raw bandwidth and bandwidth data
 - Calculate common base unit using range bitmap
 - Add a patch to calculate hmat latency and bandwidth entry list
 - Drop the total_levels option and use readable cache size
 - Remove the unnecessary head file
 - Use decimal notation with appropriate suffix for cache size

Liu Jingqi (5):
   numa: Extend CLI to provide memory latency and bandwidth information
   numa: Extend CLI to provide memory side cache information
   hmat acpi: Build Memory Proximity Domain Attributes Structure(s)
   hmat acpi: Build System Locality Latency and Bandwidth Information
 Structure(s)
   hmat acpi: Build Memory Side Cache Information Structure(s)

Tao Xu (3):
   numa: Extend CLI to provide initiator information for numa nodes
   tests/numa: Add case for QMP build HMAT
   tests/bios-tables-test: add test cases for ACPI HMAT

  hw/acpi/Kconfig   |   7 +-
  hw/acpi/Makefile.objs |   1 +
  hw/acpi/hmat.c| 268 +++
  hw/acpi/hmat.h|  42 
  hw/core/machine.c |  64 ++
  hw/core/numa.c| 297 ++
  hw/i386/acpi-build.c  |   5 +
  include/sysemu/numa.h |  63 ++
  qapi/machine.json | 180 +++-
  qemu-options.hx   |  95 +++-
  tests/bios-tables-test-allowed-diff.h |   8 +
  tests/bios-tables-test.c  |  44 
  tests/data/acpi/pc/APIC.acpihmat  |   0
  tests/data/acpi/pc/DSDT.acpihmat  |   0
  tests/data/acpi/pc/HMAT.acpihmat  |   0
  tests/data/acpi/pc/SRAT.acpihmat  |   0
  tests/data/acpi/q35/APIC.acpihmat |   0
  tests/data/acpi/q35/DSDT.acpihmat |   0
  tests/data/acpi/q35/HMAT.acpihmat |   0
  tests/data/acpi/q35/SRAT.acpihmat |   0
  tests/numa-test.c | 213 ++
  21 files changed, 1276 insertions(+), 11 deletions(-)
  create mode 100644 hw/acpi/hmat.c
  create mode 100644 hw/acpi/hmat.h
  create mode 100644 tests/data/acpi/pc/APIC.acpihmat
  create mode 100644 tests

Re: [PATCH] target/i386: Remove monitor from some CPU model

2019-12-01 Thread Tao Xu


I am so sorry for sending this old version of the patch by mistake.

Please ignore this patch.

On 12/2/2019 2:28 PM, Xu, Tao3 wrote:

Add new versions of the Snowridge, Denverton, Opteron_G3, EPYC, and Dhyana
CPU models to remove the MONITOR/MWAIT feature.

Since QEMU/KVM uses "-overcommit cpu-pm=on" to expose MONITOR/MWAIT
(commit id 6f131f13e68d648a8e4f083c667ab1acd88ce4cd), the MONITOR/MWAIT
feature in these CPU models is unused.

Signed-off-by: Tao Xu 
---
  target/i386/cpu.c | 58 +++
  1 file changed, 58 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index a624163ac2..7c5f1e8fe0 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -2770,6 +2770,19 @@ static X86CPUDefinition builtin_x86_defs[] = {
  MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY,
  .xlevel = 0x8008,
  .model_id = "Intel Atom Processor (Denverton)",
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ "model-id",
+  "Intel Atom Processor (Denverton, no MONITOR)" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
  },
  {
  .name = "Snowridge",
@@ -2850,6 +2863,16 @@ static X86CPUDefinition builtin_x86_defs[] = {
  { /* end of list */ },
  },
  },
+{
+.version = 3,
+.props = (PropValue[]) {
+/* mpx was already removed by -v2 above */
+{ "monitor", "off" },
+{ "model-id",
+  "Intel Atom Processor (Snowridge, no MPX, no MONITOR)" },
+{ /* end of list */ },
+},
+},
  { /* end of list */ },
  },
  },
@@ -2961,6 +2984,19 @@ static X86CPUDefinition builtin_x86_defs[] = {
  CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
  .xlevel = 0x8008,
  .model_id = "AMD Opteron 23xx (Gen 3 Class Opteron)",
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ "model-id",
+  "AMD Opteron 23xx (Gen 3 Class Opteron, no MONITOR)" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
  },
  {
  .name = "Opteron_G4",
@@ -3085,6 +3121,16 @@ static X86CPUDefinition builtin_x86_defs[] = {
  { /* end of list */ }
  }
  },
+{
+.version = 3,
+.props = (PropValue[]) {
+/* ibpb was already enabled by -v2 above */
+{ "monitor", "off" },
+{ "model-id",
+  "AMD EPYC Processor (with IBPB, no MONITOR)" },
+{ /* end of list */ },
+},
+},
  { /* end of list */ }
  }
  },
@@ -3137,6 +3183,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
  .xlevel = 0x801E,
  .model_id = "Hygon Dhyana Processor",
  .cache_info = _cache_info,
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ "model-id", "Hygon Dhyana Processor (no MONITOR)" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
  },
  };

[PATCH RESEND 2/4] target/i386: Remove monitor from some CPU models

2019-12-01 Thread Tao Xu

Add new versions of the Snowridge, Denverton, Opteron_G3, EPYC, and Dhyana
CPU models to remove the MONITOR/MWAIT feature.

Since QEMU/KVM uses "-overcommit cpu-pm=on" to expose MONITOR/MWAIT
(commit id 6f131f13e68d648a8e4f083c667ab1acd88ce4cd), the MONITOR/MWAIT
feature in these CPU models is unused.

Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 06a3077f95..b09ac38409 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3621,6 +3621,14 @@ static X86CPUDefinition builtin_x86_defs[] = {
 { /* end of list */ },
 },
 },
+{
+.version = 3,
+.props = (PropValue[]) {
+/* mpx was already removed by -v2 above */
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
 { /* end of list */ },
 },
 },
@@ -3732,6 +3740,17 @@ static X86CPUDefinition builtin_x86_defs[] = {
 CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
 .xlevel = 0x8008,
 .model_id = "AMD Opteron 23xx (Gen 3 Class Opteron)",
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 {
 .name = "Opteron_G4",
@@ -3856,6 +3875,14 @@ static X86CPUDefinition builtin_x86_defs[] = {
 { /* end of list */ }
 }
 },
+{
+.version = 3,
+.props = (PropValue[]) {
+/* ibpb was already enabled by -v2 above */
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
 { /* end of list */ }
 }
 },
@@ -3908,6 +3935,17 @@ static X86CPUDefinition builtin_x86_defs[] = {
 .xlevel = 0x801E,
 .model_id = "Hygon Dhyana Processor",
 .cache_info = _cache_info,
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 };
 
-- 
2.20.1

[PATCH RESEND 1/4] target/i386: Add Denverton-v2 (no MPX) CPU model

2019-12-01 Thread Tao Xu

Because MPX is being removed from the linux kernel, remove MPX feature
from Denverton.

Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 69f518a21a..06a3077f95 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3482,6 +3482,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
 .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING,
 .xlevel = 0x8008,
 .model_id = "Intel Atom Processor (Denverton)",
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ "mpx", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 {
 .name = "Snowridge",
-- 
2.20.1

[PATCH RESEND 0/4] Add extra information to versioned CPU models

2019-12-01 Thread Tao Xu

This series of patches will remove MPX from Denverton and remove
monitor from some CPU models. It also adds additional information to -cpu help
to indicate the changes in each version of a CPU model.

The output is as follows:
x86_64-softmmu/qemu-system-x86_64 -cpu help | grep "\["
x86 Broadwell-v2  Intel Core Processor (Broadwell) [no TSX] 

x86 Broadwell-v3  Intel Core Processor (Broadwell) [IBRS]   

x86 Broadwell-v4  Intel Core Processor (Broadwell) [no TSX, IBRS]   

x86 Cascadelake-Server-v2  Intel Xeon Processor (Cascadelake) 
[ARCH_CAPABILITIES]
x86 Cascadelake-Server-v3  Intel Xeon Processor (Cascadelake) [no TSX]  
 
x86 Denverton-v2  Intel Atom Processor (Denverton) [no MPX, no MONITOR] 

x86 Dhyana-v2 Hygon Dhyana Processor [no MONITOR]   

x86 EPYC-v2   AMD EPYC Processor [IBPB] 

x86 EPYC-v3   AMD EPYC Processor [IBPB, no MONITOR] 

x86 Haswell-v2Intel Core Processor (Haswell) [no TSX]   

x86 Haswell-v3Intel Core Processor (Haswell) [IBRS] 

x86 Haswell-v4Intel Core Processor (Haswell) [no TSX, IBRS] 

x86 Icelake-Client-v2 Intel Core Processor (Icelake) [no TSX]   

x86 Icelake-Server-v2 Intel Xeon Processor (Icelake) [no TSX]   

x86 IvyBridge-v2  Intel Xeon E3-12xx v2 (Ivy Bridge) [IBRS] 

x86 Nehalem-v2Intel Core i7 9xx (Nehalem Class Core i7) [IBRS]  

x86 Opteron_G3-v2 AMD Opteron 23xx (Gen 3 Class Opteron) [no MONITOR]   

x86 SandyBridge-v2Intel Xeon E312xx (Sandy Bridge) [IBRS]   

x86 Skylake-Client-v2 Intel Core Processor (Skylake) [IBRS] 

x86 Skylake-Client-v3 Intel Core Processor (Skylake) [no TSX, IBRS] 

x86 Skylake-Server-v2 Intel Xeon Processor (Skylake) [IBRS] 

x86 Skylake-Server-v3 Intel Xeon Processor (Skylake) [no TSX, IBRS] 

x86 Snowridge-v2  Intel Atom Processor (SnowRidge) [no MPX] 

x86 Snowridge-v3  Intel Atom Processor (SnowRidge) [no MPX, no MONITOR] 

x86 Westmere-v2   Westmere E56xx/L56xx/X56xx (Nehalem-C) [IBRS]

Tao Xu (4):
  target/i386: Add Denverton-v2 (no MPX) CPU model
  target/i386: Remove monitor from some CPU models
  target/i386: Add new property note to versioned CPU models
  target/i386: Add notes for versioned CPU models

 target/i386/cpu.c | 112 +++---
 1 file changed, 85 insertions(+), 27 deletions(-)

-- 
2.20.1

[PATCH RESEND 3/4] target/i386: Add new property note to versioned CPU models

2019-12-01 Thread Tao Xu

Add additional information for -cpu help to indicate the changes in this
version of CPU model.

Suggested-by: Eduardo Habkost 
Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index b09ac38409..7b3bd6d4db 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1693,6 +1693,7 @@ typedef struct PropValue {
 typedef struct X86CPUVersionDefinition {
 X86CPUVersion version;
 const char *alias;
+const char *note;
 PropValue *props;
 } X86CPUVersionDefinition;
 
@@ -1723,6 +1724,7 @@ struct X86CPUModel {
 X86CPUDefinition *cpudef;
 /* CPU model version */
 X86CPUVersion version;
+const char *note;
 /*
  * If true, this is an alias CPU model.
  * This matters only for "-cpu help" and query-cpu-definitions
@@ -4788,6 +4790,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer 
user_data)
 char *name = x86_cpu_class_get_model_name(cc);
 char *desc = g_strdup(cc->model_description);
 char *alias_of = x86_cpu_class_get_alias_of(cc);
+char *model_id = x86_cpu_class_get_model_id(cc);
 
 if (!desc && alias_of) {
 if (cc->model && cc->model->version == CPU_VERSION_AUTO) {
@@ -4796,14 +4799,18 @@ static void x86_cpu_list_entry(gpointer data, gpointer 
user_data)
 desc = g_strdup_printf("(alias of %s)", alias_of);
 }
 }
+if (!desc && cc->model && cc->model->note) {
+desc = g_strdup_printf("%s [%s]", model_id, cc->model->note);
+}
 if (!desc) {
-desc = x86_cpu_class_get_model_id(cc);
+desc = g_strdup_printf("%s", model_id);
 }
 
-qemu_printf("x86 %-20s  %-48s\n", name, desc);
+qemu_printf("x86 %-20s  %-58s\n", name, desc);
 g_free(name);
 g_free(desc);
 g_free(alias_of);
+g_free(model_id);
 }
 
 /* list available CPU models and flags */
@@ -5280,6 +5287,7 @@ static void x86_register_cpudef_types(X86CPUDefinition 
*def)
 X86CPUModel *m = g_new0(X86CPUModel, 1);
 m->cpudef = def;
 m->version = vdef->version;
+m->note = vdef->note;
 name = x86_cpu_versioned_model_name(def, vdef->version);
 x86_register_cpu_model_type(name, m);
 g_free(name);
-- 
2.20.1

[PATCH RESEND 4/4] target/i386: Add notes for versioned CPU models

2019-12-01 Thread Tao Xu

Add which features are added or removed in this version. Remove the
changed model-id in versioned CPU models.

Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 50 +++
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7b3bd6d4db..c82fbfd02e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -2281,10 +2281,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Nehalem-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Core i7 9xx (Nehalem Core i7, IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2362,10 +2361,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Westmere-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Westmere E56xx/L56xx/X56xx (IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2448,10 +2446,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "SandyBridge-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Xeon E312xx (Sandy Bridge, IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2540,10 +2537,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "IvyBridge-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Xeon E3-12xx v2 (Ivy Bridge, IBRS)" },
 { /* end of list */ }
 }
 },
@@ -2637,17 +2633,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Haswell-noTSX",
+.note = "no TSX",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
 { "stepping", "1" },
-{ "model-id", "Intel Core Processor (Haswell, no TSX)", },
 { /* end of list */ }
 },
 },
 {
 .version = 3,
 .alias = "Haswell-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 /* Restore TSX features removed by -v2 above */
 { "hle", "on" },
@@ -2658,21 +2655,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
  */
 { "stepping", "4" },
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Core Processor (Haswell, IBRS)" },
 { /* end of list */ }
 }
 },
 {
 .version = 4,
 .alias = "Haswell-noTSX-IBRS",
+.note = "no TSX, IBRS",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
 /* spec-ctrl was already enabled by -v3 above */
 { "stepping", "1" },
-{ "model-id",
-  "Intel Core Processor (Haswell, no TSX, IBRS)" },
 { /* end of list */ }
 }
 },
@@ -2768,35 +2762,33 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Broadwell-noTSX",
+.note = "no TSX",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
-{ "model-id", "Inte

[PATCH 4/4] target/i386: Add notes for versioned CPU models

2019-12-01 Thread Tao Xu

Add which features are added or removed in this version. Remove the
changed model-id in versioned CPU models.

Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 50 +++
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7b3bd6d4db..c82fbfd02e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -2281,10 +2281,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Nehalem-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Core i7 9xx (Nehalem Core i7, IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2362,10 +2361,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Westmere-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Westmere E56xx/L56xx/X56xx (IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2448,10 +2446,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "SandyBridge-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Xeon E312xx (Sandy Bridge, IBRS update)" },
 { /* end of list */ }
 }
 },
@@ -2540,10 +2537,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "IvyBridge-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Xeon E3-12xx v2 (Ivy Bridge, IBRS)" },
 { /* end of list */ }
 }
 },
@@ -2637,17 +2633,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Haswell-noTSX",
+.note = "no TSX",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
 { "stepping", "1" },
-{ "model-id", "Intel Core Processor (Haswell, no TSX)", },
 { /* end of list */ }
 },
 },
 {
 .version = 3,
 .alias = "Haswell-IBRS",
+.note = "IBRS",
 .props = (PropValue[]) {
 /* Restore TSX features removed by -v2 above */
 { "hle", "on" },
@@ -2658,21 +2655,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
  */
 { "stepping", "4" },
 { "spec-ctrl", "on" },
-{ "model-id",
-  "Intel Core Processor (Haswell, IBRS)" },
 { /* end of list */ }
 }
 },
 {
 .version = 4,
 .alias = "Haswell-noTSX-IBRS",
+.note = "no TSX, IBRS",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
 /* spec-ctrl was already enabled by -v3 above */
 { "stepping", "1" },
-{ "model-id",
-  "Intel Core Processor (Haswell, no TSX, IBRS)" },
 { /* end of list */ }
 }
 },
@@ -2768,35 +2762,33 @@ static X86CPUDefinition builtin_x86_defs[] = {
 {
 .version = 2,
 .alias = "Broadwell-noTSX",
+.note = "no TSX",
 .props = (PropValue[]) {
 { "hle", "off" },
 { "rtm", "off" },
-{ "model-id", "Inte

[PATCH 2/4] target/i386: Remove monitor from some CPU models

2019-12-01 Thread Tao Xu

Add new versions of the Snowridge, Denverton, Opteron_G3, EPYC, and Dhyana
CPU models to remove the MONITOR/MWAIT feature.

Since QEMU/KVM uses "-overcommit cpu-pm=on" to expose MONITOR/MWAIT
(commit id 6f131f13e68d648a8e4f083c667ab1acd88ce4cd), the MONITOR/MWAIT
feature in these CPU models is unused.

Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 06a3077f95..b09ac38409 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3621,6 +3621,14 @@ static X86CPUDefinition builtin_x86_defs[] = {
 { /* end of list */ },
 },
 },
+{
+.version = 3,
+.props = (PropValue[]) {
+/* mpx was already removed by -v2 above */
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
 { /* end of list */ },
 },
 },
@@ -3732,6 +3740,17 @@ static X86CPUDefinition builtin_x86_defs[] = {
 CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
 .xlevel = 0x8008,
 .model_id = "AMD Opteron 23xx (Gen 3 Class Opteron)",
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 {
 .name = "Opteron_G4",
@@ -3856,6 +3875,14 @@ static X86CPUDefinition builtin_x86_defs[] = {
 { /* end of list */ }
 }
 },
+{
+.version = 3,
+.props = (PropValue[]) {
+/* ibpb was already enabled by -v2 above */
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
 { /* end of list */ }
 }
 },
@@ -3908,6 +3935,17 @@ static X86CPUDefinition builtin_x86_defs[] = {
 .xlevel = 0x801E,
 .model_id = "Hygon Dhyana Processor",
 .cache_info = &epyc_cache_info,
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 };
 
-- 
2.20.1

[PATCH 1/4] target/i386: Add Denverton-v2 (no MPX) CPU model

2019-12-01 Thread Tao Xu

Because MPX is being removed from the linux kernel, remove MPX feature
from Denverton.

Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 69f518a21a..06a3077f95 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3482,6 +3482,18 @@ static X86CPUDefinition builtin_x86_defs[] = {
 .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING,
 .xlevel = 0x8008,
 .model_id = "Intel Atom Processor (Denverton)",
+.versions = (X86CPUVersionDefinition[]) {
+{ .version = 1 },
+{
+.version = 2,
+.props = (PropValue[]) {
+{ "monitor", "off" },
+{ "mpx", "off" },
+{ /* end of list */ },
+},
+},
+{ /* end of list */ },
+},
 },
 {
 .name = "Snowridge",
-- 
2.20.1

[PATCH 3/4] target/i386: Add new property note to versioned CPU models

2019-12-01 Thread Tao Xu

Add additional information for -cpu help to indicate the changes in this
version of CPU model.

Suggested-by: Eduardo Habkost 
Signed-off-by: Tao Xu 
---
 target/i386/cpu.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index b09ac38409..7b3bd6d4db 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1693,6 +1693,7 @@ typedef struct PropValue {
 typedef struct X86CPUVersionDefinition {
 X86CPUVersion version;
 const char *alias;
+const char *note;
 PropValue *props;
 } X86CPUVersionDefinition;
 
@@ -1723,6 +1724,7 @@ struct X86CPUModel {
 X86CPUDefinition *cpudef;
 /* CPU model version */
 X86CPUVersion version;
+const char *note;
 /*
  * If true, this is an alias CPU model.
  * This matters only for "-cpu help" and query-cpu-definitions
@@ -4788,6 +4790,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer 
user_data)
 char *name = x86_cpu_class_get_model_name(cc);
 char *desc = g_strdup(cc->model_description);
 char *alias_of = x86_cpu_class_get_alias_of(cc);
+char *model_id = x86_cpu_class_get_model_id(cc);
 
 if (!desc && alias_of) {
 if (cc->model && cc->model->version == CPU_VERSION_AUTO) {
@@ -4796,14 +4799,18 @@ static void x86_cpu_list_entry(gpointer data, gpointer 
user_data)
 desc = g_strdup_printf("(alias of %s)", alias_of);
 }
 }
+if (!desc && cc->model && cc->model->note) {
+desc = g_strdup_printf("%s [%s]", model_id, cc->model->note);
+}
 if (!desc) {
-desc = x86_cpu_class_get_model_id(cc);
+desc = g_strdup_printf("%s", model_id);
 }
 
-qemu_printf("x86 %-20s  %-48s\n", name, desc);
+qemu_printf("x86 %-20s  %-58s\n", name, desc);
 g_free(name);
 g_free(desc);
 g_free(alias_of);
+g_free(model_id);
 }
 
 /* list available CPU models and flags */
@@ -5280,6 +5287,7 @@ static void x86_register_cpudef_types(X86CPUDefinition 
*def)
 X86CPUModel *m = g_new0(X86CPUModel, 1);
 m->cpudef = def;
 m->version = vdef->version;
+m->note = vdef->note;
 name = x86_cpu_versioned_model_name(def, vdef->version);
 x86_register_cpu_model_type(name, m);
 g_free(name);
-- 
2.20.1

[PATCH v20 7/8] tests/numa: Add case for QMP build HMAT

2019-11-29 Thread Tao Xu

Add a test case that checks configuring HMAT through QMP.

Acked-by: Markus Armbruster 
Suggested-by: Igor Mammedov 
Signed-off-by: Tao Xu 
---

Changes in v20:
- Use g_assert_true and g_assert_false to replace g_assert
  (Thomas and Markus)

Changes in v19:
- Add some fail cases for hmat-cache when level=0

Changes in v18:
- Rewrite the lines over 80 characters

Changes in v17:
- Add some fail test cases (Igor)
---
 tests/numa-test.c | 213 ++
 1 file changed, 213 insertions(+)

diff --git a/tests/numa-test.c b/tests/numa-test.c
index 8de8581231..da1c19ef74 100644
--- a/tests/numa-test.c
+++ b/tests/numa-test.c
@@ -327,6 +327,216 @@ static void pc_dynamic_cpu_cfg(const void *data)
 qtest_quit(qs);
 }
 
+static void pc_hmat_build_cfg(const void *data)
+{
+QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on "
+ "-smp 2,sockets=2 "
+ "-m 128M,slots=2,maxmem=1G "
+ "-object memory-backend-ram,size=64M,id=m0 "
+ "-object memory-backend-ram,size=64M,id=m1 "
+ "-numa node,nodeid=0,memdev=m0 "
+ "-numa node,nodeid=1,memdev=m1,initiator=0 "
+ "-numa cpu,node-id=0,socket-id=0 "
+ "-numa cpu,node-id=0,socket-id=1",
+ data ? (char *)data : "");
+
+/* Fail: Initiator should be less than the number of nodes */
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 2, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+/* Fail: Target should be less than the number of nodes */
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 2,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+/* Fail: Initiator should contain cpu */
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 1, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+/* Fail: Data-type mismatch */
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"write-latency\","
+" 'bandwidth': 524288000 } }")));
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"read-bandwidth\","
+" 'latency': 5 } }")));
+
+/* Fail: Bandwidth should be 1MB (1048576) aligned */
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+" 'bandwidth': 1048575 } }")));
+
+/* Configuring HMAT bandwidth and latency details */
+g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+" 'latency': 1 } }")));/* 1 ns */
+g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+" 'latency': 5 } }")));/* Fail: Duplicate configuration */
+g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+" 'bandwidth': 68717379584 } }")));/* 65534 MB/s */
+g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+&qu

[PATCH v20 5/8] hmat acpi: Build System Locality Latency and Bandwidth Information Structure(s)

2019-11-29 Thread Tao Xu

From: Liu Jingqi 

This structure describes the memory access latency and bandwidth
information from various memory access initiator proximity domains.
The latency and bandwidth numbers represented in this structure
correspond to rated latency and bandwidth for the platform.
The software could use this information as a hint for optimization.

Acked-by: Markus Armbruster 
Reviewed-by: Igor Mammedov 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

Changes in v20:
- Fix the broken CI case when user input latency or bandwidth
  less than required

Changes in v17:
- Remove unnecessary header file (Igor)

Changes in v16:
- Add more description for lb_length (Igor)
- Drop entry_list and calculate entries in this patch (Igor)

Changes in v13:
- Calculate the entries in a new patch.
---
 hw/acpi/hmat.c | 104 -
 1 file changed, 103 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
index 9ff79308a4..4635d45dee 100644
--- a/hw/acpi/hmat.c
+++ b/hw/acpi/hmat.c
@@ -25,6 +25,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "sysemu/numa.h"
 #include "hw/acpi/hmat.h"
 
@@ -67,11 +68,89 @@ static void build_hmat_mpda(GArray *table_data, uint16_t 
flags,
 build_append_int_noprefix(table_data, 0, 8);
 }
 
+/*
+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+ * Structure: Table 5-146
+ */
+static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
+  uint32_t num_initiator, uint32_t num_target,
+  uint32_t *initiator_list)
+{
+int i, index;
+HMAT_LB_Data *lb_data;
+uint16_t *entry_list;
+uint32_t base;
+/* Length in bytes for entire structure */
+uint32_t lb_length
+= 32 /* Table length upto and including Entry Base Unit */
++ 4 * num_initiator /* Initiator Proximity Domain List */
++ 4 * num_target /* Target Proximity Domain List */
++ 2 * num_initiator * num_target; /* Latency or Bandwidth Entries */
+
+/* Type */
+build_append_int_noprefix(table_data, 1, 2);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Length */
+build_append_int_noprefix(table_data, lb_length, 4);
+/* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
+assert(!(hmat_lb->hierarchy >> 4));
+build_append_int_noprefix(table_data, hmat_lb->hierarchy, 1);
+/* Data Type */
+build_append_int_noprefix(table_data, hmat_lb->data_type, 1);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Number of Initiator Proximity Domains (s) */
+build_append_int_noprefix(table_data, num_initiator, 4);
+/* Number of Target Proximity Domains (t) */
+build_append_int_noprefix(table_data, num_target, 4);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 4);
+
+/* Entry Base Unit */
+if (hmat_lb->data_type <= HMAT_LB_DATA_WRITE_LATENCY) {
+/* Convert latency base from nanoseconds to picosecond */
+base = hmat_lb->base * 1000;
+} else {
+/* Convert bandwidth base from Byte to Megabyte */
+base = hmat_lb->base / MiB;
+}
+build_append_int_noprefix(table_data, base, 8);
+
+/* Initiator Proximity Domain List */
+for (i = 0; i < num_initiator; i++) {
+build_append_int_noprefix(table_data, initiator_list[i], 4);
+}
+
+/* Target Proximity Domain List */
+for (i = 0; i < num_target; i++) {
+build_append_int_noprefix(table_data, i, 4);
+}
+
+/* Latency or Bandwidth Entries */
+entry_list = g_malloc0(num_initiator * num_target * sizeof(uint16_t));
+for (i = 0; i < hmat_lb->list->len; i++) {
+lb_data = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+index = lb_data->initiator * num_target + lb_data->target;
+
+entry_list[index] = (uint16_t)(lb_data->data / hmat_lb->base);
+}
+
+for (i = 0; i < num_initiator * num_target; i++) {
+build_append_int_noprefix(table_data, entry_list[i], 2);
+}
+
+g_free(entry_list);
+}
+
 /* Build HMAT sub table structures */
 static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
 {
 uint16_t flags;
-int i;
+uint32_t num_initiator = 0;
+uint32_t initiator_list[MAX_NODES];
+int i, hierarchy, type;
+HMAT_LB_Info *hmat_lb;
 
 for (i = 0; i < numa_state->num_nodes; i++) {
 flags = 0;
@@ -82,6 +161,29 @@ static void hmat_build_table_structs(GArray *table_data, 
NumaState *numa_state)
 
 build_hmat_mpda(table_data, flags, numa_state->nodes[i].initiator, i);
 }
+
+for (i = 0; i < numa_state->num_nodes; i++) {
+if (numa_state->nodes[i].has_cpu) {
+initiator_list[num_initiator++] = i;
+}
+}

[PATCH v20 0/8] Build ACPI Heterogeneous Memory Attribute Table (HMAT)

2019-11-29 Thread Tao Xu

This series of patches will build Heterogeneous Memory Attribute Table (HMAT)
according to the command line. The ACPI HMAT describes the memory attributes,
such as memory side cache attributes and bandwidth and latency details,
related to the Memory Proximity Domain.
The software is expected to use HMAT information as a hint for optimization.

In the Linux kernel, the code in drivers/acpi/hmat/hmat.c parses and reports
the platform's HMAT tables.

The V19 patches link:
https://patchwork.kernel.org/cover/11265525/

Changelog:
v20:
- Use g_assert_true and g_assert_false to replace g_assert
  (Thomas and Markus)
- Rename assoc as associativity, update the QAPI description (Markus)
- Disable cache level 0 in hmat-cache option (Igor)
- Keep base and bitmap unchanged when latency or bandwidth
  out of range
- Fix the broken CI case when user input latency or bandwidth
  less than required.
v19:
- Add description about the machine property 'hmat' in commit
  message (Markus)
- Update the QAPI comments
- Add a check for no memory side cache
- Add some fail cases for hmat-cache when level=0
v18:
- Defer patches 01/14~06/14 of V17, use qapi type uint64 and
  only nanosecond for latency (Markus)
- Rewrite the lines over 80 characters(Igor)
v17:
- Add a check: when the user inputs a latency or bandwidth of 0, the
  lb_info_provided should also be 0, because in ACPI 6.3 5.2.27.4,
  0 means the corresponding latency or bandwidth information is
  not provided.
- Fix the infinite loop when node->latency is 0.
- Use NumaHmatCacheOptions to replace HMAT_Cache_Info (Igor)
- Add check for unordered cache level input (Igor)
- Add some fail test cases (Igor)
v16:
- Add and use qemu_strtold_finite to parse size, support full
  64bit precision, modify related test cases (Eduardo and Markus)
- Simplify struct HMAT_LB_Info and related code, unify latency
  and bandwidth (Igor)
- Add cross check with hmat_lb data (Igor)
- Fields in Cache Attributes are promoted to uint32_t before
  shifting (Igor)
- Add case for QMP build HMAT (Igor)
v15:
- Add a new patch to refactor do_strtosz() (Eduardo)
- Make tests without breaking CI (Michael)
v14:
- Reuse the codes of do_strtosz to build qemu_strtotime_ns
  (Eduardo)
- Squash patch v13 01/12 and 02/12 together (Daniel and Eduardo)
- Drop time unit picosecond (Eric)
- Use qemu ctz64 and clz64 instead of builtin function
v13:
- Modify some text description
- Drop "initiator_valid" field in struct NodeInfo
- Reuse Garray to store the raw bandwidth and bandwidth data
- Calculate common base unit using range bitmap
- Add a patch to calculate hmat latency and bandwidth entry list
- Drop the total_levels option and use readable cache size
- Remove the unnecessary header file
- Use decimal notation with appropriate suffix for cache size

Liu Jingqi (5):
  numa: Extend CLI to provide memory latency and bandwidth information
  numa: Extend CLI to provide memory side cache information
  hmat acpi: Build Memory Proximity Domain Attributes Structure(s)
  hmat acpi: Build System Locality Latency and Bandwidth Information
Structure(s)
  hmat acpi: Build Memory Side Cache Information Structure(s)

Tao Xu (3):
  numa: Extend CLI to provide initiator information for numa nodes
  tests/numa: Add case for QMP build HMAT
  tests/bios-tables-test: add test cases for ACPI HMAT

 hw/acpi/Kconfig   |   7 +-
 hw/acpi/Makefile.objs |   1 +
 hw/acpi/hmat.c| 268 +++
 hw/acpi/hmat.h|  42 
 hw/core/machine.c |  64 ++
 hw/core/numa.c| 297 ++
 hw/i386/acpi-build.c  |   5 +
 include/sysemu/numa.h |  63 ++
 qapi/machine.json | 180 +++-
 qemu-options.hx   |  95 +++-
 tests/bios-tables-test-allowed-diff.h |   8 +
 tests/bios-tables-test.c  |  44 
 tests/data/acpi/pc/APIC.acpihmat  |   0
 tests/data/acpi/pc/DSDT.acpihmat  |   0
 tests/data/acpi/pc/HMAT.acpihmat  |   0
 tests/data/acpi/pc/SRAT.acpihmat  |   0
 tests/data/acpi/q35/APIC.acpihmat |   0
 tests/data/acpi/q35/DSDT.acpihmat |   0
 tests/data/acpi/q35/HMAT.acpihmat |   0
 tests/data/acpi/q35/SRAT.acpihmat |   0
 tests/numa-test.c | 213 ++
 21 files changed, 1276 insertions(+), 11 deletions(-)
 create mode 100644 hw/acpi/hmat.c
 create mode 100644 hw/acpi/hmat.h
 create mode 100644 tests/data/acpi/pc/APIC.acpihmat
 create mode 100644 tests/data/acpi/pc/DSDT.acpihmat
 create mode 100644 tests/data/acpi/pc/HMAT.acpihmat
 create mode 100644 tests/data/acpi/pc/SRAT.acpihmat
 create mode 100644 tests/data/acpi/q35/APIC.acpihmat
 create mode 1006

[PATCH v20 8/8] tests/bios-tables-test: add test cases for ACPI HMAT

2019-11-29 Thread Tao Xu

The ACPI HMAT table has been introduced; QEMU now builds HMAT tables for
Heterogeneous Memory with boot option '-numa node'.

Add test cases on PC and Q35 machines with 2 numa nodes.
Because HMAT is generated when the system enables NUMA, the
following tables need to be added for this test:
tests/data/acpi/pc/APIC.acpihmat
tests/data/acpi/pc/SRAT.acpihmat
tests/data/acpi/pc/HMAT.acpihmat
tests/data/acpi/pc/DSDT.acpihmat
tests/data/acpi/q35/APIC.acpihmat
tests/data/acpi/q35/SRAT.acpihmat
tests/data/acpi/q35/HMAT.acpihmat
tests/data/acpi/q35/DSDT.acpihmat

Acked-by: Markus Armbruster 
Reviewed-by: Igor Mammedov 
Reviewed-by: Daniel Black 
Reviewed-by: Jingqi Liu 
Suggested-by: Igor Mammedov 
Signed-off-by: Tao Xu 
---

No changes in v20.

Changes in v18:
- Remove unit "ns".

Changes in v17:
- Update the latency and bandwidth

Changes in v15:
- Make tests without breaking CI (Michael)

Changes in v13:
- Use decimal notation with appropriate suffix for cache size
---
 tests/bios-tables-test-allowed-diff.h |  8 +
 tests/bios-tables-test.c  | 44 +++
 tests/data/acpi/pc/APIC.acpihmat  |  0
 tests/data/acpi/pc/DSDT.acpihmat  |  0
 tests/data/acpi/pc/HMAT.acpihmat  |  0
 tests/data/acpi/pc/SRAT.acpihmat  |  0
 tests/data/acpi/q35/APIC.acpihmat |  0
 tests/data/acpi/q35/DSDT.acpihmat |  0
 tests/data/acpi/q35/HMAT.acpihmat |  0
 tests/data/acpi/q35/SRAT.acpihmat |  0
 10 files changed, 52 insertions(+)
 create mode 100644 tests/data/acpi/pc/APIC.acpihmat
 create mode 100644 tests/data/acpi/pc/DSDT.acpihmat
 create mode 100644 tests/data/acpi/pc/HMAT.acpihmat
 create mode 100644 tests/data/acpi/pc/SRAT.acpihmat
 create mode 100644 tests/data/acpi/q35/APIC.acpihmat
 create mode 100644 tests/data/acpi/q35/DSDT.acpihmat
 create mode 100644 tests/data/acpi/q35/HMAT.acpihmat
 create mode 100644 tests/data/acpi/q35/SRAT.acpihmat

diff --git a/tests/bios-tables-test-allowed-diff.h 
b/tests/bios-tables-test-allowed-diff.h
index dfb8523c8b..3c9e0c979b 100644
--- a/tests/bios-tables-test-allowed-diff.h
+++ b/tests/bios-tables-test-allowed-diff.h
@@ -1 +1,9 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/pc/APIC.acpihmat",
+"tests/data/acpi/pc/SRAT.acpihmat",
+"tests/data/acpi/pc/HMAT.acpihmat",
+"tests/data/acpi/pc/DSDT.acpihmat",
+"tests/data/acpi/q35/APIC.acpihmat",
+"tests/data/acpi/q35/SRAT.acpihmat",
+"tests/data/acpi/q35/HMAT.acpihmat",
+"tests/data/acpi/q35/DSDT.acpihmat",
diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index 79f5da092f..9823820043 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -947,6 +947,48 @@ static void test_acpi_virt_tcg_numamem(void)
 
 }
 
+static void test_acpi_tcg_acpi_hmat(const char *machine)
+{
+test_data data;
+
+memset(&data, 0, sizeof(data));
+data.machine = machine;
+data.variant = ".acpihmat";
+test_acpi_one(" -machine hmat=on"
+  " -smp 2,sockets=2"
+  " -m 128M,slots=2,maxmem=1G"
+  " -object memory-backend-ram,size=64M,id=m0"
+  " -object memory-backend-ram,size=64M,id=m1"
+  " -numa node,nodeid=0,memdev=m0"
+  " -numa node,nodeid=1,memdev=m1,initiator=0"
+  " -numa cpu,node-id=0,socket-id=0"
+  " -numa cpu,node-id=0,socket-id=1"
+  " -numa hmat-lb,initiator=0,target=0,hierarchy=memory,"
+  "data-type=access-latency,latency=1"
+  " -numa hmat-lb,initiator=0,target=0,hierarchy=memory,"
+  "data-type=access-bandwidth,bandwidth=65534M"
+  " -numa hmat-lb,initiator=0,target=1,hierarchy=memory,"
+  "data-type=access-latency,latency=65534"
+  " -numa hmat-lb,initiator=0,target=1,hierarchy=memory,"
+  "data-type=access-bandwidth,bandwidth=32767M"
+  " -numa hmat-cache,node-id=0,size=10K,level=1,"
+  "associativity=direct,policy=write-back,line=8"
+  " -numa hmat-cache,node-id=1,size=10K,level=1,"
+  "associativity=direct,policy=write-back,line=8",
+  &data);
+free_test_data(&data);
+}
+
+static void test_acpi_q35_tcg_acpi_hmat(void)
+{
+test_acpi_tcg_acpi_hmat(MACHINE_Q35);
+}
+
+static void test_acpi_piix4_tcg_acpi_hmat(void)
+{
+test_acpi_tcg_acpi_hmat(MACHINE_PC);
+}
+
 static void test_acpi_virt_tcg(void)
 {
 test_data data = {
@@ -991,6 +1033,8 @@ int main(int argc, char *argv[])
 qtest_add_func("acpi/q35/numamem", test_acpi

[PATCH v20 2/8] numa: Extend CLI to provide memory latency and bandwidth information

2019-11-29 Thread Tao Xu

From: Liu Jingqi 

Add -numa hmat-lb option to provide System Locality Latency and
Bandwidth Information. These memory attributes help to build
System Locality Latency and Bandwidth Information Structure(s)
in ACPI Heterogeneous Memory Attribute Table (HMAT). Before using
hmat-lb option, enable HMAT with -machine hmat=on.

Acked-by: Markus Armbruster 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

Changes in v20:
- Update the QAPI description (Markus)
- Keep base and bitmap unchanged when latency or bandwidth
  out of range

Changes in v19:
- Add description about the machine property 'hmat' in commit
  message (Markus)

Changes in v18:
- Use qapi type uint64 and only nanosecond for latency (Markus)

Changes in v17:
- Add a check: when the user inputs a latency or bandwidth of 0, the
  lb_info_provided should also be 0, because in ACPI 6.3 5.2.27.4,
  0 means the corresponding latency or bandwidth information is
  not provided.
- Fix the infinite loop when node->latency is 0.
---
 hw/core/numa.c| 194 ++
 include/sysemu/numa.h |  53 
 qapi/machine.json |  93 +++-
 qemu-options.hx   |  47 +-
 4 files changed, 384 insertions(+), 3 deletions(-)

diff --git a/hw/core/numa.c b/hw/core/numa.c
index e60da99293..34eb413f5d 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "sysemu/hostmem.h"
 #include "sysemu/numa.h"
 #include "sysemu/sysemu.h"
@@ -198,6 +199,186 @@ void parse_numa_distance(MachineState *ms, 
NumaDistOptions *dist, Error **errp)
 ms->numa_state->have_numa_distance = true;
 }
 
+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
+Error **errp)
+{
+int i, first_bit, last_bit;
+uint64_t max_entry, temp_base, bitmap_copy;
+NodeInfo *numa_info = numa_state->nodes;
+HMAT_LB_Info *hmat_lb =
+numa_state->hmat_lb[node->hierarchy][node->data_type];
+HMAT_LB_Data lb_data = {};
+HMAT_LB_Data *lb_temp;
+
+/* Error checking */
+if (node->initiator > numa_state->num_nodes) {
+error_setg(errp, "Invalid initiator=%d, it should be less than %d",
+   node->initiator, numa_state->num_nodes);
+return;
+}
+if (node->target > numa_state->num_nodes) {
+error_setg(errp, "Invalid target=%d, it should be less than %d",
+   node->target, numa_state->num_nodes);
+return;
+}
+if (!numa_info[node->initiator].has_cpu) {
+error_setg(errp, "Invalid initiator=%d, it isn't an "
+   "initiator proximity domain", node->initiator);
+return;
+}
+if (!numa_info[node->target].present) {
+error_setg(errp, "The target=%d should point to an existing node",
+   node->target);
+return;
+}
+
+if (!hmat_lb) {
+hmat_lb = g_malloc0(sizeof(*hmat_lb));
+numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb;
+hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data));
+}
+hmat_lb->hierarchy = node->hierarchy;
+hmat_lb->data_type = node->data_type;
+lb_data.initiator = node->initiator;
+lb_data.target = node->target;
+
+if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) {
+/* Input latency data */
+
+if (!node->has_latency) {
+error_setg(errp, "Missing 'latency' option");
+return;
+}
+if (node->has_bandwidth) {
+error_setg(errp, "Invalid option 'bandwidth' since "
+   "the data type is latency");
+return;
+}
+
+/* Detect duplicate configuration */
+for (i = 0; i < hmat_lb->list->len; i++) {
+lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+
+if (node->initiator == lb_temp->initiator &&
+node->target == lb_temp->target) {
+error_setg(errp, "Duplicate configuration of the latency for "
+"initiator=%d and target=%d", node->initiator,
+node->target);
+return;
+}
+}
+
+hmat_lb->base = hmat_lb->base ? hmat_lb->base : UINT64_MAX;
+
+if (node->latency) {
+/* Calculate the temporary base and compressed latency */
+max_entry = node->latency;
+temp_base = 1;
+while (QEMU_IS_ALIGNED(max_entry, 10)) {
+max_entry /= 10;
+temp_base *= 10;
+}
+
+/* C

[PATCH v20 4/8] hmat acpi: Build Memory Proximity Domain Attributes Structure(s)

2019-11-29 Thread Tao Xu

From: Liu Jingqi 

HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
(HMAT). The specification is available at the link below:
http://www.uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf

It describes the memory attributes, such as memory side cache
attributes and bandwidth and latency details, related to the
Memory Proximity Domain. The software is
expected to use this information as hint for optimization.

This structure describes Memory Proximity Domain Attributes by memory
subsystem and its associativity with processor proximity domain as well as
hint for memory usage.

In the Linux kernel, the code in drivers/acpi/hmat/hmat.c parses and reports
the platform's HMAT tables.

Acked-by: Markus Armbruster 
Reviewed-by: Igor Mammedov 
Reviewed-by: Daniel Black 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

No changes in v20.

Changes in v16:
- Use uint32_t for initiator and mem_node

Changes in v13:
- Remove the unnecessary head file.
---
 hw/acpi/Kconfig   |  7 ++-
 hw/acpi/Makefile.objs |  1 +
 hw/acpi/hmat.c| 99 +++
 hw/acpi/hmat.h| 42 ++
 hw/i386/acpi-build.c  |  5 +++
 5 files changed, 152 insertions(+), 2 deletions(-)
 create mode 100644 hw/acpi/hmat.c
 create mode 100644 hw/acpi/hmat.h

diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 12e3f1e86e..54209c6f2f 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -7,6 +7,7 @@ config ACPI_X86
 select ACPI_NVDIMM
 select ACPI_CPU_HOTPLUG
 select ACPI_MEMORY_HOTPLUG
+select ACPI_HMAT
 
 config ACPI_X86_ICH
 bool
@@ -23,6 +24,10 @@ config ACPI_NVDIMM
 bool
 depends on ACPI
 
+config ACPI_HMAT
+bool
+depends on ACPI
+
 config ACPI_PCI
 bool
 depends on ACPI && PCI
@@ -33,5 +38,3 @@ config ACPI_VMGENID
 depends on PC
 
 config ACPI_HW_REDUCED
-bool
-depends on ACPI
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 655a9c1973..517bd88704 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -7,6 +7,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
 common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
 common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
 common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o
+common-obj-$(CONFIG_ACPI_HMAT) += hmat.o
 common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
 
 common-obj-y += acpi_interface.o
diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
new file mode 100644
index 00..9ff79308a4
--- /dev/null
+++ b/hw/acpi/hmat.c
@@ -0,0 +1,99 @@
+/*
+ * HMAT ACPI Implementation
+ *
+ * Copyright(C) 2019 Intel Corporation.
+ *
+ * Author:
+ *  Liu jingqi 
+ *  Tao Xu 
+ *
+ * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
+ * (HMAT)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/numa.h"
+#include "hw/acpi/hmat.h"
+
+/*
+ * ACPI 6.3:
+ * 5.2.27.3 Memory Proximity Domain Attributes Structure: Table 5-145
+ */
+static void build_hmat_mpda(GArray *table_data, uint16_t flags,
+uint32_t initiator, uint32_t mem_node)
+{
+
+/* Memory Proximity Domain Attributes Structure */
+/* Type */
+build_append_int_noprefix(table_data, 0, 2);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Length */
+build_append_int_noprefix(table_data, 40, 4);
+/* Flags */
+build_append_int_noprefix(table_data, flags, 2);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Proximity Domain for the Attached Initiator */
+build_append_int_noprefix(table_data, initiator, 4);
+/* Proximity Domain for the Memory */
+build_append_int_noprefix(table_data, mem_node, 4);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 4);
+/*
+ * Reserved:
+ * Previously defined as the Start Address of the System Physical
+ * Address Range. Deprecated since ACPI Spec 6.3.
+ */
+build_append_int_noprefix(table_data, 0, 8);
+/*
+ * Reserved:
+ * Previously defined as the Range Length of the region in bytes.
+ * Deprecated since ACPI Spec 6.3.
+ */
+build_append_int_noprefix(table_data, 0, 8);
+}
+
+/* Bui

[PATCH v20 1/8] numa: Extend CLI to provide initiator information for numa nodes

2019-11-29 Thread Tao Xu

In ACPI 6.3 chapter 5.2.27 Heterogeneous Memory Attribute Table (HMAT),
the initiator represents a processor which accesses memory. In 5.2.27.3
Memory Proximity Domain Attributes Structure, the attached initiator is
defined as the place where the memory controller responsible for a memory
proximity domain is located. With attached initiator information, the
topology of heterogeneous memory can be described. Add new machine property
'hmat' to enable all HMAT specific options.

Extend CLI of "-numa node" option to indicate the initiator numa node-id.
In the Linux kernel, the code in drivers/acpi/hmat/hmat.c parses and reports
the platform's HMAT tables. Before using initiator option, enable HMAT with
-machine hmat=on.

Acked-by: Markus Armbruster 
Reviewed-by: Igor Mammedov 
Reviewed-by: Jingqi Liu 
Suggested-by: Dan Williams 
Signed-off-by: Tao Xu 
---

No changes in v20.

Changes in v19:
- Add description about the machine property 'hmat' in commit
  message (Markus)

Changes in v15:
- Change the QAPI version tag to 5.0 (Eric)
---
 hw/core/machine.c | 64 +++
 hw/core/numa.c| 23 
 include/sysemu/numa.h |  5 
 qapi/machine.json | 10 ++-
 qemu-options.hx   | 35 +++
 5 files changed, 131 insertions(+), 6 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 1689ad3bf8..d7d2cfa66d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -518,6 +518,20 @@ static void machine_set_nvdimm(Object *obj, bool value, 
Error **errp)
 ms->nvdimms_state->is_enabled = value;
 }
 
+static bool machine_get_hmat(Object *obj, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+return ms->numa_state->hmat_enabled;
+}
+
+static void machine_set_hmat(Object *obj, bool value, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+ms->numa_state->hmat_enabled = value;
+}
+
 static char *machine_get_nvdimm_persistence(Object *obj, Error **errp)
 {
 MachineState *ms = MACHINE(obj);
@@ -645,6 +659,7 @@ void machine_set_cpu_numa_node(MachineState *machine,
const CpuInstanceProperties *props, Error 
**errp)
 {
 MachineClass *mc = MACHINE_GET_CLASS(machine);
+NodeInfo *numa_info = machine->numa_state->nodes;
 bool match = false;
 int i;
 
@@ -714,6 +729,17 @@ void machine_set_cpu_numa_node(MachineState *machine,
 match = true;
 slot->props.node_id = props->node_id;
 slot->props.has_node_id = props->has_node_id;
+
+if (machine->numa_state->hmat_enabled) {
+if ((numa_info[props->node_id].initiator < MAX_NODES) &&
+(props->node_id != numa_info[props->node_id].initiator)) {
+error_setg(errp, "The initiator of CPU NUMA node %" PRId64
+" should be itself", props->node_id);
+return;
+}
+numa_info[props->node_id].has_cpu = true;
+numa_info[props->node_id].initiator = props->node_id;
+}
 }
 
 if (!match) {
@@ -960,6 +986,13 @@ static void machine_initfn(Object *obj)
 
 if (mc->numa_mem_supported) {
 ms->numa_state = g_new0(NumaState, 1);
+object_property_add_bool(obj, "hmat",
+ machine_get_hmat, machine_set_hmat,
+ &error_abort);
+object_property_set_description(obj, "hmat",
+"Set on/off to enable/disable "
+"ACPI Heterogeneous Memory Attribute "
+"Table (HMAT)", NULL);
 }
 
 /* Register notifier when init is done for sysbus sanity checks */
@@ -1048,6 +1081,32 @@ static char *cpu_slot_to_string(const CPUArchId *cpu)
 return g_string_free(s, false);
 }
 
+static void numa_validate_initiator(NumaState *numa_state)
+{
+int i;
+NodeInfo *numa_info = numa_state->nodes;
+
+for (i = 0; i < numa_state->num_nodes; i++) {
+if (numa_info[i].initiator == MAX_NODES) {
+error_report("The initiator of NUMA node %d is missing, use "
+ "'-numa node,initiator' option to declare it", i);
+exit(1);
+}
+
+if (!numa_info[numa_info[i].initiator].present) {
+error_report("NUMA node %" PRIu16 " is missing, use "
+ "'-numa node' option to declare it first",
+ numa_info[i].initiator);
+exit(1);
+}
+
+if (!numa_info[numa_info[i].initiator].has_cpu) {
+error_report("The initiator of NUMA node %d is invalid", i);
+exit(1);
+}
+}
+}
+
 static void machine_numa_finish_cpu_init(Mach

[PATCH v20 6/8] hmat acpi: Build Memory Side Cache Information Structure(s)

2019-11-29 Thread Tao Xu

From: Liu Jingqi 

This structure describes memory side cache information for memory
proximity domains if the memory side cache is present and the
physical device forms the memory side cache.
The software could use this information to effectively place
the data in memory to maximize the performance of the system
memory that uses the memory side cache.

Acked-by: Markus Armbruster 
Reviewed-by: Igor Mammedov 
Reviewed-by: Daniel Black 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

No changes in v20.

Changes in v16:
- Use checks and assert to replace masks (Igor)
- Fields in Cache Attributes are promoted to uint32_t before
  shifting (Igor)
- Drop cpu_to_le32() (Igor)

Changes in v13:
- rename level as cache_level
---
 hw/acpi/hmat.c | 69 +-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
index 4635d45dee..7c24bb5371 100644
--- a/hw/acpi/hmat.c
+++ b/hw/acpi/hmat.c
@@ -143,14 +143,62 @@ static void build_hmat_lb(GArray *table_data, 
HMAT_LB_Info *hmat_lb,
 g_free(entry_list);
 }
 
+/* ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure: Table 5-147 */
+static void build_hmat_cache(GArray *table_data, uint8_t total_levels,
+ NumaHmatCacheOptions *hmat_cache)
+{
+/*
+ * Cache Attributes: Bits [3:0] – Total Cache Levels
+ * for this Memory Proximity Domain
+ */
+uint32_t cache_attr = total_levels;
+
+/* Bits [7:4] : Cache Level described in this structure */
+cache_attr |= (uint32_t) hmat_cache->level << 4;
+
+/* Bits [11:8] - Cache Associativity */
+cache_attr |= (uint32_t) hmat_cache->associativity << 8;
+
+/* Bits [15:12] - Write Policy */
+cache_attr |= (uint32_t) hmat_cache->policy << 12;
+
+/* Bits [31:16] - Cache Line size in bytes */
+cache_attr |= (uint32_t) hmat_cache->line << 16;
+
+/* Type */
+build_append_int_noprefix(table_data, 2, 2);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Length */
+build_append_int_noprefix(table_data, 32, 4);
+/* Proximity Domain for the Memory */
+build_append_int_noprefix(table_data, hmat_cache->node_id, 4);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 4);
+/* Memory Side Cache Size */
+build_append_int_noprefix(table_data, hmat_cache->size, 8);
+/* Cache Attributes */
+build_append_int_noprefix(table_data, cache_attr, 4);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/*
+ * Number of SMBIOS handles (n)
+ * Linux kernel uses Memory Side Cache Information Structure
+ * without SMBIOS entries for now, so set Number of SMBIOS handles
+ * as 0.
+ */
+build_append_int_noprefix(table_data, 0, 2);
+}
+
 /* Build HMAT sub table structures */
 static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
 {
 uint16_t flags;
 uint32_t num_initiator = 0;
 uint32_t initiator_list[MAX_NODES];
-int i, hierarchy, type;
+int i, hierarchy, type, cache_level, total_levels;
 HMAT_LB_Info *hmat_lb;
+NumaHmatCacheOptions *hmat_cache;
 
 for (i = 0; i < numa_state->num_nodes; i++) {
 flags = 0;
@@ -184,6 +232,25 @@ static void hmat_build_table_structs(GArray *table_data, 
NumaState *numa_state)
 }
 }
 }
+
+/*
+ * ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure:
+ * Table 5-147
+ */
+for (i = 0; i < numa_state->num_nodes; i++) {
+total_levels = 0;
+for (cache_level = 1; cache_level < HMAT_LB_LEVELS; cache_level++) {
+if (numa_state->hmat_cache[i][cache_level]) {
+total_levels++;
+}
+}
+for (cache_level = 0; cache_level <= total_levels; cache_level++) {
+hmat_cache = numa_state->hmat_cache[i][cache_level];
+if (hmat_cache) {
+build_hmat_cache(table_data, total_levels, hmat_cache);
+}
+}
+}
 }
 
 void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state)
-- 
2.20.1

[PATCH v20 3/8] numa: Extend CLI to provide memory side cache information

2019-11-28 Thread Tao Xu

From: Liu Jingqi 

Add -numa hmat-cache option to provide Memory Side Cache Information.
These memory attributes help to build Memory Side Cache Information
Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT).
Before using hmat-cache option, enable HMAT with -machine hmat=on.

Acked-by: Markus Armbruster 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

Changes in v20:
- Disable cache level 0 in hmat-cache option (Igor)
- Update the QAPI description (Markus)

Changes in v19:
- Add description about the machine property 'hmat' in commit
  message (Markus)
- Update the QAPI comments
- Add a check for no memory side cache

Changes in v18:
- Update the error message (Igor)

Changes in v17:
- Use NumaHmatCacheOptions to replace HMAT_Cache_Info (Igor)
- Add check for unordered cache level input (Igor)

Changes in v16:
- Add cross check with hmat_lb data (Igor)
- Drop total_levels in struct HMAT_Cache_Info (Igor)
- Correct the error table number (Igor)
---
 hw/core/numa.c| 80 ++
 include/sysemu/numa.h |  5 +++
 qapi/machine.json | 81 +--
 qemu-options.hx   | 17 +++--
 4 files changed, 179 insertions(+), 4 deletions(-)

diff --git a/hw/core/numa.c b/hw/core/numa.c
index 34eb413f5d..33fda31a4c 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -379,6 +379,73 @@ void parse_numa_hmat_lb(NumaState *numa_state, 
NumaHmatLBOptions *node,
 g_array_append_val(hmat_lb->list, lb_data);
 }
 
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
+   Error **errp)
+{
+int nb_numa_nodes = ms->numa_state->num_nodes;
+NodeInfo *numa_info = ms->numa_state->nodes;
+NumaHmatCacheOptions *hmat_cache = NULL;
+
+if (node->node_id >= nb_numa_nodes) {
+error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
+   "than %d", node->node_id, nb_numa_nodes);
+return;
+}
+
+if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) {
+error_setg(errp, "The latency and bandwidth information of "
+   "node-id=%" PRIu32 " should be provided before memory side "
+   "cache attributes", node->node_id);
+return;
+}
+
+if (node->level < 1 || node->level >= HMAT_LB_LEVELS) {
+error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 "
+   "and less than or equal to %d", node->level,
+   HMAT_LB_LEVELS - 1);
+return;
+}
+
+assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX);
+assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);
+if (ms->numa_state->hmat_cache[node->node_id][node->level]) {
+error_setg(errp, "Duplicate configuration of the side cache for "
+   "node-id=%" PRIu32 " and level=%" PRIu8,
+   node->node_id, node->level);
+return;
+}
+
+if ((node->level > 1) &&
+ms->numa_state->hmat_cache[node->node_id][node->level - 1] &&
+(node->size >=
+ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) {
+error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+   " should be less than the size(%" PRIu64 ") of "
+   "level=%" PRIu8, node->size, node->level,
+   ms->numa_state->hmat_cache[node->node_id]
+ [node->level - 1]->size,
+   node->level - 1);
+return;
+}
+
+if ((node->level < HMAT_LB_LEVELS - 1) &&
+ms->numa_state->hmat_cache[node->node_id][node->level + 1] &&
+(node->size <=
+ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) {
+error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+   " should be larger than the size(%" PRIu64 ") of "
+   "level=%" PRIu8, node->size, node->level,
+   ms->numa_state->hmat_cache[node->node_id]
+ [node->level + 1]->size,
+   node->level + 1);
+return;
+}
+
+hmat_cache = g_malloc0(sizeof(*hmat_cache));
+memcpy(hmat_cache, node, sizeof(*hmat_cache));
+ms->numa_state->hmat_cache[node->node_id][node->level] = hmat_cache;
+}
+
 void set_numa_options(MachineState *ms, NumaOptions *ob

Re: [PATCH v19 7/8] tests/numa: Add case for QMP build HMAT

2019-11-28 Thread Tao Xu


On 11/28/2019 7:53 PM, Thomas Huth wrote:

On 28/11/2019 12.49, Markus Armbruster wrote:

Tao Xu  writes:


Check configuring HMAT usecase

Reviewed-by: Igor Mammedov 
Suggested-by: Igor Mammedov 
Signed-off-by: Tao Xu 
---

Changes in v19:
  - Add some fail cases for hmat-cache when level=0

Changes in v18:
  - Rewrite the lines over 80 characters

Changes in v17:
  - Add some fail test cases (Igor)
---
   tests/numa-test.c | 213 ++
   1 file changed, 213 insertions(+)

diff --git a/tests/numa-test.c b/tests/numa-test.c
index 8de8581231..aed7b2f31b 100644
--- a/tests/numa-test.c
+++ b/tests/numa-test.c
@@ -327,6 +327,216 @@ static void pc_dynamic_cpu_cfg(const void *data)
   qtest_quit(qs);
   }
   
+static void pc_hmat_build_cfg(const void *data)

+{
+QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on "
+ "-smp 2,sockets=2 "
+ "-m 128M,slots=2,maxmem=1G "
+ "-object memory-backend-ram,size=64M,id=m0 "
+ "-object memory-backend-ram,size=64M,id=m1 "
+ "-numa node,nodeid=0,memdev=m0 "
+ "-numa node,nodeid=1,memdev=m1,initiator=0 "
+ "-numa cpu,node-id=0,socket-id=0 "
+ "-numa cpu,node-id=0,socket-id=1",
+ data ? (char *)data : "");
+
+/* Fail: Initiator should be less than the number of nodes */
+g_assert(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 2, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));


Code smell: side effect within assert().

Harmless here, because compiling tests with NDEBUG is pointless.  Still,
it sets a bad example.  Not your idea, the pattern seems to go back to
commit c35665e1ee3 and fb1e58f72ba.


... maybe best to use g_assert_true() which can't be disabled and thus
should be used in tests. See:

   https://developer.gnome.org/glib/unstable/glib-Testing.html#g-assert-true

   Thomas

Thank you for your suggestion. I will use g_assert_true and 
g_assert_false to replace g_assert

Re: [PATCH v19 2/8] numa: Extend CLI to provide memory latency and bandwidth information

2019-11-28 Thread Tao Xu


On 11/28/2019 7:50 PM, Markus Armbruster wrote:

Tao Xu  writes:


From: Liu Jingqi 

Add -numa hmat-lb option to provide System Locality Latency and
Bandwidth Information. These memory attributes help to build
System Locality Latency and Bandwidth Information Structure(s)
in ACPI Heterogeneous Memory Attribute Table (HMAT). Before using
hmat-lb option, enable HMAT with -machine hmat=on.

Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

[...]

diff --git a/qapi/machine.json b/qapi/machine.json
index 27d0e37534..cf9851fcd1 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -426,10 +426,12 @@
  #
  # @cpu: property based CPU(s) to node mapping (Since: 2.10)
  #
+# @hmat-lb: memory latency and bandwidth information (Since: 5.0)
+#
  # Since: 2.1
  ##
  { 'enum': 'NumaOptionsType',
-  'data': [ 'node', 'dist', 'cpu' ] }
+  'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] }
  
  ##

  # @NumaOptions:
@@ -444,7 +446,8 @@
'data': {
  'node': 'NumaNodeOptions',
  'dist': 'NumaDistOptions',
-'cpu': 'NumaCpuOptions' }}
+'cpu': 'NumaCpuOptions',
+'hmat-lb': 'NumaHmatLBOptions' }}
  
  ##

  # @NumaNodeOptions:
@@ -557,6 +560,92 @@
 'base': 'CpuInstanceProperties',
 'data' : {} }
  
+##

+# @HmatLBMemoryHierarchy:
+#
+# The memory hierarchy in the System Locality Latency and Bandwidth
+# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
+#
+# For more information about @HmatLBMemoryHierarchy see chapter


@HmatLBMemoryHierarchy, see


+# 5.2.27.4: Table 5-146: Field "Flags" of ACPI 6.3 spec.
+#
+# @memory: the structure represents the memory performance
+#
+# @first-level: first level of memory side cache
+#
+# @second-level: second level of memory side cache
+#
+# @third-level: third level of memory side cache
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatLBMemoryHierarchy',
+  'data': [ 'memory', 'first-level', 'second-level', 'third-level' ] }
+
+##
+# @HmatLBDataType:
+#
+# Data type in the System Locality Latency and Bandwidth
+# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
+#
+# For more information about @HmatLBDataType see chapter


@HmatLBDataType, see


+# 5.2.27.4: Table 5-146:  Field "Data Type" of ACPI 6.3 spec.
+#
+# @access-latency: access latency (nanoseconds)
+#
+# @read-latency: read latency (nanoseconds)
+#
+# @write-latency: write latency (nanoseconds)
+#
+# @access-bandwidth: access bandwidth (Bytes per second)
+#
+# @read-bandwidth: read bandwidth (Bytes per second)
+#
+# @write-bandwidth: write bandwidth (Bytes per second)
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatLBDataType',
+  'data': [ 'access-latency', 'read-latency', 'write-latency',
+'access-bandwidth', 'read-bandwidth', 'write-bandwidth' ] }
+
+##
+# @NumaHmatLBOptions:
+#
+# Set the system locality latency and bandwidth information
+# between Initiator and Target proximity Domains.
+#
+# For more information about @NumaHmatLBOptions see chapter


@NumaHmatLBOptions, see


+# 5.2.27.4: Table 5-146 of ACPI 6.3 spec.
+#
+# @initiator: the Initiator Proximity Domain.
+#
+# @target: the Target Proximity Domain.
+#
+# @hierarchy: the Memory Hierarchy. Indicates the performance
+# of memory or side cache.
+#
+# @data-type: presents the type of data, access/read/write
+# latency or hit latency.
+#
+# @latency: the value of latency from @initiator to @target
+#   proximity domain, the latency unit is "ns(nanosecond)".
+#
+# @bandwidth: the value of bandwidth between @initiator and @target
+# proximity domain, the bandwidth unit is
+# "Bytes per second".
+#
+# Since: 5.0
+##
+{ 'struct': 'NumaHmatLBOptions',
+'data': {
+'initiator': 'uint16',
+'target': 'uint16',
+'hierarchy': 'HmatLBMemoryHierarchy',
+'data-type': 'HmatLBDataType',
+'*latency': 'uint64',
+'*bandwidth': 'size' }}
+
  ##
  # @HostMemPolicy:
  #
diff --git a/qemu-options.hx b/qemu-options.hx
index 63f6b33322..23303fc7d7 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -168,16 +168,19 @@ DEF("numa", HAS_ARG, QEMU_OPTION_numa,
  "-numa 
node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
  "-numa 
node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
  "-numa dist,src=source,dst=destination,val=distance\n"
-"-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n",
+"-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n"
+"-numa 
hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n",
  QEMU_ARCH_ALL)
  STEXI
  @item -numa 
node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
  @itemx -numa 
node[,memdev=@var{id}][,cpus=@var{firstcpu}[

Re: [PATCH v19 3/8] numa: Extend CLI to provide memory side cache information

2019-11-28 Thread Tao Xu


On 11/28/2019 7:50 PM, Markus Armbruster wrote:

Tao Xu  writes:


From: Liu Jingqi 

Add -numa hmat-cache option to provide Memory Side Cache Information.
These memory attributes help to build Memory Side Cache Information
Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT).
Before using hmat-cache option, enable HMAT with -machine hmat=on.

Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

Changes in v19:
 - Add description about the machine property 'hmat' in commit
   message (Markus)
 - Update the QAPI comments
 - Add a check for no memory side cache

Changes in v18:
 - Update the error message (Igor)

Changes in v17:
 - Use NumaHmatCacheOptions to replace HMAT_Cache_Info (Igor)
 - Add check for unordered cache level input (Igor)

Changes in v16:
 - Add cross check with hmat_lb data (Igor)
 - Drop total_levels in struct HMAT_Cache_Info (Igor)
 - Correct the error table number (Igor)

Changes in v15:
 - Change the QAPI version tag to 5.0 (Eric)
---
  hw/core/numa.c| 86 +++
  include/sysemu/numa.h |  5 +++
  qapi/machine.json | 81 +++-
  qemu-options.hx   | 16 +++-
  4 files changed, 184 insertions(+), 4 deletions(-)

diff --git a/hw/core/numa.c b/hw/core/numa.c
index 2183c8df1f..664b44ad68 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -366,6 +366,79 @@ void parse_numa_hmat_lb(NumaState *numa_state, 
NumaHmatLBOptions *node,
  g_array_append_val(hmat_lb->list, lb_data);
  }
  
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,

+   Error **errp)
+{
+int nb_numa_nodes = ms->numa_state->num_nodes;
+NodeInfo *numa_info = ms->numa_state->nodes;
+NumaHmatCacheOptions *hmat_cache = NULL;
+
+if (node->node_id >= nb_numa_nodes) {
+error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
+   "than %d", node->node_id, nb_numa_nodes);
+return;
+}
+
+if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) {
+error_setg(errp, "The latency and bandwidth information of "
+   "node-id=%" PRIu32 " should be provided before memory side "
+   "cache attributes", node->node_id);
+return;
+}
+
+if (node->level >= HMAT_LB_LEVELS) {
+error_setg(errp, "Invalid level=%" PRIu8 ", it should be less than or "
+   "equal to %d", node->level, HMAT_LB_LEVELS - 1);
+return;
+}
+
+if (!node->level && (node->assoc || node->policy || node->line)) {
+error_setg(errp, "Assoc and policy options should be 'none', line "
+   "should be 0. If cache level is 0, which means no memory "
+   "side cache in node-id=%" PRIu32, node->node_id);


Error messages should be a phrase, not a paragraph; see error_setg()'s
function comment.  I think you want something like "be 0 when cache
level is 0".

I'm not sure the error message should explain what level 0 means, but
I'm happy to defer to the NUMA maintainers there.


+return;
+}
+
+assert(node->assoc < HMAT_CACHE_ASSOCIATIVITY__MAX);
+assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);
+if (ms->numa_state->hmat_cache[node->node_id][node->level]) {
+error_setg(errp, "Duplicate configuration of the side cache for "
+   "node-id=%" PRIu32 " and level=%" PRIu8,
+   node->node_id, node->level);
+return;
+}
+
+if ((node->level > 1) &&
+ms->numa_state->hmat_cache[node->node_id][node->level - 1] &&
+(node->size >=
+ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) 
{
+error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+   " should be less than the size(%" PRIu64 ") of "
+   "level=%" PRIu8, node->size, node->level,
+   ms->numa_state->hmat_cache[node->node_id]
+ [node->level - 1]->size,
+   node->level - 1);
+return;
+}
+
+if ((node->level < HMAT_LB_LEVELS - 1) &&
+ms->numa_state->hmat_cache[node->node_id][node->level + 1] &&
+(node->size <=
+ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) 
{
+error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+   " should be l

Re: [PATCH v19 3/8] numa: Extend CLI to provide memory side cache information

2019-11-28 Thread Tao Xu


On 11/28/2019 9:57 PM, Igor Mammedov wrote:

On Thu, 28 Nov 2019 12:50:36 +0100
Markus Armbruster  wrote:


Tao Xu  writes:


From: Liu Jingqi 

Add -numa hmat-cache option to provide Memory Side Cache Information.
These memory attributes help to build Memory Side Cache Information
Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT).
Before using hmat-cache option, enable HMAT with -machine hmat=on.

Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

Changes in v19:
 - Add description about the machine property 'hmat' in commit
   message (Markus)
 - Update the QAPI comments
 - Add a check for no memory side cache

Changes in v18:
 - Update the error message (Igor)

Changes in v17:
 - Use NumaHmatCacheOptions to replace HMAT_Cache_Info (Igor)
 - Add check for unordered cache level input (Igor)

Changes in v16:
 - Add cross check with hmat_lb data (Igor)
 - Drop total_levels in struct HMAT_Cache_Info (Igor)
 - Correct the error table number (Igor)

Changes in v15:
 - Change the QAPI version tag to 5.0 (Eric)
---
  hw/core/numa.c| 86 +++
  include/sysemu/numa.h |  5 +++
  qapi/machine.json | 81 +++-
  qemu-options.hx   | 16 +++-
  4 files changed, 184 insertions(+), 4 deletions(-)

diff --git a/hw/core/numa.c b/hw/core/numa.c
index 2183c8df1f..664b44ad68 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -366,6 +366,79 @@ void parse_numa_hmat_lb(NumaState *numa_state, 
NumaHmatLBOptions *node,
  g_array_append_val(hmat_lb->list, lb_data);
  }
  
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,

+   Error **errp)
+{
+int nb_numa_nodes = ms->numa_state->num_nodes;
+NodeInfo *numa_info = ms->numa_state->nodes;
+NumaHmatCacheOptions *hmat_cache = NULL;
+
+if (node->node_id >= nb_numa_nodes) {
+error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
+   "than %d", node->node_id, nb_numa_nodes);
+return;
+}
+
+if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) {
+error_setg(errp, "The latency and bandwidth information of "
+   "node-id=%" PRIu32 " should be provided before memory side "
+   "cache attributes", node->node_id);
+return;
+}
+
+if (node->level >= HMAT_LB_LEVELS) {
+error_setg(errp, "Invalid level=%" PRIu8 ", it should be less than or "
+   "equal to %d", node->level, HMAT_LB_LEVELS - 1);
+return;
+}
+
+if (!node->level && (node->assoc || node->policy || node->line)) {
+error_setg(errp, "Assoc and policy options should be 'none', line "
+   "should be 0. If cache level is 0, which means no memory "
+   "side cache in node-id=%" PRIu32, node->node_id);



Do we have to describe node->level == 0 in side-cache table
(spec isn't clear on this usecase)?

Can we just tell the user that "RAM (level 0) should not be used with
the 'hmat-cache' option"?



Yes we can. I will do that.

   


Error messages should be a phrase, not a paragraph; see error_setg()'s
function comment.  I think you want something like "be 0 when cache
level is 0".

I'm not sure the error message should explain what level 0 means, but
I'm happy to defer to the NUMA maintainers there.


+return;
+}
+
+assert(node->assoc < HMAT_CACHE_ASSOCIATIVITY__MAX);
+assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);
+if (ms->numa_state->hmat_cache[node->node_id][node->level]) {
+error_setg(errp, "Duplicate configuration of the side cache for "
+   "node-id=%" PRIu32 " and level=%" PRIu8,
+   node->node_id, node->level);
+return;
+}
+
+if ((node->level > 1) &&
+ms->numa_state->hmat_cache[node->node_id][node->level - 1] &&
+(node->size >=
+ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) 
{
+error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+   " should be less than the size(%" PRIu64 ") of "
+   "level=%" PRIu8, node->size, node->level,
+   ms->numa_state->hmat_cache[node->node_id]
+ [node->level - 1]->size,
+   node->level - 1);
+return;
+}
+
+if ((node->level < HMAT_LB_LEVELS - 1) &&
+ms->numa_state->hmat_ca

[PATCH v19 5/8] hmat acpi: Build System Locality Latency and Bandwidth Information Structure(s)

2019-11-28 Thread Tao Xu

From: Liu Jingqi 

This structure describes the memory access latency and bandwidth
information from various memory access initiator proximity domains.
The latency and bandwidth numbers represented in this structure
correspond to rated latency and bandwidth for the platform.
The software could use this information as hint for optimization.

Reviewed-by: Igor Mammedov 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

No changes in 19.

Changes in v17:
- Remove unnecessary header file (Igor)

Changes in v16:
- Add more description for lb_length (Igor)
- Drop entry_list and calculate entries in this patch (Igor)

Changes in v13:
- Calculate the entries in a new patch.
---
 hw/acpi/hmat.c | 104 -
 1 file changed, 103 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
index 9ff79308a4..e5ee8b4317 100644
--- a/hw/acpi/hmat.c
+++ b/hw/acpi/hmat.c
@@ -25,6 +25,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "sysemu/numa.h"
 #include "hw/acpi/hmat.h"
 
@@ -67,11 +68,89 @@ static void build_hmat_mpda(GArray *table_data, uint16_t 
flags,
 build_append_int_noprefix(table_data, 0, 8);
 }
 
+/*
+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+ * Structure: Table 5-146
+ */
+static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
+  uint32_t num_initiator, uint32_t num_target,
+  uint32_t *initiator_list)
+{
+int i, index;
+HMAT_LB_Data *lb_data;
+uint16_t *entry_list;
+uint32_t base;
+/* Length in bytes for entire structure */
+uint32_t lb_length
+= 32 /* Table length up to and including Entry Base Unit */
++ 4 * num_initiator /* Initiator Proximity Domain List */
++ 4 * num_target /* Target Proximity Domain List */
++ 2 * num_initiator * num_target; /* Latency or Bandwidth Entries */
+
+/* Type */
+build_append_int_noprefix(table_data, 1, 2);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Length */
+build_append_int_noprefix(table_data, lb_length, 4);
+/* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
+assert(!(hmat_lb->hierarchy >> 4));
+build_append_int_noprefix(table_data, hmat_lb->hierarchy, 1);
+/* Data Type */
+build_append_int_noprefix(table_data, hmat_lb->data_type, 1);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Number of Initiator Proximity Domains (s) */
+build_append_int_noprefix(table_data, num_initiator, 4);
+/* Number of Target Proximity Domains (t) */
+build_append_int_noprefix(table_data, num_target, 4);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 4);
+
+/* Entry Base Unit */
+if (hmat_lb->data_type <= HMAT_LB_DATA_WRITE_LATENCY) {
+/* Convert latency base from nanoseconds to picosecond */
+base = hmat_lb->base * 1000;
+} else {
+/* Convert bandwidth base from Byte to Megabyte */
+base = hmat_lb->base / MiB;
+}
+build_append_int_noprefix(table_data, base, 8);
+
+/* Initiator Proximity Domain List */
+for (i = 0; i < num_initiator; i++) {
+build_append_int_noprefix(table_data, initiator_list[i], 4);
+}
+
+/* Target Proximity Domain List */
+for (i = 0; i < num_target; i++) {
+build_append_int_noprefix(table_data, i, 4);
+}
+
+/* Latency or Bandwidth Entries */
+entry_list = g_malloc0(hmat_lb->list->len * sizeof(uint16_t));
+for (i = 0; i < hmat_lb->list->len; i++) {
+lb_data = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+index = lb_data->initiator * num_target + lb_data->target;
+
+entry_list[index] = (uint16_t)(lb_data->data / hmat_lb->base);
+}
+
+for (i = 0; i < num_initiator * num_target; i++) {
+build_append_int_noprefix(table_data, entry_list[i], 2);
+}
+
+g_free(entry_list);
+}
+
 /* Build HMAT sub table structures */
 static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
 {
 uint16_t flags;
-int i;
+uint32_t num_initiator = 0;
+uint32_t initiator_list[MAX_NODES];
+int i, hierarchy, type;
+HMAT_LB_Info *hmat_lb;
 
 for (i = 0; i < numa_state->num_nodes; i++) {
 flags = 0;
@@ -82,6 +161,29 @@ static void hmat_build_table_structs(GArray *table_data, 
NumaState *numa_state)
 
 build_hmat_mpda(table_data, flags, numa_state->nodes[i].initiator, i);
 }
+
+for (i = 0; i < numa_state->num_nodes; i++) {
+if (numa_state->nodes[i].has_cpu) {
+initiator_list[num_initiator++] = i;
+}
+}
+
+/*
+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+ * Structure: Table 5-146
+ */
+

[PATCH v19 4/8] hmat acpi: Build Memory Proximity Domain Attributes Structure(s)

2019-11-28 Thread Tao Xu

From: Liu Jingqi 

HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
(HMAT). The specification references below link:
http://www.uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf

It describes the memory attributes, such as memory side cache
attributes and bandwidth and latency details, related to the
Memory Proximity Domain. The software is
expected to use this information as hint for optimization.

This structure describes Memory Proximity Domain Attributes by memory
subsystem and its associativity with processor proximity domain as well as
hint for memory usage.

In the linux kernel, the codes in drivers/acpi/hmat/hmat.c parse and report
the platform's HMAT tables.

Reviewed-by: Igor Mammedov 
Reviewed-by: Daniel Black 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

No changes in v19.

Changes in v16:
- Use uint32_t for initiator and mem_node

Changes in v13:
- Remove the unnecessary head file.
---
 hw/acpi/Kconfig   |  7 ++-
 hw/acpi/Makefile.objs |  1 +
 hw/acpi/hmat.c| 99 +++
 hw/acpi/hmat.h| 42 ++
 hw/i386/acpi-build.c  |  5 +++
 5 files changed, 152 insertions(+), 2 deletions(-)
 create mode 100644 hw/acpi/hmat.c
 create mode 100644 hw/acpi/hmat.h

diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 12e3f1e86e..54209c6f2f 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -7,6 +7,7 @@ config ACPI_X86
 select ACPI_NVDIMM
 select ACPI_CPU_HOTPLUG
 select ACPI_MEMORY_HOTPLUG
+select ACPI_HMAT
 
 config ACPI_X86_ICH
 bool
@@ -23,6 +24,10 @@ config ACPI_NVDIMM
 bool
 depends on ACPI
 
+config ACPI_HMAT
+bool
+depends on ACPI
+
 config ACPI_PCI
 bool
 depends on ACPI && PCI
@@ -33,5 +38,3 @@ config ACPI_VMGENID
 depends on PC
 
 config ACPI_HW_REDUCED
-bool
-depends on ACPI
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 655a9c1973..517bd88704 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -7,6 +7,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
 common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
 common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
 common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o
+common-obj-$(CONFIG_ACPI_HMAT) += hmat.o
 common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
 
 common-obj-y += acpi_interface.o
diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
new file mode 100644
index 00..9ff79308a4
--- /dev/null
+++ b/hw/acpi/hmat.c
@@ -0,0 +1,99 @@
+/*
+ * HMAT ACPI Implementation
+ *
+ * Copyright(C) 2019 Intel Corporation.
+ *
+ * Author:
+ *  Liu jingqi 
+ *  Tao Xu 
+ *
+ * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
+ * (HMAT)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/numa.h"
+#include "hw/acpi/hmat.h"
+
+/*
+ * ACPI 6.3:
+ * 5.2.27.3 Memory Proximity Domain Attributes Structure: Table 5-145
+ */
+static void build_hmat_mpda(GArray *table_data, uint16_t flags,
+uint32_t initiator, uint32_t mem_node)
+{
+
+/* Memory Proximity Domain Attributes Structure */
+/* Type */
+build_append_int_noprefix(table_data, 0, 2);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Length */
+build_append_int_noprefix(table_data, 40, 4);
+/* Flags */
+build_append_int_noprefix(table_data, flags, 2);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Proximity Domain for the Attached Initiator */
+build_append_int_noprefix(table_data, initiator, 4);
+/* Proximity Domain for the Memory */
+build_append_int_noprefix(table_data, mem_node, 4);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 4);
+/*
+ * Reserved:
+ * Previously defined as the Start Address of the System Physical
+ * Address Range. Deprecated since ACPI Spec 6.3.
+ */
+build_append_int_noprefix(table_data, 0, 8);
+/*
+ * Reserved:
+ * Previously defined as the Range Length of the region in bytes.
+ * Deprecated since ACPI Spec 6.3.
+ */
+build_append_int_noprefix(table_data, 0, 8);
+}
+
+/* Build HMAT sub table structures */
+static void

[PATCH v19 8/8] tests/bios-tables-test: add test cases for ACPI HMAT

2019-11-28 Thread Tao Xu

ACPI table HMAT has been introduced, QEMU now builds HMAT tables for
Heterogeneous Memory with boot option '-numa node'.

Add test cases on PC and Q35 machines with 2 numa nodes.
Because HMAT is generated when the system enables NUMA, the
following tables need to be added for this test:
tests/data/acpi/pc/APIC.acpihmat
tests/data/acpi/pc/SRAT.acpihmat
tests/data/acpi/pc/HMAT.acpihmat
tests/data/acpi/pc/DSDT.acpihmat
tests/data/acpi/q35/APIC.acpihmat
tests/data/acpi/q35/SRAT.acpihmat
tests/data/acpi/q35/HMAT.acpihmat
tests/data/acpi/q35/DSDT.acpihmat

Reviewed-by: Igor Mammedov 
Reviewed-by: Daniel Black 
Reviewed-by: Jingqi Liu 
Suggested-by: Igor Mammedov 
Signed-off-by: Tao Xu 
---

No changes in v19.

Changes in v18:
- Remove unit "ns".

Changes in v17:
- Update the latency and bandwidth

Changes in v15:
- Make tests without breaking CI (Michael)

Changes in v13:
- Use decimal notation with appropriate suffix for cache size
---
 tests/bios-tables-test-allowed-diff.h |  8 +
 tests/bios-tables-test.c  | 44 +++
 tests/data/acpi/pc/APIC.acpihmat  |  0
 tests/data/acpi/pc/DSDT.acpihmat  |  0
 tests/data/acpi/pc/HMAT.acpihmat  |  0
 tests/data/acpi/pc/SRAT.acpihmat  |  0
 tests/data/acpi/q35/APIC.acpihmat |  0
 tests/data/acpi/q35/DSDT.acpihmat |  0
 tests/data/acpi/q35/HMAT.acpihmat |  0
 tests/data/acpi/q35/SRAT.acpihmat |  0
 10 files changed, 52 insertions(+)
 create mode 100644 tests/data/acpi/pc/APIC.acpihmat
 create mode 100644 tests/data/acpi/pc/DSDT.acpihmat
 create mode 100644 tests/data/acpi/pc/HMAT.acpihmat
 create mode 100644 tests/data/acpi/pc/SRAT.acpihmat
 create mode 100644 tests/data/acpi/q35/APIC.acpihmat
 create mode 100644 tests/data/acpi/q35/DSDT.acpihmat
 create mode 100644 tests/data/acpi/q35/HMAT.acpihmat
 create mode 100644 tests/data/acpi/q35/SRAT.acpihmat

diff --git a/tests/bios-tables-test-allowed-diff.h 
b/tests/bios-tables-test-allowed-diff.h
index dfb8523c8b..3c9e0c979b 100644
--- a/tests/bios-tables-test-allowed-diff.h
+++ b/tests/bios-tables-test-allowed-diff.h
@@ -1 +1,9 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/pc/APIC.acpihmat",
+"tests/data/acpi/pc/SRAT.acpihmat",
+"tests/data/acpi/pc/HMAT.acpihmat",
+"tests/data/acpi/pc/DSDT.acpihmat",
+"tests/data/acpi/q35/APIC.acpihmat",
+"tests/data/acpi/q35/SRAT.acpihmat",
+"tests/data/acpi/q35/HMAT.acpihmat",
+"tests/data/acpi/q35/DSDT.acpihmat",
diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index 79f5da092f..cb1de58053 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -947,6 +947,48 @@ static void test_acpi_virt_tcg_numamem(void)
 
 }
 
+static void test_acpi_tcg_acpi_hmat(const char *machine)
+{
+test_data data;
+
+memset(&data, 0, sizeof(data));
+data.machine = machine;
+data.variant = ".acpihmat";
+test_acpi_one(" -machine hmat=on"
+  " -smp 2,sockets=2"
+  " -m 128M,slots=2,maxmem=1G"
+  " -object memory-backend-ram,size=64M,id=m0"
+  " -object memory-backend-ram,size=64M,id=m1"
+  " -numa node,nodeid=0,memdev=m0"
+  " -numa node,nodeid=1,memdev=m1,initiator=0"
+  " -numa cpu,node-id=0,socket-id=0"
+  " -numa cpu,node-id=0,socket-id=1"
+  " -numa hmat-lb,initiator=0,target=0,hierarchy=memory,"
+  "data-type=access-latency,latency=1"
+  " -numa hmat-lb,initiator=0,target=0,hierarchy=memory,"
+  "data-type=access-bandwidth,bandwidth=65534M"
+  " -numa hmat-lb,initiator=0,target=1,hierarchy=memory,"
+  "data-type=access-latency,latency=65534"
+  " -numa hmat-lb,initiator=0,target=1,hierarchy=memory,"
+  "data-type=access-bandwidth,bandwidth=32767M"
+  " -numa hmat-cache,node-id=0,size=10K,level=1,assoc=direct,"
+  "policy=write-back,line=8"
+  " -numa hmat-cache,node-id=1,size=10K,level=1,assoc=direct,"
+  "policy=write-back,line=8",
+  &data);
+free_test_data(&data);
+}
+
+static void test_acpi_q35_tcg_acpi_hmat(void)
+{
+test_acpi_tcg_acpi_hmat(MACHINE_Q35);
+}
+
+static void test_acpi_piix4_tcg_acpi_hmat(void)
+{
+test_acpi_tcg_acpi_hmat(MACHINE_PC);
+}
+
 static void test_acpi_virt_tcg(void)
 {
 test_data data = {
@@ -991,6 +1033,8 @@ int main(int argc, char *argv[])
 qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem);
 qtest_add_func(&quo

[PATCH v19 2/8] numa: Extend CLI to provide memory latency and bandwidth information

2019-11-28 Thread Tao Xu

From: Liu Jingqi 

Add -numa hmat-lb option to provide System Locality Latency and
Bandwidth Information. These memory attributes help to build
System Locality Latency and Bandwidth Information Structure(s)
in ACPI Heterogeneous Memory Attribute Table (HMAT). Before using
hmat-lb option, enable HMAT with -machine hmat=on.

Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

Changes in v19:
- Add description about the machine property 'hmat' in commit
  message (Markus)

Changes in v18:
- Use qapi type uint64 and only nanosecond for latency (Markus)

Changes in v17:
- Add check when user input latency or bandwidth 0, the
  lb_info_provided should also be 0. Because in ACPI 6.3 5.2.27.4,
  0 means the corresponding latency or bandwidth information is
  not provided.
- Fix the infinite loop when node->latency is 0.

Changes in v16:
- Initialize HMAT_LB_Data lb_data (Igor)
- Remove punctuation from error_setg (Igor)
- Correct some description (Igor)
- Drop statement about max value (Igor)
- Simplify struct HMAT_LB_Info and related code, unify latency
  and bandwidth (Igor)
---
 hw/core/numa.c| 181 ++
 include/sysemu/numa.h |  53 +
 qapi/machine.json |  93 +-
 qemu-options.hx   |  48 ++-
 4 files changed, 372 insertions(+), 3 deletions(-)

diff --git a/hw/core/numa.c b/hw/core/numa.c
index e60da99293..2183c8df1f 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "sysemu/hostmem.h"
 #include "sysemu/numa.h"
 #include "sysemu/sysemu.h"
@@ -198,6 +199,173 @@ void parse_numa_distance(MachineState *ms, 
NumaDistOptions *dist, Error **errp)
 ms->numa_state->have_numa_distance = true;
 }
 
+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
+Error **errp)
+{
+int i, first_bit, last_bit;
+uint64_t max_entry, temp_base_la;
+NodeInfo *numa_info = numa_state->nodes;
+HMAT_LB_Info *hmat_lb =
+numa_state->hmat_lb[node->hierarchy][node->data_type];
+HMAT_LB_Data lb_data = {};
+HMAT_LB_Data *lb_temp;
+
+/* Error checking */
+if (node->initiator > numa_state->num_nodes) {
+error_setg(errp, "Invalid initiator=%d, it should be less than %d",
+   node->initiator, numa_state->num_nodes);
+return;
+}
+if (node->target > numa_state->num_nodes) {
+error_setg(errp, "Invalid target=%d, it should be less than %d",
+   node->target, numa_state->num_nodes);
+return;
+}
+if (!numa_info[node->initiator].has_cpu) {
+error_setg(errp, "Invalid initiator=%d, it isn't an "
+   "initiator proximity domain", node->initiator);
+return;
+}
+if (!numa_info[node->target].present) {
+error_setg(errp, "The target=%d should point to an existing node",
+   node->target);
+return;
+}
+
+if (!hmat_lb) {
+hmat_lb = g_malloc0(sizeof(*hmat_lb));
+numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb;
+hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data));
+}
+hmat_lb->hierarchy = node->hierarchy;
+hmat_lb->data_type = node->data_type;
+lb_data.initiator = node->initiator;
+lb_data.target = node->target;
+
+if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) {
+/* Input latency data */
+
+if (!node->has_latency) {
+error_setg(errp, "Missing 'latency' option");
+return;
+}
+if (node->has_bandwidth) {
+error_setg(errp, "Invalid option 'bandwidth' since "
+   "the data type is latency");
+return;
+}
+
+/* Detect duplicate configuration */
+for (i = 0; i < hmat_lb->list->len; i++) {
+lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+
+if (node->initiator == lb_temp->initiator &&
+node->target == lb_temp->target) {
+error_setg(errp, "Duplicate configuration of the latency for "
+"initiator=%d and target=%d", node->initiator,
+node->target);
+return;
+}
+}
+
+hmat_lb->base = hmat_lb->base ? hmat_lb->base : UINT64_MAX;
+
+if (node->latency) {
+/* Calculate the temporary base and compressed latency */
+max_entry = node->latency;
+temp_base_la = 1;
+while (QEMU_IS_ALIGNED(max_entry, 10)) {
+

[PATCH v19 6/8] hmat acpi: Build Memory Side Cache Information Structure(s)

2019-11-28 Thread Tao Xu

From: Liu Jingqi 

This structure describes memory side cache information for memory
proximity domains if the memory side cache is present and the
physical device forms the memory side cache.
The software could use this information to effectively place
the data in memory to maximize the performance of the system
memory that use the memory side cache.

Reviewed-by: Igor Mammedov 
Reviewed-by: Daniel Black 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

No changes in v19.

Changes in v16:
- Use checks and assert to replace masks (Igor)
- Fields in Cache Attributes are promoted to uint32_t before
  shifting (Igor)
- Drop cpu_to_le32() (Igor)

Changes in v13:
- rename level as cache_level
---
 hw/acpi/hmat.c | 69 +-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
index e5ee8b4317..bb6adb0ccf 100644
--- a/hw/acpi/hmat.c
+++ b/hw/acpi/hmat.c
@@ -143,14 +143,62 @@ static void build_hmat_lb(GArray *table_data, 
HMAT_LB_Info *hmat_lb,
 g_free(entry_list);
 }
 
+/* ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure: Table 5-147 */
+static void build_hmat_cache(GArray *table_data, uint8_t total_levels,
+ NumaHmatCacheOptions *hmat_cache)
+{
+/*
+ * Cache Attributes: Bits [3:0] – Total Cache Levels
+ * for this Memory Proximity Domain
+ */
+uint32_t cache_attr = total_levels;
+
+/* Bits [7:4] : Cache Level described in this structure */
+cache_attr |= (uint32_t) hmat_cache->level << 4;
+
+/* Bits [11:8] - Cache Associativity */
+cache_attr |= (uint32_t) hmat_cache->assoc << 8;
+
+/* Bits [15:12] - Write Policy */
+cache_attr |= (uint32_t) hmat_cache->policy << 12;
+
+/* Bits [31:16] - Cache Line size in bytes */
+cache_attr |= (uint32_t) hmat_cache->line << 16;
+
+/* Type */
+build_append_int_noprefix(table_data, 2, 2);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Length */
+build_append_int_noprefix(table_data, 32, 4);
+/* Proximity Domain for the Memory */
+build_append_int_noprefix(table_data, hmat_cache->node_id, 4);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 4);
+/* Memory Side Cache Size */
+build_append_int_noprefix(table_data, hmat_cache->size, 8);
+/* Cache Attributes */
+build_append_int_noprefix(table_data, cache_attr, 4);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/*
+ * Number of SMBIOS handles (n)
+ * Linux kernel uses Memory Side Cache Information Structure
+ * without SMBIOS entries for now, so set Number of SMBIOS handles
+ * as 0.
+ */
+build_append_int_noprefix(table_data, 0, 2);
+}
+
 /* Build HMAT sub table structures */
 static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
 {
 uint16_t flags;
 uint32_t num_initiator = 0;
 uint32_t initiator_list[MAX_NODES];
-int i, hierarchy, type;
+int i, hierarchy, type, cache_level, total_levels;
 HMAT_LB_Info *hmat_lb;
+NumaHmatCacheOptions *hmat_cache;
 
 for (i = 0; i < numa_state->num_nodes; i++) {
 flags = 0;
@@ -184,6 +232,25 @@ static void hmat_build_table_structs(GArray *table_data, 
NumaState *numa_state)
 }
 }
 }
+
+/*
+ * ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure:
+ * Table 5-147
+ */
+for (i = 0; i < numa_state->num_nodes; i++) {
+total_levels = 0;
+for (cache_level = 1; cache_level < HMAT_LB_LEVELS; cache_level++) {
+if (numa_state->hmat_cache[i][cache_level]) {
+total_levels++;
+}
+}
+for (cache_level = 0; cache_level <= total_levels; cache_level++) {
+hmat_cache = numa_state->hmat_cache[i][cache_level];
+if (hmat_cache) {
+build_hmat_cache(table_data, total_levels, hmat_cache);
+}
+}
+}
 }
 
 void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state)
-- 
2.20.1

[PATCH v19 0/8] Build ACPI Heterogeneous Memory Attribute Table (HMAT)

2019-11-28 Thread Tao Xu

This series of patches will build Heterogeneous Memory Attribute Table (HMAT)
according to the command line. The ACPI HMAT describes the memory attributes,
such as memory side cache attributes and bandwidth and latency details,
related to the Memory Proximity Domain.
The software is expected to use HMAT information as hint for optimization.

In the linux kernel, the codes in drivers/acpi/hmat/hmat.c parse and report
the platform's HMAT tables.

The V18 patches link:
https://patchwork.kernel.org/cover/11263551/

Changelog:
v19:
- Add description about the machine property 'hmat' in commit
  message (Markus)
- Update the QAPI comments
- Add a check for no memory side cache
- Add some fail cases for hmat-cache when level=0
v18:
- Defer patches 01/14~06/14 of V17, use qapi type uint64 and
  only nanosecond for latency (Markus)
- Rewrite the lines over 80 characters(Igor)
v17:
- Add check when user input latency or bandwidth 0, the
  lb_info_provided should also be 0. Because in ACPI 6.3 5.2.27.4,
  0 means the corresponding latency or bandwidth information is
  not provided.
- Fix the infinite loop when node->latency is 0.
- Use NumaHmatCacheOptions to replace HMAT_Cache_Info (Igor)
- Add check for unordered cache level input (Igor)
- Add some fail test cases (Igor)
v16:
- Add and use qemu_strtold_finite to parse size, support full
  64bit precision, modify related test cases (Eduardo and Markus)
- Simplify struct HMAT_LB_Info and related code, unify latency
  and bandwidth (Igor)
- Add cross check with hmat_lb data (Igor)
- Fields in Cache Attributes are promoted to uint32_t before
  shifting (Igor)
- Add case for QMP build HMAT (Igor)
v15:
- Add a new patch to refactor do_strtosz() (Eduardo)
- Make tests without breaking CI (Michael)
v14:
- Reuse the codes of do_strtosz to build qemu_strtotime_ns
  (Eduardo)
- Squash patch v13 01/12 and 02/12 together (Daniel and Eduardo)
- Drop time unit picosecond (Eric)
- Use qemu ctz64 and clz64 instead of builtin function
v13:
- Modify some text description
- Drop "initiator_valid" field in struct NodeInfo
- Reuse Garray to store the raw bandwidth and bandwidth data
- Calculate common base unit using range bitmap
- Add a patch to calculate hmat latency and bandwidth entry list
- Drop the total_levels option and use readable cache size
- Remove the unnecessary head file
- Use decimal notation with appropriate suffix for cache size
v12:
- Fix a bug that a memory-only node without initiator setting
  doesn't report error. (reported by Danmei Wei)
- Fix a bug that if HMAT is enabled and without hmat-lb setting,
  QEMU will crash. (reported by Danmei Wei)

Liu Jingqi (5):
  numa: Extend CLI to provide memory latency and bandwidth information
  numa: Extend CLI to provide memory side cache information
  hmat acpi: Build Memory Proximity Domain Attributes Structure(s)
  hmat acpi: Build System Locality Latency and Bandwidth Information
Structure(s)
  hmat acpi: Build Memory Side Cache Information Structure(s)

Tao Xu (3):
  numa: Extend CLI to provide initiator information for numa nodes
  tests/numa: Add case for QMP build HMAT
  tests/bios-tables-test: add test cases for ACPI HMAT

 hw/acpi/Kconfig   |   7 +-
 hw/acpi/Makefile.objs |   1 +
 hw/acpi/hmat.c| 268 
 hw/acpi/hmat.h|  42 
 hw/core/machine.c |  64 ++
 hw/core/numa.c| 290 ++
 hw/i386/acpi-build.c  |   5 +
 include/sysemu/numa.h |  63 ++
 qapi/machine.json | 180 +++-
 qemu-options.hx   |  95 -
 tests/bios-tables-test-allowed-diff.h |   8 +
 tests/bios-tables-test.c  |  44 
 tests/data/acpi/pc/APIC.acpihmat  |   0
 tests/data/acpi/pc/DSDT.acpihmat  |   0
 tests/data/acpi/pc/HMAT.acpihmat  |   0
 tests/data/acpi/pc/SRAT.acpihmat  |   0
 tests/data/acpi/q35/APIC.acpihmat |   0
 tests/data/acpi/q35/DSDT.acpihmat |   0
 tests/data/acpi/q35/HMAT.acpihmat |   0
 tests/data/acpi/q35/SRAT.acpihmat |   0
 tests/numa-test.c | 213 +++
 21 files changed, 1269 insertions(+), 11 deletions(-)
 create mode 100644 hw/acpi/hmat.c
 create mode 100644 hw/acpi/hmat.h
 create mode 100644 tests/data/acpi/pc/APIC.acpihmat
 create mode 100644 tests/data/acpi/pc/DSDT.acpihmat
 create mode 100644 tests/data/acpi/pc/HMAT.acpihmat
 create mode 100644 tests/data/acpi/pc/SRAT.acpihmat
 create mode 100644 tests/data/acpi/q35/APIC.acpihmat
 create mode 100644 tests/data/acpi/q35/DSDT.acpihmat
 create mode 100644 tests/data/acpi/q35/HMAT.acpihmat
 create mode 100644 tests/data/acpi/q35/SRAT.acpihmat

-- 
2.20.1

[PATCH v19 7/8] tests/numa: Add case for QMP build HMAT

2019-11-28 Thread Tao Xu

Check configuring HMAT usecase

Reviewed-by: Igor Mammedov 
Suggested-by: Igor Mammedov 
Signed-off-by: Tao Xu 
---

Changes in v19:
- Add some fail cases for hmat-cache when level=0

Changes in v18:
- Rewrite the lines over 80 characters

Changes in v17:
- Add some fail test cases (Igor)
---
 tests/numa-test.c | 213 ++
 1 file changed, 213 insertions(+)

diff --git a/tests/numa-test.c b/tests/numa-test.c
index 8de8581231..aed7b2f31b 100644
--- a/tests/numa-test.c
+++ b/tests/numa-test.c
@@ -327,6 +327,216 @@ static void pc_dynamic_cpu_cfg(const void *data)
 qtest_quit(qs);
 }
 
+static void pc_hmat_build_cfg(const void *data)
+{
+QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on "
+ "-smp 2,sockets=2 "
+ "-m 128M,slots=2,maxmem=1G "
+ "-object memory-backend-ram,size=64M,id=m0 "
+ "-object memory-backend-ram,size=64M,id=m1 "
+ "-numa node,nodeid=0,memdev=m0 "
+ "-numa node,nodeid=1,memdev=m1,initiator=0 "
+ "-numa cpu,node-id=0,socket-id=0 "
+ "-numa cpu,node-id=0,socket-id=1",
+ data ? (char *)data : "");
+
+/* Fail: Initiator should be less than the number of nodes */
+g_assert(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 2, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+/* Fail: Target should be less than the number of nodes */
+g_assert(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 2,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+/* Fail: Initiator should contain cpu */
+g_assert(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 1, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+/* Fail: Data-type mismatch */
+g_assert(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"write-latency\","
+" 'bandwidth': 524288000 } }")));
+g_assert(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"read-bandwidth\","
+" 'latency': 5 } }")));
+
+/* Fail: Bandwidth should be 1MB (1048576) aligned */
+g_assert(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+" 'bandwidth': 1048575 } }")));
+
+/* Configuring HMAT bandwidth and latency details */
+g_assert(!qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+" 'latency': 1 } }")));/* 1 ns */
+g_assert(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+" 'latency': 5 } }")));/* Fail: Duplicate configuration */
+g_assert(!qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+" 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+" 'bandwidth': 68717379584 } }")));/* 65534 MB/s */
+g_assert(!qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
+" 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+" 'latency': 65534 } }")));/* 65534 ns */
+g_assert(!qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+" 'argume

[PATCH v19 1/8] numa: Extend CLI to provide initiator information for numa nodes

2019-11-28 Thread Tao Xu

In ACPI 6.3 chapter 5.2.27 Heterogeneous Memory Attribute Table (HMAT),
the initiator represents a processor that accesses memory. In 5.2.27.3
Memory Proximity Domain Attributes Structure, the attached initiator is
defined as where the memory controller responsible for a memory proximity
domain is located. With attached initiator information, the topology of heterogeneous
memory can be described. Add new machine property 'hmat' to enable all
HMAT specific options.

Extend CLI of "-numa node" option to indicate the initiator numa node-id.
In the linux kernel, the codes in drivers/acpi/hmat/hmat.c parse and report
the platform's HMAT tables. Before using initiator option, enable HMAT with
-machine hmat=on.

Reviewed-by: Igor Mammedov 
Reviewed-by: Jingqi Liu 
Suggested-by: Dan Williams 
Signed-off-by: Tao Xu 
---

Changes in v19:
- Add description about the machine property 'hmat' in commit
  message (Markus)

Changes in v15:
- Change the QAPI version tag to 5.0 (Eric)
---
 hw/core/machine.c | 64 +++
 hw/core/numa.c| 23 
 include/sysemu/numa.h |  5 
 qapi/machine.json | 10 ++-
 qemu-options.hx   | 35 +++
 5 files changed, 131 insertions(+), 6 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 1689ad3bf8..d7d2cfa66d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -518,6 +518,20 @@ static void machine_set_nvdimm(Object *obj, bool value, 
Error **errp)
 ms->nvdimms_state->is_enabled = value;
 }
 
+static bool machine_get_hmat(Object *obj, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+return ms->numa_state->hmat_enabled;
+}
+
+static void machine_set_hmat(Object *obj, bool value, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+ms->numa_state->hmat_enabled = value;
+}
+
 static char *machine_get_nvdimm_persistence(Object *obj, Error **errp)
 {
 MachineState *ms = MACHINE(obj);
@@ -645,6 +659,7 @@ void machine_set_cpu_numa_node(MachineState *machine,
const CpuInstanceProperties *props, Error 
**errp)
 {
 MachineClass *mc = MACHINE_GET_CLASS(machine);
+NodeInfo *numa_info = machine->numa_state->nodes;
 bool match = false;
 int i;
 
@@ -714,6 +729,17 @@ void machine_set_cpu_numa_node(MachineState *machine,
 match = true;
 slot->props.node_id = props->node_id;
 slot->props.has_node_id = props->has_node_id;
+
+if (machine->numa_state->hmat_enabled) {
+if ((numa_info[props->node_id].initiator < MAX_NODES) &&
+(props->node_id != numa_info[props->node_id].initiator)) {
+error_setg(errp, "The initiator of CPU NUMA node %" PRId64
+" should be itself", props->node_id);
+return;
+}
+numa_info[props->node_id].has_cpu = true;
+numa_info[props->node_id].initiator = props->node_id;
+}
 }
 
 if (!match) {
@@ -960,6 +986,13 @@ static void machine_initfn(Object *obj)
 
 if (mc->numa_mem_supported) {
 ms->numa_state = g_new0(NumaState, 1);
+object_property_add_bool(obj, "hmat",
+ machine_get_hmat, machine_set_hmat,
+ &error_abort);
+object_property_set_description(obj, "hmat",
+"Set on/off to enable/disable "
+"ACPI Heterogeneous Memory Attribute "
+"Table (HMAT)", NULL);
 }
 
 /* Register notifier when init is done for sysbus sanity checks */
@@ -1048,6 +1081,32 @@ static char *cpu_slot_to_string(const CPUArchId *cpu)
 return g_string_free(s, false);
 }
 
+static void numa_validate_initiator(NumaState *numa_state)
+{
+int i;
+NodeInfo *numa_info = numa_state->nodes;
+
+for (i = 0; i < numa_state->num_nodes; i++) {
+if (numa_info[i].initiator == MAX_NODES) {
+error_report("The initiator of NUMA node %d is missing, use "
+ "'-numa node,initiator' option to declare it", i);
+exit(1);
+}
+
+if (!numa_info[numa_info[i].initiator].present) {
+error_report("NUMA node %" PRIu16 " is missing, use "
+ "'-numa node' option to declare it first",
+ numa_info[i].initiator);
+exit(1);
+}
+
+if (!numa_info[numa_info[i].initiator].has_cpu) {
+error_report("The initiator of NUMA node %d is invalid", i);
+exit(1);
+}
+}
+}
+
 static void machine_numa_finish_cpu_init(MachineState *machine)
 {
 int i;
@@ -1088,6 +1147

[PATCH v19 3/8] numa: Extend CLI to provide memory side cache information

2019-11-28 Thread Tao Xu

From: Liu Jingqi 

Add -numa hmat-cache option to provide Memory Side Cache Information.
These memory attributes help to build Memory Side Cache Information
Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT).
Before using hmat-cache option, enable HMAT with -machine hmat=on.

Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

Changes in v19:
- Add description about the machine property 'hmat' in commit
  message (Markus)
- Update the QAPI comments
- Add a check for no memory side cache

Changes in v18:
- Update the error message (Igor)

Changes in v17:
- Use NumaHmatCacheOptions to replace HMAT_Cache_Info (Igor)
- Add check for unordered cache level input (Igor)

Changes in v16:
- Add cross check with hmat_lb data (Igor)
- Drop total_levels in struct HMAT_Cache_Info (Igor)
- Correct the error table number (Igor)

Changes in v15:
- Change the QAPI version tag to 5.0 (Eric)
---
 hw/core/numa.c| 86 +++
 include/sysemu/numa.h |  5 +++
 qapi/machine.json | 81 +++-
 qemu-options.hx   | 16 +++-
 4 files changed, 184 insertions(+), 4 deletions(-)

diff --git a/hw/core/numa.c b/hw/core/numa.c
index 2183c8df1f..664b44ad68 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -366,6 +366,79 @@ void parse_numa_hmat_lb(NumaState *numa_state, 
NumaHmatLBOptions *node,
 g_array_append_val(hmat_lb->list, lb_data);
 }
 
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
+   Error **errp)
+{
+int nb_numa_nodes = ms->numa_state->num_nodes;
+NodeInfo *numa_info = ms->numa_state->nodes;
+NumaHmatCacheOptions *hmat_cache = NULL;
+
+if (node->node_id >= nb_numa_nodes) {
+error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
+   "than %d", node->node_id, nb_numa_nodes);
+return;
+}
+
+if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) {
+error_setg(errp, "The latency and bandwidth information of "
+   "node-id=%" PRIu32 " should be provided before memory side "
+   "cache attributes", node->node_id);
+return;
+}
+
+if (node->level >= HMAT_LB_LEVELS) {
+error_setg(errp, "Invalid level=%" PRIu8 ", it should be less than or "
+   "equal to %d", node->level, HMAT_LB_LEVELS - 1);
+return;
+}
+
+if (!node->level && (node->assoc || node->policy || node->line)) {
+error_setg(errp, "Assoc and policy options should be 'none', line "
+   "should be 0. If cache level is 0, which means no memory "
+   "side cache in node-id=%" PRIu32, node->node_id);
+return;
+}
+
+assert(node->assoc < HMAT_CACHE_ASSOCIATIVITY__MAX);
+assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);
+if (ms->numa_state->hmat_cache[node->node_id][node->level]) {
+error_setg(errp, "Duplicate configuration of the side cache for "
+   "node-id=%" PRIu32 " and level=%" PRIu8,
+   node->node_id, node->level);
+return;
+}
+
+if ((node->level > 1) &&
+ms->numa_state->hmat_cache[node->node_id][node->level - 1] &&
+(node->size >=
+ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) 
{
+error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+   " should be less than the size(%" PRIu64 ") of "
+   "level=%" PRIu8, node->size, node->level,
+   ms->numa_state->hmat_cache[node->node_id]
+ [node->level - 1]->size,
+   node->level - 1);
+return;
+}
+
+if ((node->level < HMAT_LB_LEVELS - 1) &&
+ms->numa_state->hmat_cache[node->node_id][node->level + 1] &&
+(node->size <=
+ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) 
{
+error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+   " should be larger than the size(%" PRIu64 ") of "
+   "level=%" PRIu8, node->size, node->level,
+   ms->numa_state->hmat_cache[node->node_id]
+ [node->level + 1]->size,
+   node->level + 1);
+return;
+}
+
+hmat_cache = g_malloc0(si

Re: [PATCH v18 3/8] numa: Extend CLI to provide memory side cache information

2019-11-27 Thread Tao Xu


On 11/28/2019 10:46 AM, Tao Xu wrote:

On 11/27/2019 5:56 PM, Markus Armbruster wrote:

Tao Xu  writes:


From: Liu Jingqi 

Add -numa hmat-cache option to provide Memory Side Cache Information.
These memory attributes help to build Memory Side Cache Information
Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT).


Please mention this requires -machine hmat=on.


OK I will add these for 3 related patches.



Reviewed-by: Igor Mammedov 
Reviewed-by: Daniel Black 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 

[...]

diff --git a/qapi/machine.json b/qapi/machine.json
index c741649d7b..3d0ba226a9 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -428,10 +428,12 @@
  #
  # @hmat-lb: memory latency and bandwidth information (Since: 5.0)
  #
+# @hmat-cache: memory side cache information (Since: 5.0)
+#
  # Since: 2.1
  ##
  { 'enum': 'NumaOptionsType',
-  'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] }
+  'data': [ 'node', 'dist', 'cpu', 'hmat-lb', 'hmat-cache' ] }
  ##
  # @NumaOptions:
@@ -447,7 +449,8 @@
  'node': 'NumaNodeOptions',
  'dist': 'NumaDistOptions',
  'cpu': 'NumaCpuOptions',
-    'hmat-lb': 'NumaHmatLBOptions' }}
+    'hmat-lb': 'NumaHmatLBOptions',
+    'hmat-cache': 'NumaHmatCacheOptions' }}
  ##
  # @NumaNodeOptions:
@@ -647,6 +650,77 @@
  '*latency': 'uint64',
  '*bandwidth': 'size' }}
+##
+# @HmatCacheAssociativity:
+#
+# Cache associativity in the Memory Side Cache
+# Information Structure of HMAT
+#
+# For more information of @HmatCacheAssociativity see
+# the chapter 5.2.27.5: Table 5-147 of ACPI 6.3 spec.


   # Cache associativity in the Memory Side Cache Information Structure
   # of HMAT
   #
   # For more information of @HmatCacheAssociativity, see chapter
   # 5.2.27.5: Table 5-147 of ACPI 6.3 spec.


+#
+# @none: None


What does cache associativity @none mean?  A none-associative cache?  I
guess it makes sense to people familiar with the ACPI spec...



This means this proximity domain has no memory cache, so the cache
associativity is none. I will add more description about this.


I read the ACPI spec again, and there is no description of 'none'. In the
Linux kernel HMAT code, this is handled as "other", which may mean not
provided. I will also add a check that when level is none, the associativity,
policy and line_size should be none or 0.

Re: [PATCH v18 3/8] numa: Extend CLI to provide memory side cache information

2019-11-27 Thread Tao Xu


On 11/27/2019 5:56 PM, Markus Armbruster wrote:

Tao Xu  writes:


From: Liu Jingqi 

Add -numa hmat-cache option to provide Memory Side Cache Information.
These memory attributes help to build Memory Side Cache Information
Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT).


Please mention this requires -machine hmat=on.


OK I will add these for 3 related patches.



Reviewed-by: Igor Mammedov 
Reviewed-by: Daniel Black 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 

[...]

diff --git a/qapi/machine.json b/qapi/machine.json
index c741649d7b..3d0ba226a9 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -428,10 +428,12 @@
  #
  # @hmat-lb: memory latency and bandwidth information (Since: 5.0)
  #
+# @hmat-cache: memory side cache information (Since: 5.0)
+#
  # Since: 2.1
  ##
  { 'enum': 'NumaOptionsType',
-  'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] }
+  'data': [ 'node', 'dist', 'cpu', 'hmat-lb', 'hmat-cache' ] }
  
  ##

  # @NumaOptions:
@@ -447,7 +449,8 @@
  'node': 'NumaNodeOptions',
  'dist': 'NumaDistOptions',
  'cpu': 'NumaCpuOptions',
-'hmat-lb': 'NumaHmatLBOptions' }}
+'hmat-lb': 'NumaHmatLBOptions',
+'hmat-cache': 'NumaHmatCacheOptions' }}
  
  ##

  # @NumaNodeOptions:
@@ -647,6 +650,77 @@
  '*latency': 'uint64',
  '*bandwidth': 'size' }}
  
+##

+# @HmatCacheAssociativity:
+#
+# Cache associativity in the Memory Side Cache
+# Information Structure of HMAT
+#
+# For more information of @HmatCacheAssociativity see
+# the chapter 5.2.27.5: Table 5-147 of ACPI 6.3 spec.


   # Cache associativity in the Memory Side Cache Information Structure
   # of HMAT
   #
   # For more information of @HmatCacheAssociativity, see chapter
   # 5.2.27.5: Table 5-147 of ACPI 6.3 spec.


+#
+# @none: None


What does cache associativity @none mean?  A none-associative cache?  I
guess it makes sense to people familiar with the ACPI spec...



This means this proximity domain has no memory cache, so the cache
associativity is none. I will add more description about this.

+#
+# @direct: Direct Mapped
+#
+# @complex: Complex Cache Indexing (implementation specific)
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatCacheAssociativity',
+  'data': [ 'none', 'direct', 'complex' ] }
+
+##
+# @HmatCacheWritePolicy:
+#
+# Cache write policy in the Memory Side Cache
+# Information Structure of HMAT
+#
+# For more information of @HmatCacheWritePolicy see
+# the chapter 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.


Break lines around column 70, please.


+#
+# @none: None


What does cache write policy @none mean?


This means this proximity domain has no memory cache, thus none for 
cache write policy.



+#
+# @write-back: Write Back (WB)
+#
+# @write-through: Write Through (WT)
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatCacheWritePolicy',
+  'data': [ 'none', 'write-back', 'write-through' ] }
+
+##
+# @NumaHmatCacheOptions:
+#
+# Set the memory side cache information for a given memory domain.
+#
+# For more information of @NumaHmatCacheOptions see
+# the chapter 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
+#
+# @node-id: the memory proximity domain to which the memory belongs.
+#
+# @size: the size of memory side cache in bytes.
+#
+# @level: the cache level described in this structure.
+#
+# @assoc: the cache associativity, none/direct-mapped/complex(complex cache 
indexing).
+#
+# @policy: the write policy, none/write-back/write-through.
+#
+# @line: the cache Line size in bytes.
+#
+# Since: 5.0
+##
+{ 'struct': 'NumaHmatCacheOptions',
+  'data': {
+   'node-id': 'uint32',


Ignorant question: you use 'uint16' for other "proximity domains".  Is
'uint32' intentional here?



Yes, because the ACPI 6.3 spec defines the domain as 4 bytes (32 bits), and for
HmatCacheOptions we directly use this QAPI struct when building HMAT.
But for other "proximity domains" we use a local variable (such as
"uint32_t initiator").

[PATCH v18 8/8] tests/bios-tables-test: add test cases for ACPI HMAT

2019-11-27 Thread Tao Xu

The ACPI HMAT table has been introduced; QEMU now builds HMAT tables for
Heterogeneous Memory with the boot option '-numa node'.

Add test cases on PC and Q35 machines with 2 NUMA nodes.
Because HMAT is generated when the system enables NUMA, the
following tables need to be added for this test:
tests/data/acpi/pc/APIC.acpihmat
tests/data/acpi/pc/SRAT.acpihmat
tests/data/acpi/pc/HMAT.acpihmat
tests/data/acpi/pc/DSDT.acpihmat
tests/data/acpi/q35/APIC.acpihmat
tests/data/acpi/q35/SRAT.acpihmat
tests/data/acpi/q35/HMAT.acpihmat
tests/data/acpi/q35/DSDT.acpihmat

Reviewed-by: Igor Mammedov 
Reviewed-by: Daniel Black 
Reviewed-by: Jingqi Liu 
Suggested-by: Igor Mammedov 
Signed-off-by: Tao Xu 
---

Changes in v18:
- Remove unit "ns".

Changes in v17:
- Update the latency and bandwidth

Changes in v15:
- Make tests without breaking CI (Michael)

Changes in v13:
- Use decimal notation with appropriate suffix for cache size
---
 tests/bios-tables-test-allowed-diff.h |  8 +
 tests/bios-tables-test.c  | 44 +++
 tests/data/acpi/pc/APIC.acpihmat  |  0
 tests/data/acpi/pc/DSDT.acpihmat  |  0
 tests/data/acpi/pc/HMAT.acpihmat  |  0
 tests/data/acpi/pc/SRAT.acpihmat  |  0
 tests/data/acpi/q35/APIC.acpihmat |  0
 tests/data/acpi/q35/DSDT.acpihmat |  0
 tests/data/acpi/q35/HMAT.acpihmat |  0
 tests/data/acpi/q35/SRAT.acpihmat |  0
 10 files changed, 52 insertions(+)
 create mode 100644 tests/data/acpi/pc/APIC.acpihmat
 create mode 100644 tests/data/acpi/pc/DSDT.acpihmat
 create mode 100644 tests/data/acpi/pc/HMAT.acpihmat
 create mode 100644 tests/data/acpi/pc/SRAT.acpihmat
 create mode 100644 tests/data/acpi/q35/APIC.acpihmat
 create mode 100644 tests/data/acpi/q35/DSDT.acpihmat
 create mode 100644 tests/data/acpi/q35/HMAT.acpihmat
 create mode 100644 tests/data/acpi/q35/SRAT.acpihmat

diff --git a/tests/bios-tables-test-allowed-diff.h 
b/tests/bios-tables-test-allowed-diff.h
index dfb8523c8b..3c9e0c979b 100644
--- a/tests/bios-tables-test-allowed-diff.h
+++ b/tests/bios-tables-test-allowed-diff.h
@@ -1 +1,9 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/pc/APIC.acpihmat",
+"tests/data/acpi/pc/SRAT.acpihmat",
+"tests/data/acpi/pc/HMAT.acpihmat",
+"tests/data/acpi/pc/DSDT.acpihmat",
+"tests/data/acpi/q35/APIC.acpihmat",
+"tests/data/acpi/q35/SRAT.acpihmat",
+"tests/data/acpi/q35/HMAT.acpihmat",
+"tests/data/acpi/q35/DSDT.acpihmat",
diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index 79f5da092f..cb1de58053 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -947,6 +947,48 @@ static void test_acpi_virt_tcg_numamem(void)
 
 }
 
+static void test_acpi_tcg_acpi_hmat(const char *machine)
+{
+test_data data;
+
+memset(&data, 0, sizeof(data));
+data.machine = machine;
+data.variant = ".acpihmat";
+test_acpi_one(" -machine hmat=on"
+  " -smp 2,sockets=2"
+  " -m 128M,slots=2,maxmem=1G"
+  " -object memory-backend-ram,size=64M,id=m0"
+  " -object memory-backend-ram,size=64M,id=m1"
+  " -numa node,nodeid=0,memdev=m0"
+  " -numa node,nodeid=1,memdev=m1,initiator=0"
+  " -numa cpu,node-id=0,socket-id=0"
+  " -numa cpu,node-id=0,socket-id=1"
+  " -numa hmat-lb,initiator=0,target=0,hierarchy=memory,"
+  "data-type=access-latency,latency=1"
+  " -numa hmat-lb,initiator=0,target=0,hierarchy=memory,"
+  "data-type=access-bandwidth,bandwidth=65534M"
+  " -numa hmat-lb,initiator=0,target=1,hierarchy=memory,"
+  "data-type=access-latency,latency=65534"
+  " -numa hmat-lb,initiator=0,target=1,hierarchy=memory,"
+  "data-type=access-bandwidth,bandwidth=32767M"
+  " -numa hmat-cache,node-id=0,size=10K,level=1,assoc=direct,"
+  "policy=write-back,line=8"
+  " -numa hmat-cache,node-id=1,size=10K,level=1,assoc=direct,"
+  "policy=write-back,line=8",
+  &data);
+free_test_data(&data);
+}
+
+static void test_acpi_q35_tcg_acpi_hmat(void)
+{
+test_acpi_tcg_acpi_hmat(MACHINE_Q35);
+}
+
+static void test_acpi_piix4_tcg_acpi_hmat(void)
+{
+test_acpi_tcg_acpi_hmat(MACHINE_PC);
+}
+
 static void test_acpi_virt_tcg(void)
 {
 test_data data = {
@@ -991,6 +1033,8 @@ int main(int argc, char *argv[])
 qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem);
 qtest_add_func(&quo

1 2 3 4 5 >

1 - 100 of 499 matches

Mail list logo