[PATCH v6 07/11] virtio-gpu: Handle resource blob commands
From: Antonio Caggiano Support BLOB resources creation, mapping and unmapping by calling the new stable virglrenderer 0.10 interface. Only enabled when available and via the blob config. E.g. -device virtio-vga-gl,blob=true Signed-off-by: Antonio Caggiano Signed-off-by: Dmitry Osipenko Signed-off-by: Xenia Ragiadakou Signed-off-by: Huang Rui --- Changes in v6: - Use new struct virgl_gpu_resource. - Unmap, unref and destroy the resource only after the memory region has been completely removed. - In unref check whether the resource is still mapped. - In unmap_blob check whether the resource has been already unmapped. - Fix coding style hw/display/virtio-gpu-virgl.c | 274 +- hw/display/virtio-gpu.c | 4 +- meson.build | 4 + 3 files changed, 276 insertions(+), 6 deletions(-) diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index faab374336..5a3a292f79 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -17,6 +17,7 @@ #include "trace.h" #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-gpu.h" +#include "hw/virtio/virtio-gpu-bswap.h" #include "ui/egl-helpers.h" @@ -24,8 +25,62 @@ struct virgl_gpu_resource { struct virtio_gpu_simple_resource res; +uint32_t ref; +VirtIOGPU *g; + +#ifdef HAVE_VIRGL_RESOURCE_BLOB +/* only blob resource needs this region to be mapped as guest mmio */ +MemoryRegion *region; +#endif }; +static void vres_get_ref(struct virgl_gpu_resource *vres) +{ +uint32_t ref; + +ref = qatomic_fetch_inc(>ref); +g_assert(ref < INT_MAX); +} + +static void virgl_resource_destroy(struct virgl_gpu_resource *vres) +{ +struct virtio_gpu_simple_resource *res; +VirtIOGPU *g; + +if (!vres) { +return; +} + +g = vres->g; +res = >res; +QTAILQ_REMOVE(>reslist, res, next); +virtio_gpu_cleanup_mapping(g, res); +g_free(vres); +} + +static void virgl_resource_unref(struct virgl_gpu_resource *vres) +{ +struct virtio_gpu_simple_resource *res; + +if (!vres) { +return; +} + +res = >res; 
+virgl_renderer_resource_detach_iov(res->resource_id, NULL, NULL); +virgl_renderer_resource_unref(res->resource_id); +} + +static void vres_put_ref(struct virgl_gpu_resource *vres) +{ +g_assert(vres->ref > 0); + +if (qatomic_fetch_dec(>ref) == 1) { +virgl_resource_unref(vres); +virgl_resource_destroy(vres); +} +} + static struct virgl_gpu_resource * virgl_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id) { @@ -59,6 +114,8 @@ static void virgl_cmd_create_resource_2d(VirtIOGPU *g, c2d.width, c2d.height); vres = g_new0(struct virgl_gpu_resource, 1); +vres_get_ref(vres); +vres->g = g; vres->res.width = c2d.width; vres->res.height = c2d.height; vres->res.format = c2d.format; @@ -91,6 +148,8 @@ static void virgl_cmd_create_resource_3d(VirtIOGPU *g, c3d.width, c3d.height, c3d.depth); vres = g_new0(struct virgl_gpu_resource, 1); +vres_get_ref(vres); +vres->g = g; vres->res.width = c3d.width; vres->res.height = c3d.height; vres->res.format = c3d.format; @@ -126,12 +185,21 @@ static void virgl_cmd_resource_unref(VirtIOGPU *g, return; } -virgl_renderer_resource_detach_iov(unref.resource_id, NULL, NULL); -virgl_renderer_resource_unref(unref.resource_id); +#ifdef HAVE_VIRGL_RESOURCE_BLOB +if (vres->region) { +VirtIOGPUBase *b = VIRTIO_GPU_BASE(g); +MemoryRegion *mr = vres->region; + +warn_report("%s: blob resource %d not unmapped", +__func__, unref.resource_id); +vres->region = NULL; +memory_region_set_enabled(mr, false); +memory_region_del_subregion(>hostmem, mr); +object_unparent(OBJECT(mr)); +} +#endif /* HAVE_VIRGL_RESOURCE_BLOB */ -QTAILQ_REMOVE(>reslist, >res, next); -virtio_gpu_cleanup_mapping(g, >res); -g_free(vres); +vres_put_ref(vres); } static void virgl_cmd_context_create(VirtIOGPU *g, @@ -470,6 +538,191 @@ static void virgl_cmd_get_capset(VirtIOGPU *g, g_free(resp); } +#ifdef HAVE_VIRGL_RESOURCE_BLOB + +static void virgl_resource_unmap(struct virgl_gpu_resource *vres) +{ +if (!vres) { +return; +} + +virgl_renderer_resource_unmap(vres->res.resource_id); + 
+vres_put_ref(vres); +} + +static void virgl_resource_blob_async_unmap(void *obj) +{ +MemoryRegion *mr = MEMORY_REGION(obj); +struct virgl_gpu_resource *vres = mr->opaque; + +virgl_resource_unmap(vres); + +g_free(obj); +} + +static void virgl_cmd_resource_create_blob(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ +struct virgl_gpu_resource *vres; +struct virtio_gpu_resource_create_blob cblob; +struct
[PATCH v6 08/11] virtio-gpu: Resource UUID
From: Antonio Caggiano Enable resource UUID feature and implement command resource assign UUID. This is done by introducing a hash table to map resource IDs to their UUIDs. Signed-off-by: Antonio Caggiano Signed-off-by: Huang Rui --- Changes in v6: - Set resource uuid as option. - Implement optional subsection of vmstate_virtio_gpu_resource_uuid_state for virtio live migration. - Use g_int_hash/g_int_equal instead of the default. - Move virtio_gpu_simple_resource initialization in the earlier new patch "virtio-gpu: Introduce virgl_gpu_resource structure" hw/display/trace-events| 1 + hw/display/virtio-gpu-base.c | 4 ++ hw/display/virtio-gpu-virgl.c | 3 + hw/display/virtio-gpu.c| 119 + include/hw/virtio/virtio-gpu.h | 7 ++ 5 files changed, 134 insertions(+) diff --git a/hw/display/trace-events b/hw/display/trace-events index 2336a0ca15..54d6894c59 100644 --- a/hw/display/trace-events +++ b/hw/display/trace-events @@ -41,6 +41,7 @@ virtio_gpu_cmd_res_create_blob(uint32_t res, uint64_t size) "res 0x%x, size %" P virtio_gpu_cmd_res_unref(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_back_attach(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_back_detach(uint32_t res) "res 0x%x" +virtio_gpu_cmd_res_assign_uuid(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_xfer_toh_2d(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_xfer_toh_3d(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_xfer_fromh_3d(uint32_t res) "res 0x%x" diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 37af256219..6bcee3882f 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -236,6 +236,10 @@ virtio_gpu_base_get_features(VirtIODevice *vdev, uint64_t features, features |= (1 << VIRTIO_GPU_F_CONTEXT_INIT); } +if (virtio_gpu_resource_uuid_enabled(g->conf)) { +features |= (1 << VIRTIO_GPU_F_RESOURCE_UUID); +} + return features; } diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index 5a3a292f79..be9da6e780 100644 --- a/hw/display/virtio-gpu-virgl.c 
+++ b/hw/display/virtio-gpu-virgl.c @@ -777,6 +777,9 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, /* TODO add security */ virgl_cmd_ctx_detach_resource(g, cmd); break; +case VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID: +virtio_gpu_resource_assign_uuid(g, cmd); +break; case VIRTIO_GPU_CMD_GET_CAPSET_INFO: virgl_cmd_get_capset_info(g, cmd); break; diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 8189c392dc..466debb256 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -958,6 +958,37 @@ virtio_gpu_resource_detach_backing(VirtIOGPU *g, virtio_gpu_cleanup_mapping(g, res); } +void virtio_gpu_resource_assign_uuid(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ +struct virtio_gpu_simple_resource *res; +struct virtio_gpu_resource_assign_uuid assign; +struct virtio_gpu_resp_resource_uuid resp; +QemuUUID *uuid; + +VIRTIO_GPU_FILL_CMD(assign); +virtio_gpu_bswap_32(, sizeof(assign)); +trace_virtio_gpu_cmd_res_assign_uuid(assign.resource_id); + +res = virtio_gpu_find_check_resource(g, assign.resource_id, false, __func__, >error); +if (!res) { +return; +} + +memset(, 0, sizeof(resp)); +resp.hdr.type = VIRTIO_GPU_RESP_OK_RESOURCE_UUID; + +uuid = g_hash_table_lookup(g->resource_uuids, _id); +if (!uuid) { +uuid = g_new(QemuUUID, 1); +qemu_uuid_generate(uuid); +g_hash_table_insert(g->resource_uuids, _id, uuid); +} + +memcpy(resp.uuid, uuid, sizeof(QemuUUID)); +virtio_gpu_ctrl_response(g, cmd, , sizeof(resp)); +} + void virtio_gpu_simple_process_cmd(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { @@ -1006,6 +1037,9 @@ void virtio_gpu_simple_process_cmd(VirtIOGPU *g, case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: virtio_gpu_resource_detach_backing(g, cmd); break; +case VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID: +virtio_gpu_resource_assign_uuid(g, cmd); +break; default: cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; break; @@ -1400,6 +1434,57 @@ static int virtio_gpu_blob_load(QEMUFile *f, void *opaque, size_t size, return 0; } +static int 
virtio_gpu_resource_uuid_save(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, + JSONWriter *vmdesc) +{ +VirtIOGPU *g = opaque; +struct virtio_gpu_simple_resource *res; +QemuUUID *uuid; + +/* in 2d mode we should never find unprocessed commands here */ +assert(QTAILQ_EMPTY(>cmdq)); + +QTAILQ_FOREACH(res, >reslist, next) { +qemu_put_be32(f, res->resource_id); +uuid = g_hash_table_lookup(g->resource_uuids,
[PATCH v6 09/11] virtio-gpu: Support Venus capset
From: Antonio Caggiano Add support for the Venus capset, which enables Vulkan support through the Venus Vulkan driver for virtio-gpu. Signed-off-by: Antonio Caggiano Signed-off-by: Huang Rui --- No change in v6. hw/display/virtio-gpu-virgl.c | 21 + 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index be9da6e780..f35a751824 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -506,6 +506,11 @@ static void virgl_cmd_get_capset_info(VirtIOGPU *g, virgl_renderer_get_cap_set(resp.capset_id, _max_version, _max_size); +} else if (info.capset_index == 2) { +resp.capset_id = VIRTIO_GPU_CAPSET_VENUS; +virgl_renderer_get_cap_set(resp.capset_id, + _max_version, + _max_size); } else { resp.capset_max_version = 0; resp.capset_max_size = 0; @@ -978,10 +983,18 @@ int virtio_gpu_virgl_init(VirtIOGPU *g) int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g) { -uint32_t capset2_max_ver, capset2_max_size; +uint32_t capset2_max_ver, capset2_max_size, num_capsets; +num_capsets = 1; + virgl_renderer_get_cap_set(VIRTIO_GPU_CAPSET_VIRGL2, - _max_ver, - _max_size); + _max_ver, + _max_size); +num_capsets += capset2_max_ver ? 1 : 0; + +virgl_renderer_get_cap_set(VIRTIO_GPU_CAPSET_VENUS, + _max_ver, + _max_size); +num_capsets += capset2_max_size ? 1 : 0; -return capset2_max_ver ? 2 : 1; +return num_capsets; } -- 2.25.1
[PATCH v6 05/11] virtio-gpu: Introduce virgl_gpu_resource structure
Introduce a new virgl_gpu_resource data structure and helper functions for virgl. It's used to add new member which is specific for virgl in following patches of blob memory support. Signed-off-by: Huang Rui --- New patch: - Introduce new struct virgl_gpu_resource to store virgl specific members. - Move resource initialization from path "virtio-gpu: Resource UUID" here. - Remove error handling of g_new0, because glib will abort() on OOM. - Set iov and iov_cnt in struct virtio_gpu_simple_resource for all types of resources. hw/display/virtio-gpu-virgl.c | 84 ++- 1 file changed, 64 insertions(+), 20 deletions(-) diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index 5bbc8071b2..faab374336 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -22,6 +22,23 @@ #include +struct virgl_gpu_resource { +struct virtio_gpu_simple_resource res; +}; + +static struct virgl_gpu_resource * +virgl_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id) +{ +struct virtio_gpu_simple_resource *res; + +res = virtio_gpu_find_resource(g, resource_id); +if (!res) { +return NULL; +} + +return container_of(res, struct virgl_gpu_resource, res); +} + #if VIRGL_RENDERER_CALLBACKS_VERSION >= 4 static void * virgl_get_egl_display(G_GNUC_UNUSED void *cookie) @@ -35,11 +52,19 @@ static void virgl_cmd_create_resource_2d(VirtIOGPU *g, { struct virtio_gpu_resource_create_2d c2d; struct virgl_renderer_resource_create_args args; +struct virgl_gpu_resource *vres; VIRTIO_GPU_FILL_CMD(c2d); trace_virtio_gpu_cmd_res_create_2d(c2d.resource_id, c2d.format, c2d.width, c2d.height); +vres = g_new0(struct virgl_gpu_resource, 1); +vres->res.width = c2d.width; +vres->res.height = c2d.height; +vres->res.format = c2d.format; +vres->res.resource_id = c2d.resource_id; +QTAILQ_INSERT_HEAD(>reslist, >res, next); + args.handle = c2d.resource_id; args.target = 2; args.format = c2d.format; @@ -59,11 +84,19 @@ static void virgl_cmd_create_resource_3d(VirtIOGPU *g, { struct 
virtio_gpu_resource_create_3d c3d; struct virgl_renderer_resource_create_args args; +struct virgl_gpu_resource *vres; VIRTIO_GPU_FILL_CMD(c3d); trace_virtio_gpu_cmd_res_create_3d(c3d.resource_id, c3d.format, c3d.width, c3d.height, c3d.depth); +vres = g_new0(struct virgl_gpu_resource, 1); +vres->res.width = c3d.width; +vres->res.height = c3d.height; +vres->res.format = c3d.format; +vres->res.resource_id = c3d.resource_id; +QTAILQ_INSERT_HEAD(>reslist, >res, next); + args.handle = c3d.resource_id; args.target = c3d.target; args.format = c3d.format; @@ -82,19 +115,23 @@ static void virgl_cmd_resource_unref(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { struct virtio_gpu_resource_unref unref; -struct iovec *res_iovs = NULL; -int num_iovs = 0; +struct virgl_gpu_resource *vres; VIRTIO_GPU_FILL_CMD(unref); trace_virtio_gpu_cmd_res_unref(unref.resource_id); -virgl_renderer_resource_detach_iov(unref.resource_id, - _iovs, - _iovs); -if (res_iovs != NULL && num_iovs != 0) { -virtio_gpu_cleanup_mapping_iov(g, res_iovs, num_iovs); +vres = virgl_gpu_find_resource(g, unref.resource_id); +if (!vres) { +cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +return; } + +virgl_renderer_resource_detach_iov(unref.resource_id, NULL, NULL); virgl_renderer_resource_unref(unref.resource_id); + +QTAILQ_REMOVE(>reslist, >res, next); +virtio_gpu_cleanup_mapping(g, >res); +g_free(vres); } static void virgl_cmd_context_create(VirtIOGPU *g, @@ -310,44 +347,51 @@ static void virgl_resource_attach_backing(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { struct virtio_gpu_resource_attach_backing att_rb; -struct iovec *res_iovs; -uint32_t res_niov; +struct virgl_gpu_resource *vres; int ret; VIRTIO_GPU_FILL_CMD(att_rb); trace_virtio_gpu_cmd_res_back_attach(att_rb.resource_id); +vres = virgl_gpu_find_resource(g, att_rb.resource_id); +if (!vres) { +cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +return; +} + ret = virtio_gpu_create_mapping_iov(g, att_rb.nr_entries, sizeof(att_rb), 
-cmd, NULL, _iovs, _niov); +cmd, NULL, >res.iov, +>res.iov_cnt); if (ret != 0) { cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; return; } ret = virgl_renderer_resource_attach_iov(att_rb.resource_id, -
[PATCH v6 06/11] softmmu/memory: enable automatic deallocation of memory regions
From: Xenia Ragiadakou When the memory region has a different life-cycle from that of its parent, it could be automatically released, once it has been unparented and once all of its references have gone away, via the object's free callback. However, currently, the address space subsystem keeps references to the memory region without first incrementing its object's reference count. As a result, the automatic deallocation of the object, not taking into account those references, results in use-after-free memory corruption. More specifically, reference to the memory region is kept in flatview ranges. If the reference count of the memory region is not incremented, flatview_destroy(), that is asynchronous, may be called after memory region's destruction. If the reference count of the memory region is incremented, memory region's destruction will take place after flatview_destroy() has released its references. This patch increases the reference count of an owned memory region object on each memory_region_ref() and decreases it on each memory_region_unref(). Signed-off-by: Xenia Ragiadakou Signed-off-by: Huang Rui --- Changes in v6: - remove in-code comment because it is confusing and explain the issue, that the patch attempts to fix, with more details in commit message system/memory.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/system/memory.c b/system/memory.c index 304fa843ea..4d5e7e7a4c 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1824,6 +1824,7 @@ void memory_region_ref(MemoryRegion *mr) * we do not ref/unref them because it slows down DMA sensibly. */ if (mr && mr->owner) { +object_ref(OBJECT(mr)); object_ref(mr->owner); } } @@ -1832,6 +1833,7 @@ void memory_region_unref(MemoryRegion *mr) { if (mr && mr->owner) { object_unref(mr->owner); +object_unref(OBJECT(mr)); } } -- 2.25.1
[PATCH v6 03/11] virtio-gpu: Support context init feature with virglrenderer
Patch "virtio-gpu: CONTEXT_INIT feature" has added the context_init feature flags. We would like to enable the feature with virglrenderer, so add to create virgl renderer context with flags using context_id when valid. Originally-by: Antonio Caggiano Signed-off-by: Huang Rui --- Changes in v6: - Handle the case while context_init is disabled. - Enable context_init by default. hw/display/virtio-gpu-virgl.c | 13 +++-- hw/display/virtio-gpu.c | 4 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index 8bb7a2c21f..5bbc8071b2 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -106,8 +106,17 @@ static void virgl_cmd_context_create(VirtIOGPU *g, trace_virtio_gpu_cmd_ctx_create(cc.hdr.ctx_id, cc.debug_name); -virgl_renderer_context_create(cc.hdr.ctx_id, cc.nlen, - cc.debug_name); +#ifdef HAVE_VIRGL_CONTEXT_CREATE_WITH_FLAGS +if (cc.context_init && virtio_gpu_context_init_enabled(g->parent_obj.conf)) { +virgl_renderer_context_create_with_flags(cc.hdr.ctx_id, + cc.context_init, + cc.nlen, + cc.debug_name); +return; +} +#endif + +virgl_renderer_context_create(cc.hdr.ctx_id, cc.nlen, cc.debug_name); } static void virgl_cmd_context_destroy(VirtIOGPU *g, diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index b016d3bac8..8b2f4c6be3 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -1619,6 +1619,10 @@ static Property virtio_gpu_properties[] = { DEFINE_PROP_BIT("blob", VirtIOGPU, parent_obj.conf.flags, VIRTIO_GPU_FLAG_BLOB_ENABLED, false), DEFINE_PROP_SIZE("hostmem", VirtIOGPU, parent_obj.conf.hostmem, 0), +#ifdef HAVE_VIRGL_CONTEXT_CREATE_WITH_FLAGS +DEFINE_PROP_BIT("context_init", VirtIOGPU, parent_obj.conf.flags, +VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED, true), +#endif DEFINE_PROP_END_OF_LIST(), }; -- 2.25.1
[PATCH v6 04/11] virtio-gpu: Don't require udmabuf when blobs and virgl are enabled
From: Dmitry Osipenko The udmabuf usage is mandatory when virgl is disabled and blobs feature enabled in the Qemu machine configuration. If virgl and blobs are enabled, then udmabuf requirement is optional. Since udmabuf isn't widely supported by a popular Linux distros today, let's relax the udmabuf requirement for blobs=on,virgl=on. Now, a full-featured virtio-gpu acceleration is available to Qemu users without a need to have udmabuf available in the system. Reviewed-by: Antonio Caggiano Signed-off-by: Dmitry Osipenko Signed-off-by: Huang Rui --- No change in v6. hw/display/virtio-gpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 8b2f4c6be3..4c3ec9d0ea 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -1443,6 +1443,7 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) if (virtio_gpu_blob_enabled(g->parent_obj.conf)) { if (!virtio_gpu_rutabaga_enabled(g->parent_obj.conf) && +!virtio_gpu_virgl_enabled(g->parent_obj.conf) && !virtio_gpu_have_udmabuf()) { error_setg(errp, "need rutabaga or udmabuf for blob resources"); return; -- 2.25.1
[PATCH v6 02/11] virtio-gpu: Configure new feature flag context_create_with_flags for virglrenderer
Configure a new feature flag (context_create_with_flags) for virglrenderer. Originally-by: Antonio Caggiano Signed-off-by: Huang Rui --- Changes in v6: - Move macros configurations under virgl.found() and rename HAVE_VIRGL_CONTEXT_CREATE_WITH_FLAGS. meson.build | 4 1 file changed, 4 insertions(+) diff --git a/meson.build b/meson.build index ec01f8b138..ea52ef1b9c 100644 --- a/meson.build +++ b/meson.build @@ -1050,6 +1050,10 @@ if not get_option('virglrenderer').auto() or have_system or have_vhost_user_gpu cc.has_member('struct virgl_renderer_resource_info_ext', 'd3d_tex2d', prefix: '#include ', dependencies: virgl)) +config_host_data.set('HAVE_VIRGL_CONTEXT_CREATE_WITH_FLAGS', + cc.has_function('virgl_renderer_context_create_with_flags', + prefix: '#include ', + dependencies: virgl)) endif endif rutabaga = not_found -- 2.25.1
[PATCH v6 00/11] Support blob memory and venus on qemu
Hi all, Sorry to late for V6, I was occupied by other stuff last two months, and right now resume the submission. Antonio Caggiano made the venus with QEMU on KVM platform last September[1]. This series are inherited from his original work to support the features of context init, hostmem, resource uuid, and blob resources for venus. At March of this year, we sent out the V1 version[2] for the review. But those series are included both xen and virtio gpu. Right now, we would like to divide into two parts, one is to continue the Antonio's work to upstream virtio-gpu support for blob memory and venus, and another is to upstream xen specific patches. This series is focusing on virtio-gpu, so we are marking as V4 version here to continue Antonio's patches[1]. And we will send xen specific patches separately, because they are hypervisor specific. Besides of QEMU, these supports also included virglrenderer[3][4] and mesa[5][6] as well. Right now, virglrenderer and mesa parts are all accepted by upstream. In this qemu version, we try to address the concerns around not proper cleanup during blob resource unmap and unref. Appreciate it if you have any commments. [1] https://lore.kernel.org/qemu-devel/20220926142422.22325-1-antonio.caggi...@collabora.com/ [2] V1: https://lore.kernel.org/qemu-devel/20230312092244.451465-1-ray.hu...@amd.com [3] https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1068 [4] https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1180 [5] https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22108 [6] https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23680 Please note the first 4 patches 1 -> 4 are inlcuded in these series because the series depends on them and not because we want them to be reviewed since they are already in the process of review through the "rutabaga_gfx + gfxstream" series. 
- https://lore.kernel.org/qemu-devel/20230829003629.410-1-gurchetansi...@chromium.org/ V4: https://lore.kernel.org/qemu-devel/20230831093252.2461282-1-ray.hu...@amd.com V5: https://lore.kernel.org/qemu-devel/2023091530.24064-1-ray.hu...@amd.com Changes from V5 to V6 - Move macros configurations under virgl.found() and rename HAVE_VIRGL_CONTEXT_CREATE_WITH_FLAGS. - Handle the case while context_init is disabled. - Enable context_init by default. - Move virtio_gpu_virgl_resource_unmap() into virgl_cmd_resource_unmap_blob(). - Introduce new struct virgl_gpu_resource to store virgl specific members. - Remove error handling of g_new0, because glib will abort() on OOM. - Set resource uuid as option. - Implement optional subsection of vmstate_virtio_gpu_resource_uuid_state for virtio live migration. - Use g_int_hash/g_int_equal instead of the default - Add scanout_blob function for virtio-gpu-virgl - Resolve the memory leak on virtio-gpu-virgl - Remove the unstable API flags check because virglrenderer is already 1.0 - Squash the render server flag support into "Initialize Venus" Changes from V4 (virtio gpu V4) to V5 - Inverted patch 5 and 6 because we should configure HAVE_VIRGL_CONTEXT_INIT firstly. - Validate owner of memory region to avoid slowing down DMA. - Use memory_region_init_ram_ptr() instead of memory_region_init_ram_device_ptr(). - Adjust sequence to allocate gpu resource before virglrenderer resource creation - Add virtio migration handling for uuid. - Send kernel patch to define VIRTIO_GPU_CAPSET_VENUS. https://lore.kernel.org/lkml/20230915105918.3763061-1-ray.hu...@amd.com/ - Add meson check to make sure unstable APIs defined from 0.9.0. Changes from V1 to V2 (virtio gpu V4) - Remove unused #include "hw/virtio/virtio-iommu.h" - Add a local function, called virgl_resource_destroy(), that is used to release a vgpu resource on error paths and in resource_unref. 
- Remove virtio_gpu_virgl_resource_unmap from virtio_gpu_cleanup_mapping(), since this function won't be called on blob resources and also because blob resources are unmapped via virgl_cmd_resource_unmap_blob(). - In virgl_cmd_resource_create_blob(), do proper cleanup in error paths and move QTAILQ_INSERT_HEAD(>reslist, res, next) after the resource has been fully initialized. - Memory region has a different life-cycle from virtio gpu resources i.e. cannot be released synchronously along with the vgpu resource. So, here the field "region" was changed to a pointer and is allocated dynamically when the blob is mapped. Also, since the pointer can be used to indicate whether the blob is mapped, the explicite field "mapped" was removed. - In virgl_cmd_resource_map_blob(), add check on the value of res->region, to prevent beeing called twice on the same resource. - Add a patch to enable automatic deallocation of memory regions to resolve use-after-free memory corruption with a reference. References Demo with Venus: - https://static.sched.com/hosted_files/xen2023/3f/xen_summit_2023_virtgpu_demo.mp4 QEMU repository: -
[PATCH v6 01/11] linux-headers: Update to kernel headers to add venus capset
Sync up kernel headers to update venus macro till they are merged into mainline. Signed-off-by: Huang Rui --- Changes in v6: - Venus capset is applied in kernel, so update it in qemu for future use. https://lore.kernel.org/lkml/b79dcf75-c9e8-490e-644f-3b97d95f7...@collabora.com/ https://cgit.freedesktop.org/drm-misc/commit/?id=216d86b9a430f3280e5b631c51e6fd1a7774cfa0 include/standard-headers/linux/virtio_gpu.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h index 2da48d3d4c..2db643ed8f 100644 --- a/include/standard-headers/linux/virtio_gpu.h +++ b/include/standard-headers/linux/virtio_gpu.h @@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit { #define VIRTIO_GPU_CAPSET_VIRGL 1 #define VIRTIO_GPU_CAPSET_VIRGL2 2 +/* 3 is reserved for gfxstream */ +#define VIRTIO_GPU_CAPSET_VENUS 4 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ struct virtio_gpu_get_capset_info { -- 2.25.1
[linux-linus test] 184170: tolerable FAIL - PUSHED
flight 184170 linux-linus real [real] http://logs.test-lab.xenproject.org/osstest/logs/184170/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt 16 saverestore-support-check fail blocked in 184162 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 184162 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 184162 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 184162 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check fail like 184162 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 184162 test-armhf-armhf-libvirt-raw 15 saverestore-support-checkfail like 184162 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 184162 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass 
test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-qcow2 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-armhf-armhf-xl-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-armhf-armhf-libvirt-raw 14 migrate-support-checkfail never pass version targeted for testing: linux2cf4f94d8e8646803f8fb0facf134b0cd7fb691a baseline version: linuxceb6a6f023fd3e8b07761ed900352ef574010bcb Last test of basis 184162 2023-12-18 02:29:46 Z1 days Testing same since 184170 2023-12-18 19:42:38 Z0 days1 attempts People who touched revisions under test: Benjamin Bigler Linus Torvalds Louis Chauvet Manish Pandey Mark Brown Martin K. Petersen Miquel Raynal Nam Cao Nitin Rawat Ronald Wahl jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm pass build-amd64 pass build-arm64 pass build-armhf pass build-i386 pass build-amd64-libvirt
[xen-unstable test] 184169: tolerable FAIL - PUSHED
flight 184169 xen-unstable real [real] flight 184171 xen-unstable real-retest [real] http://logs.test-lab.xenproject.org/osstest/logs/184169/ http://logs.test-lab.xenproject.org/osstest/logs/184171/ Failures :-/ but no regressions. Tests which are failing intermittently (not blocking): test-armhf-armhf-xl-credit1 19 guest-start.2 fail pass in 184171-retest test-armhf-armhf-libvirt-qcow2 13 guest-start fail pass in 184171-retest Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check fail in 184171 like 184161 test-armhf-armhf-libvirt-qcow2 14 migrate-support-check fail in 184171 never pass test-armhf-armhf-libvirt 16 saverestore-support-checkfail like 184161 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 184161 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 184161 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 184161 test-amd64-i386-xl-qemut-ws16-amd64 19 guest-stop fail like 184161 test-armhf-armhf-libvirt-raw 15 saverestore-support-checkfail like 184161 test-amd64-i386-xl-qemut-win7-amd64 19 guest-stop fail like 184161 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 184161 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 184161 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 184161 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 184161 test-amd64-i386-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-i386-libvirt 15 migrate-support-checkfail never pass test-amd64-i386-xl-pvshim14 guest-start fail never pass test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass 
test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-amd64-i386-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl 
15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd 15 saverestore-support-checkfail never pass version targeted for testing: xen 0cc74376d6823e0883f89556be2a267f2240a558 baseline version: xen
Re: [RFC PATCH v1 1/1] xen/Makefile: introduce ARCH_FIXED_CONFIG for randconfig
On Mon, 18 Dec 2023, Jan Beulich wrote: > On 18.12.2023 17:07, Andrew Cooper wrote: > > On 11/12/2023 3:56 pm, Jan Beulich wrote: > >> On 07.12.2023 21:17, Andrew Cooper wrote: > >>> On 07/12/2023 5:03 pm, Oleksii Kurochko wrote: > ARCH_FIXED_CONFIG is required in the case of randconfig > and CI for configs that aren't ready or are not > supposed to be implemented for specific architecture. > These configs should always be disabled to prevent randconfig > related tests from failing. > > Signed-off-by: Oleksii Kurochko > --- > xen/Makefile | 5 - > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/xen/Makefile b/xen/Makefile > index ca571103c8..8ae8fe1480 100644 > --- a/xen/Makefile > +++ b/xen/Makefile > @@ -336,11 +336,14 @@ ifeq ($(config-build),y) > # *config targets only - make sure prerequisites are updated, and > descend > # in tools/kconfig to make the *config target > > +ARCH_FORCED_CONFIG := > $(srctree)/arch/$(SRCARCH)/configs/randomforced.config > + > # Create a file for KCONFIG_ALLCONFIG which depends on the environment. > # This will be use by kconfig targets > allyesconfig/allmodconfig/allnoconfig/randconfig > filechk_kconfig_allconfig = \ > $(if $(findstring n,$(XEN_HAS_CHECKPOLICY)), echo > 'CONFIG_XSM_FLASK_POLICY=n';) \ > -$(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG);) \ > +$(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG); \ > +$(if $(wildcard $(ARCH_FORCED_CONFIG)), cat $(ARCH_FORCED_CONFIG);) > ) \ > : > > .allconfig.tmp: FORCE > >>> We already have infrastructure for this. What's wrong with > >>> EXTRA_FIXED_RANDCONFIG? > >> What I don't understand here is why dealing with the issue would want > >> limiting to gitlab-CI. Anyone could run randconfig on their own, and > >> imo it would be helpful if the same issue(s) could be prevented there, > >> too. Hence my earlier suggestion to have a snippet which can be used > >> by "interested" parties. And once dealt with in e.g. 
the makefile > >> there should not be a need for any overrides in the CI config anymore. > > > > This is trying to find a solution to a problem which doesn't exist. > > > > RISC-V and PPC are experimental in Xen. Noone else is going to come and > > randconfig them until they're rather more production ready, and a > > prerequisite of that is removing this list of exclusions. > > > > Until you can actually find an interested party to comment, I think this > > is just churn for no useful improvement. If nothing else, calling it > > randomforced.config isn't appropriate given the explanation of what this > > target is used for... > > "random" in the name can't possibly be right anyway. Such collection of > fixed settings would also be relevant to e.g. all{yes,no}config. Yet > that's still not the same as any kind of "default" config, which the > two architectures presently kind of abuse for the purpose of defining > required-fixed settings. One thing for sure, I don't think it would be a good idea to add extra temporary Kconfig changes like these: [1] https://lore.kernel.org/xen-devel/cdc20255540a66ba0b6946ac6d48c11029cd3385.1701453087.git.oleksii.kuroc...@gmail.com/ [2] https://lore.kernel.org/xen-devel/d42a34866edc70a12736b5c6976aa1b44b4ebd8a.1701453087.git.oleksii.kuroc...@gmail.com/ I agree with Andrew that RISC-V and PPC are experimental so whatever works to enable them to make progress on this issue with a small effort is sufficient. I would be happy with a quick respin of this series following the gitlab-ci approach. This is good enough. And I think that having some sort of fixed seed (seed.config?) for randconfig would also be fine and potentially more reusable. But I think Oleksii should go forward with whatever approach he prefers and he is more comfortable with, as long as it is not [1] and [2].
Re: [XEN PATCH v2] xen: move declaration of first_valid_mfn to xen/numa.h
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > Such declaration is moved in order to provide it for Arm and PPC, > whilst not violating MISRA C:2012 Rule 8.4 in common/page_alloc.c: > "A compatible declaration shall be visible when an object or > function with external linkage is defined". > > Signed-off-by: Julien Grall > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini > --- > Changes in v2: > This patch is a rework of a previous one appeared in this series [1], of which > patches 1 and 2 have been committed already. > > The updated patch was provided by Julien in this thread [2]. I added the > commit > message and the rest of the information. > > [1] > https://lore.kernel.org/xen-devel/cover.1702285639.git.nicola.vetr...@bugseng.com/T/#mee6def855787d932fe2f10d5648d437dcb6f046c > [2] > https://lore.kernel.org/xen-devel/cover.1702285639.git.nicola.vetr...@bugseng.com/T/#m3c5b141b806530b5920bb5e9dd53631195560317 > --- > xen/arch/arm/include/asm/numa.h | 6 -- > xen/arch/ppc/include/asm/numa.h | 6 -- > xen/common/page_alloc.c | 6 -- > xen/include/xen/numa.h | 2 ++ > 4 files changed, 6 insertions(+), 14 deletions(-) > > diff --git a/xen/arch/arm/include/asm/numa.h b/xen/arch/arm/include/asm/numa.h > index e2bee2bd8223..a2c1da4a82f7 100644 > --- a/xen/arch/arm/include/asm/numa.h > +++ b/xen/arch/arm/include/asm/numa.h > @@ -11,12 +11,6 @@ typedef u8 nodeid_t; > #define cpu_to_node(cpu) 0 > #define node_to_cpumask(node) (cpu_online_map) > > -/* > - * TODO: make first_valid_mfn static when NUMA is supported on Arm, this > - * is required because the dummy helpers are using it. 
> - */ > -extern mfn_t first_valid_mfn; > - > /* XXX: implement NUMA support */ > #define node_spanned_pages(nid) (max_page - mfn_x(first_valid_mfn)) > #define node_start_pfn(nid) (mfn_x(first_valid_mfn)) > diff --git a/xen/arch/ppc/include/asm/numa.h b/xen/arch/ppc/include/asm/numa.h > index 7fdf66c3da74..204180ad5b98 100644 > --- a/xen/arch/ppc/include/asm/numa.h > +++ b/xen/arch/ppc/include/asm/numa.h > @@ -10,12 +10,6 @@ typedef uint8_t nodeid_t; > #define cpu_to_node(cpu) 0 > #define node_to_cpumask(node) (cpu_online_map) > > -/* > - * TODO: make first_valid_mfn static when NUMA is supported on PPC, this > - * is required because the dummy helpers are using it. > - */ > -extern mfn_t first_valid_mfn; > - > /* XXX: implement NUMA support */ > #define node_spanned_pages(nid) (max_page - mfn_x(first_valid_mfn)) > #define node_start_pfn(nid) (mfn_x(first_valid_mfn)) > diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c > index 9b5df74fddab..d874525916ea 100644 > --- a/xen/common/page_alloc.c > +++ b/xen/common/page_alloc.c > @@ -255,8 +255,10 @@ static PAGE_LIST_HEAD(page_broken_list); > */ > > /* > - * first_valid_mfn is exported because it is use in ARM specific NUMA > - * helpers. See comment in arch/arm/include/asm/numa.h. > + * first_valid_mfn is exported because it is used when !CONFIG_NUMA. > + * > + * TODO: Consider if we can conditionally export first_valid_mfn based > + * on whether NUMA is selected. > */ > mfn_t first_valid_mfn = INVALID_MFN_INITIALIZER; > > diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h > index 287e81ff..a10d4b1778a0 100644 > --- a/xen/include/xen/numa.h > +++ b/xen/include/xen/numa.h > @@ -108,6 +108,8 @@ extern void numa_set_processor_nodes_parsed(nodeid_t > node); > > #else > > +extern mfn_t first_valid_mfn; > + > static inline nodeid_t mfn_to_nid(mfn_t mfn) > { > return 0; > -- > 2.34.1 >
Re: [XEN PATCH v2 7/7] automation/eclair_analysis: avoid violation of MISRA Rule 2.1
On Mon, 18 Dec 2023, Stefano Stabellini wrote: > On Mon, 18 Dec 2023, Nicola Vetrini wrote: > > The presence of an unlinked object file triggers a violation > > of MISRA C Rule 2.1, which is deviated, as it's not part of > > the final Xen binary. > > > > No functional change. > > > > Signed-off-by: Nicola Vetrini > > Acked-by: Stefano Stabellini > > > > --- > > automation/eclair_analysis/ECLAIR/deviations.ecl | 5 + > > 1 file changed, 5 insertions(+) > > > > diff --git a/automation/eclair_analysis/ECLAIR/deviations.ecl > > b/automation/eclair_analysis/ECLAIR/deviations.ecl > > index 85741a2c01a9..e3de0fb2adf8 100644 > > --- a/automation/eclair_analysis/ECLAIR/deviations.ecl > > +++ b/automation/eclair_analysis/ECLAIR/deviations.ecl > > @@ -28,6 +28,11 @@ not executable, and therefore it is safe for them to be > > unreachable." > > -config=MC3R1.R2.1,ignored_stmts+={"any()", "pure_decl()"} > > -doc_end > > > > ++-doc_begin="The following autogenerated file is not linked deliberately." > > ++-file_tag+={C_runtime_failures,"^automation/eclair_analysis/C-runtime-failures\\.rst\\.c$"} > > ++-config=MC3R1.R2.1,reports+={deliberate, > > "any_area(any_loc(file(C_runtime_failures)))"} > > ++-doc_end Would it make sense to add it to exclude-list instead?
Re: [XEN PATCH] docs/misra: add entries to exclude-list
On Mon, 18 Dec 2023, Federico Serafini wrote: > Exclude efibind.h for all the architectures: it is used to build the > efi stub, which is a separate entry point for Xen when booted from EFI > firmware. > Remove redundant entries from out_of_scope.ecl. > > Exclude common/coverage: it is code to support gcov, hence it is part > of the testing machinery. > > Exclude decompress.h: file ported from Linux that defines a unique and > documented interface towards all the (adopted) decompress functions. > > Signed-off-by: Federico Serafini Acked-by: Stefano Stabellini > --- > .../eclair_analysis/ECLAIR/out_of_scope.ecl | 5 - > docs/misra/exclude-list.json | 16 > 2 files changed, 16 insertions(+), 5 deletions(-) > > diff --git a/automation/eclair_analysis/ECLAIR/out_of_scope.ecl > b/automation/eclair_analysis/ECLAIR/out_of_scope.ecl > index fd870716cf..9bcec4c69d 100644 > --- a/automation/eclair_analysis/ECLAIR/out_of_scope.ecl > +++ b/automation/eclair_analysis/ECLAIR/out_of_scope.ecl > @@ -17,11 +17,6 @@ > -file_tag+={out_of_scope,"^xen/arch/x86/include/asm/intel-family\\.h$"} > -doc_end > > --doc_begin="Files imported from the gnu-efi package" > --file_tag+={adopted,"^xen/include/efi/.*$"} > --file_tag+={adopted,"^xen/arch/x86/include/asm/x86_64/efibind\\.h$"} > --doc_end > - > -doc_begin="Build tools are out of scope." 
> -file_tag+={out_of_scope_tools,"^xen/tools/.*$"} > -file_tag+={out_of_scope_tools,"^xen/arch/x86/efi/mkreloc\\.c$"} > diff --git a/docs/misra/exclude-list.json b/docs/misra/exclude-list.json > index 48f671c548..83fae0b4a4 100644 > --- a/docs/misra/exclude-list.json > +++ b/docs/misra/exclude-list.json > @@ -1,6 +1,10 @@ > { > "version": "1.0", > "content": [ > +{ > +"rel_path": "arch/*/include/asm/*/efibind.h", > +"comment": "Imported from gnu-efi package, ignore for now" > +}, > { > "rel_path": "arch/arm/arm64/cpufeature.c", > "comment": "Imported from Linux, ignore for now" > @@ -97,6 +101,10 @@ > "rel_path": "arch/x86/efi/check.c", > "comment": "The resulting code is not included in the final Xen > binary, ignore for now" > }, > +{ > +"rel_path": "common/coverage/*", > +"comment": "Files to support gcov, ignore for now" > +}, > { > "rel_path": "common/bitmap.c", > "comment": "Imported from Linux, ignore for now" > @@ -213,6 +221,14 @@ > "rel_path": "include/xen/acpi.h", > "comment": "Imported from Linux, ignore for now" > }, > +{ > +"rel_path": "include/efi/*", > +"comment": "Imported from gnu-efi package, ignore for now" > +}, > +{ > +"rel_path": "include/xen/decompress.h", > +"comment": "Imported from Linux, ignore for now" > +}, > { > "rel_path": "lib/list-sort.c", > "comment": "Imported from Linux, ignore for now" > -- > 2.34.1 >
Re: [XEN PATCH v2 7/7] automation/eclair_analysis: avoid violation of MISRA Rule 2.1
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > The presence of an unlinked object file triggers a violation > of MISRA C Rule 2.1, which is deviated, as it's not part of > the final Xen binary. > > No functional change. > > Signed-off-by: Nicola Vetrini Acked-by: Stefano Stabellini > --- > automation/eclair_analysis/ECLAIR/deviations.ecl | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/automation/eclair_analysis/ECLAIR/deviations.ecl > b/automation/eclair_analysis/ECLAIR/deviations.ecl > index 85741a2c01a9..e3de0fb2adf8 100644 > --- a/automation/eclair_analysis/ECLAIR/deviations.ecl > +++ b/automation/eclair_analysis/ECLAIR/deviations.ecl > @@ -28,6 +28,11 @@ not executable, and therefore it is safe for them to be > unreachable." > -config=MC3R1.R2.1,ignored_stmts+={"any()", "pure_decl()"} > -doc_end > > ++-doc_begin="The following autogenerated file is not linked deliberately." > ++-file_tag+={C_runtime_failures,"^automation/eclair_analysis/C-runtime-failures\\.rst\\.c$"} > ++-config=MC3R1.R2.1,reports+={deliberate, > "any_area(any_loc(file(C_runtime_failures)))"} > ++-doc_end > + > -doc_begin="Proving compliance with respect to Rule 2.2 is generally > impossible: > see https://arxiv.org/abs/2212.13933 for details. Moreover, peer review > gives us > confidence that no evidence of errors in the program's logic has been missed > due > -- > 2.34.1 >
Re: [XEN PATCH v2 6/7] xen/arm: vcpreg: address violation of MISRA C Rule 2.1
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > There is no path that reaches the call to 'advance_pc', thus violating MISRA C > Rule 2.1. > A call to ASSERT_UNREACHABLE() is added after the switch, despite this being > useful to detect errors only in debug builds; if that marker is ever reached, > a domain crash is triggered, as a defensive coding measure. > > No functional change. > > Signed-off-by: Julien Grall > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini > --- > The code changes (including the comment) were made by Julien in [1]; I added > the > commit text and all other informations. > > All the switch clauses, when expanded, end with a return statement > and the default clause has an unconditional return, therefore > advance_pc() is never reached. > > However, it has been deemed safer to crash the domain if the switch is ever > exited. > > [1] > https://lore.kernel.org/xen-devel/alpine.DEB.2.22.394.2312151232580.3175268@ubuntu-linux-20-04-desktop/T/#maa91d8025532455a6317119a1e4affa00a99e1ce > --- > xen/arch/arm/vcpreg.c | 10 -- > 1 file changed, 8 insertions(+), 2 deletions(-) > > diff --git a/xen/arch/arm/vcpreg.c b/xen/arch/arm/vcpreg.c > index 39aeda9dab62..a2d050070473 100644 > --- a/xen/arch/arm/vcpreg.c > +++ b/xen/arch/arm/vcpreg.c > @@ -707,8 +707,14 @@ void do_cp10(struct cpu_user_regs *regs, const union hsr > hsr) > inject_undef_exception(regs, hsr); > return; > } > - > -advance_pc(regs, hsr); > + > +/* > + * All the cases in the switch should return. If this is not the > + * case, then something went wrong and it is best to crash the > + * domain. > + */ > +ASSERT_UNREACHABLE(); > +domain_crash(current->domain); > } > > void do_cp(struct cpu_user_regs *regs, const union hsr hsr) > -- > 2.34.1 >
Re: [XEN PATCH v2 5/7] x86/platform: removed break to address MISRA C:2012 Rule 2.1
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > The break statement is redundant, hence it can be removed. > > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini
Re: [XEN PATCH v2 4/7] xen/arm: traps: add ASSERT_UNREACHABLE() where needed
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > The statements after a call to the noreturn function 'do_unexpected_trap' > can't be reached, thus violating MISRA C:2012 Rule 2.1 > ("A project shall not contain unreachable code."). > ASSERT_UNREACHABLE() is used to signal that the unreachable break-s are used > as > a defensive coding measure to prevent inadvertent fallthrough. > > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini > --- > Changes in v2: > - Use ASSERT_UNREACHABLE() to prevent mistakes. > --- > xen/arch/arm/traps.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c > index 3784e8276ef6..77220ba0927a 100644 > --- a/xen/arch/arm/traps.c > +++ b/xen/arch/arm/traps.c > @@ -2152,6 +2152,7 @@ void do_trap_hyp_sync(struct cpu_user_regs *regs) > case HSR_EC_SVE: > /* An SVE exception is a bug somewhere in hypervisor code */ > do_unexpected_trap("SVE trap at EL2", regs); > +ASSERT_UNREACHABLE(); > break; > #endif > case HSR_EC_DATA_ABORT_CURR_EL: > @@ -2171,7 +2172,7 @@ void do_trap_hyp_sync(struct cpu_user_regs *regs) > dump_hyp_walk(get_hfar(is_data)); > > do_unexpected_trap(fault, regs); > - > +ASSERT_UNREACHABLE(); > break; > } > default: > -- > 2.34.1 >
Re: [XEN PATCH v2 3/7] xen/arm: address MISRA C:2012 Rule 2.1
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > There are no paths that can reach the last return statement > of function 'vgic_v3_its_mmio_write' in 'vgic-v3-its.c' and > 'arch_memory_op' in 'arch/arm/mm.c', thus violating > MISRA C:2012 Rule 2.1: > "A project shall not contain unreachable code". > > Therefore, an ASSERT_UNREACHABLE() is inserted to remove the unreachable > return statement and protect against possible mistakes. > > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini > --- > Changes in v2: > - Changed resolution strategy to have an ASSERT_UNREACHABLE() before > the return. > --- > xen/arch/arm/mm.c | 1 + > xen/arch/arm/vgic-v3-its.c | 1 + > 2 files changed, 2 insertions(+) > > diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c > index eeb65ca6bb79..b15a18a49412 100644 > --- a/xen/arch/arm/mm.c > +++ b/xen/arch/arm/mm.c > @@ -283,6 +283,7 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) > arg) > return -ENOSYS; > } > > +ASSERT_UNREACHABLE(); > return 0; > } > > diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c > index 05429030b539..70b5aeb82219 100644 > --- a/xen/arch/arm/vgic-v3-its.c > +++ b/xen/arch/arm/vgic-v3-its.c > @@ -1409,6 +1409,7 @@ static int vgic_v3_its_mmio_write(struct vcpu *v, > mmio_info_t *info, > return 0; > } > > +ASSERT_UNREACHABLE(); > return 1; > > write_ignore_64: > -- > 2.34.1 >
Re: [XEN PATCH v2 2/7] x86/mm: address MISRA C:2012 Rule 2.1
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > The "return 0" after the swich statement in 'xen/arch/x86/mm.c' > is unreachable because all switch clauses end with returns, and > can thus be dropped. > > No functional changes. > > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini
Re: [XEN PATCH v2 1/7] xen/shutdown: address MISRA C:2012 Rule 2.1
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > Given that 'hwdom_shutdown' is a noreturn function, unreachable > breaks can be eliminated to resolve violations of Rule 2.1. > > The rename s/maybe_reboot/reboot_or_halt/ is done to clarify > that the function is noreturn. > > No functional change. > > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini
Re: [XEN PATCH v3] automation/eclair: add deviations for MISRA C:2012 Rule 16.3
On Mon, 18 Dec 2023, Federico Serafini wrote: > MISRA C:2012 Rule 16.3 states that an unconditional break statement > shall terminate every switch-clause. > > Update ECLAIR configuration to take into account: > - continue, goto, return statements; > - functions with attribute noreturn; > - pseudo-keyword fallthrough; > - macro BUG(); > - comments. > > Update docs/misra/deviations.rst accordingly. > > Signed-off-by: Federico Serafini Reviewed-by: Stefano Stabellini
Re: [XEN PATCH v2] automation/eclair: update configuration of MISRA C:2012 Rule 5.6
On Mon, 18 Dec 2023, Federico Serafini wrote: > Deviate typedef names that are deliberately defined multiple times. > > Update docs/misra/deviations.rst accordingly. > > Tag Rule 5.6 as clean. > > Signed-off-by: Federico Serafini Reviewed-by: Stefano Stabellini
Re: [PATCH 9/9] xen: add SAF deviation for safe cast removal.
On Mon, 18 Dec 2023, Jan Beulich wrote: > On 15.12.2023 22:02, Stefano Stabellini wrote: > > On Fri, 15 Dec 2023, Jan Beulich wrote: > >> On 14.12.2023 23:04, Stefano Stabellini wrote: > >>> On Thu, 14 Dec 2023, Jan Beulich wrote: > On 14.12.2023 13:07, Simone Ballarin wrote: > > --- a/docs/misra/safe.json > > +++ b/docs/misra/safe.json > > @@ -28,6 +28,14 @@ > > }, > > { > > "id": "SAF-3-safe", > > +"analyser": { > > +"eclair": "MC3R1.R11.8" > > +}, > > +"name": "MC3R1.R11.8: removal of const qualifier to comply > > with function signature", > > +"text": "It is safe to cast away const qualifiers to > > comply with function signature if the function does not modify the > > pointee." > > I'm not happy with this description, as it invites for all sorts of > abuse. > Yet I'm also puzzled that ... > >>> > >>> We can improve the language but the concept would still be the same. For > >>> instance: > >>> > >>> A single function might or might not modify the pointee depending on > >>> other function parameters (for instance a single function that could > >>> either read or write depending on how it is called). It is safe to cast > >>> away const qualifiers when passing a parameter to a function of this > >>> type when the other parameters are triggering a read-only operation. > >> > >> Right, but I think the next here needs to be setting as tight boundaries > >> as possible: It should cover only this one specific pattern. Anything > >> else would better get its own deviation, imo. > > > > OK. What about: > > > > A single function might or might not modify the pointee depending on > > other function parameters, for instance a common pattern is to implement > > one function that could either read or write depending on how it is > > called. It is safe to cast away const qualifiers when passing a > > parameter to a function following this pattern when the other parameters > > are triggering a read-only operation. > > > > Feel free to suggest a better wording. 
> > Well, my point was to get rid of "for instance" and "common pattern" (and > anything alike). E.g.: > > "A single function could either read or write through a passed in pointer, > depending on how it is called. It is deemed safe to cast away a const > qualifier when passing a pointer to such a function, when the other > parameters guarantee read-only operation." That's fine by me
Re: Possible bug in Xen
On 18/12/2023 11:36 pm, Joe Tretter wrote: > Hi Andrew, > > I have tried it with your suggested command line, and it "feels" like > it makes the issue occur less often (but one never knows with sparse > intermittent issues). > The first time I ran my test, it took 23 minutes before the error > occurred. > However, I noticed that when running two processes simultaneously, the > issue occurs reasonably fast. Do you mean running the unit tests twice simultaneously? Or something else. > > I have, however, not been able to verify during runtime that the > parameter really arrived in Xen because /proc/cmdline didn't show the > Xen parameters. But I have taken a screen shot of what I have done to > submit the parameter. > > If you tell me how I can view the boot options in a running system to > be certain the option is effective, I'm happy to run the test again. > > Side remark: To make reasonably sure the issue really only happens > with Xen, I have re-verified that without Xen the issue does not > exist, even with two instances for more than 1.5 hours. Running `xl info` in dom0 will tell you various things, one of which is xen_cmdline. This is the command line that Xen is currently booted with. ~Andrew
[xen-4.17-testing test] 184165: tolerable FAIL - PUSHED
flight 184165 xen-4.17-testing real [real] http://logs.test-lab.xenproject.org/osstest/logs/184165/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt 16 saverestore-support-checkfail like 184109 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 184109 test-amd64-i386-xl-qemut-win7-amd64 19 guest-stop fail like 184109 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 184109 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 184109 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check fail like 184109 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 184109 test-armhf-armhf-libvirt-raw 15 saverestore-support-checkfail like 184109 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 184109 test-amd64-i386-xl-qemut-ws16-amd64 19 guest-stop fail like 184109 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 184109 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 184109 test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-amd64-i386-xl-pvshim14 guest-start fail never pass test-amd64-i386-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-i386-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 
saverestore-support-checkfail never pass test-armhf-armhf-xl-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd 15 saverestore-support-checkfail never pass test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-amd64-i386-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-qcow2 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt-raw 14 migrate-support-checkfail never pass version targeted for testing: xen 
949a4aad417621638231e4cf9f1b35e8e0328463 baseline version: xen 958706fd2e178ffe8e5597b05b694b494e24258b Last test of basis 184109 2023-12-12 14:07:07 Z6 days Testing same since 184165 2023-12-18 12:38:50 Z0 days1 attempts People who touched revisions under test: Jan Beulich jobs: build-amd64-xsm
[PATCH v2 3/3] xen/arm: Introduce "partial-emulation" xen command line option
This option is used to enable/disable partial emulation of registers at runtime. While CONFIG_PARTIAL_EMULATION enables support for partial emulation at compile time (ie adds code for partial emulation), this option may be enabled or disabled by Yocto or other build systems. However, customers can use scripts like Imagebuilder to generate uboot script for booting Xen. These scripts can use "partial-emulation=true" to support this at runtime. This option is set to false by default so that customers are fully aware when they enable partial emulation. They can also disable it without the need to rebuild Xen. Signed-off-by: Ayan Kumar Halder --- Changes from v1:- 1. New patch introduced in v2. docs/misc/xen-command-line.pandoc | 7 +++ xen/arch/arm/arm64/vsysreg.c | 5 - xen/arch/arm/include/asm/regs.h | 6 ++ xen/arch/arm/traps.c | 3 +++ xen/arch/arm/vcpreg.c | 17 +++-- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc index 8e65f8bd18..dd2a76fb19 100644 --- a/docs/misc/xen-command-line.pandoc +++ b/docs/misc/xen-command-line.pandoc @@ -1949,6 +1949,13 @@ This option is ignored in **pv-shim** mode. 
> Default: `on` +### partial-emulation (arm) +> `= ` + +> Default: `false` + +Flag to enable or disable partial emulation of registers + ### pci = List of [ serr=, perr= ] diff --git a/xen/arch/arm/arm64/vsysreg.c b/xen/arch/arm/arm64/vsysreg.c index 0fa8716884..02497c9fef 100644 --- a/xen/arch/arm/arm64/vsysreg.c +++ b/xen/arch/arm/arm64/vsysreg.c @@ -192,7 +192,10 @@ void do_sysreg(struct cpu_user_regs *regs, case HSR_SYSREG_DBGDTR_EL0: /* DBGDTR[TR]X_EL0 share the same encoding */ case HSR_SYSREG_DBGDTRTX_EL0: -return handle_raz_wi(regs, regidx, hsr.sysreg.read, hsr, 0); +if ( opt_partial_emulation ) +return handle_raz_wi(regs, regidx, hsr.sysreg.read, hsr, 0); +else +goto fail; #endif HSR_SYSREG_DBG_CASES(DBGBVR): diff --git a/xen/arch/arm/include/asm/regs.h b/xen/arch/arm/include/asm/regs.h index f998aedff5..b71fa20f91 100644 --- a/xen/arch/arm/include/asm/regs.h +++ b/xen/arch/arm/include/asm/regs.h @@ -13,6 +13,12 @@ #define psr_mode(psr,m) (((psr) & PSR_MODE_MASK) == (m)) +/* + * opt_partial_emulation: If true, partial emulation for registers will be + * enabled. 
+ */ +extern bool opt_partial_emulation; + static inline bool regs_mode_is_32bit(const struct cpu_user_regs *regs) { #ifdef CONFIG_ARM_32 diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c index f5ab555b19..c8c00d2dd5 100644 --- a/xen/arch/arm/traps.c +++ b/xen/arch/arm/traps.c @@ -42,6 +42,9 @@ #include #include +bool opt_partial_emulation = false; +boolean_param("partial-emulation", opt_partial_emulation); + /* The base of the stack must always be double-word aligned, which means * that both the kernel half of struct cpu_user_regs (which is pushed in * entry.S) and struct cpu_info (which lives at the bottom of a Xen diff --git a/xen/arch/arm/vcpreg.c b/xen/arch/arm/vcpreg.c index 52a8732423..6bf417487a 100644 --- a/xen/arch/arm/vcpreg.c +++ b/xen/arch/arm/vcpreg.c @@ -578,12 +578,17 @@ void do_cp14_32(struct cpu_user_regs *regs, const union hsr hsr) #ifdef CONFIG_PARTIAL_EMULATION case HSR_CPREG32(DBGDTRTXINT): { -/* - * As DBGDSCRINT is emulated which is architecturally mapped to AArch64 - * register MDCCSR_EL0. MDSCR_EL1 is not emulated. So DBGDTR[TR]XINT can - * only be accessed as EL0 level. - */ -return handle_raz_wi(regs, regidx, cp32.read, hsr, 0); +if ( opt_partial_emulation ) +{ +/* + * As DBGDSCRINT is emulated which is architecturally mapped to + * AArch64 register MDCCSR_EL0. MDSCR_EL1 is not emulated. So + * DBGDTR[TR]XINT can only be accessed as EL0 level. + */ +return handle_raz_wi(regs, regidx, cp32.read, hsr, 0); +} +else +goto fail; } #endif -- 2.25.1
[PATCH v2 2/3] xen: arm: Introduce CONFIG_PARTIAL_EMULATION
There can be situations when the registers cannot be emulated to their full functionality. This can be due to the complexity involved. In such cases, we can emulate those registers as RAZ/WI. A suitable example of this is DBGDTRTX_EL0 (on Arm64) and DBGDTRTXINT (on Arm32). As this register is not optional, guests may try to access this. Currently, this would result in a crash. With this patch, Xen will emulate this as RAZ/WI and the crash will be avoided. Such partial emulations will be enclosed within CONFIG_PARTIAL_EMULATION. Also, "CONFIG_PARTIAL_EMULATION" defaults to y, so that Xen does not need to be rebuilt in order to prevent guests from crashing while accessing registers like DBGDTRTX_EL0. Signed-off-by: Ayan Kumar Halder --- Changes from v1:- 1. New patch introduced in v2. xen/arch/arm/Kconfig | 8 xen/arch/arm/arm64/vsysreg.c | 3 +++ xen/arch/arm/vcpreg.c| 2 ++ 3 files changed, 13 insertions(+) diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig index 50e9bfae1a..8f25d9cba0 100644 --- a/xen/arch/arm/Kconfig +++ b/xen/arch/arm/Kconfig @@ -225,6 +225,14 @@ config STATIC_EVTCHN This option enables establishing static event channel communication between domains on a dom0less system (domU-domU as well as domU-dom0). +config PARTIAL_EMULATION +bool "Enable partial emulation for registers" +default y +help + This option enables partial emulation for registers to avoid guests + crashing when accessing registers which are not optional but have not been + emulated to their complete functionality. 
+ endmenu menu "ARM errata workaround via the alternative framework" diff --git a/xen/arch/arm/arm64/vsysreg.c b/xen/arch/arm/arm64/vsysreg.c index ebeb83dd65..0fa8716884 100644 --- a/xen/arch/arm/arm64/vsysreg.c +++ b/xen/arch/arm/arm64/vsysreg.c @@ -188,10 +188,13 @@ void do_sysreg(struct cpu_user_regs *regs, return handle_ro_read_val(regs, regidx, hsr.sysreg.read, hsr, 0, 1U << 29); } +#ifdef CONFIG_PARTIAL_EMULATION case HSR_SYSREG_DBGDTR_EL0: /* DBGDTR[TR]X_EL0 share the same encoding */ case HSR_SYSREG_DBGDTRTX_EL0: return handle_raz_wi(regs, regidx, hsr.sysreg.read, hsr, 0); +#endif + HSR_SYSREG_DBG_CASES(DBGBVR): HSR_SYSREG_DBG_CASES(DBGBCR): HSR_SYSREG_DBG_CASES(DBGWVR): diff --git a/xen/arch/arm/vcpreg.c b/xen/arch/arm/vcpreg.c index 5087125111..52a8732423 100644 --- a/xen/arch/arm/vcpreg.c +++ b/xen/arch/arm/vcpreg.c @@ -575,6 +575,7 @@ void do_cp14_32(struct cpu_user_regs *regs, const union hsr hsr) case HSR_CPREG32(DBGOSLSR): return handle_ro_read_val(regs, regidx, cp32.read, hsr, 1, 1 << 3); +#ifdef CONFIG_PARTIAL_EMULATION case HSR_CPREG32(DBGDTRTXINT): { /* @@ -584,6 +585,7 @@ void do_cp14_32(struct cpu_user_regs *regs, const union hsr hsr) */ return handle_raz_wi(regs, regidx, cp32.read, hsr, 0); } +#endif case HSR_CPREG32(DBGVCR): case HSR_CPREG32(DBGBVR0): -- 2.25.1
[PATCH v2 1/3] xen/arm: Add emulation of Debug Data Transfer Registers
From: Michal Orzel Currently if user enables HVC_DCC config option in Linux, it invokes access to debug data transfer registers (ie MDCCSR_EL0 on arm64, DBGDTRTXINT on arm32). As these registers are not emulated, Xen injects an undefined exception to the guest. And Linux crashes. We wish to avoid this crash by adding a "partial" emulation. DBGDTR_EL0 is emulated as TXfull. Refer ARM DDI 0487I.a ID081822, D17.3.8, DBGDTRTX_EL0 "If TXfull is set to 1, set DTRRX and DTRTX to UNKNOWN" Thus, any OS is expected to read DBGDTR_EL0 and check for TXfull before using DBGDTRTX_EL0. Linux does it via hvc_dcc_init() ---> hvc_dcc_check() , it returns -ENODEV. In this way, we are preventing the guest to be aborted. We also emulate DBGDTRTX_EL0 as RAZ/WI. We have added emulation for AArch32 variant of these registers as well. Also in the case of AArch32, DBGOSLSR is emulated in the same way as its AArch64 variant (ie OSLSR_EL1). This is to ensure that DBGOSLSR.OSLK is 0, thus MDSCR_EL1.TXfull is treated as UNK/SBZP. Thus only MDCCSR_EL0 can be emulated (which is DBGDTRTXINT on arm32). So, DBGDTR[TR]XINT is emulated as RAZ/WI. Signed-off-by: Michal Orzel Signed-off-by: Ayan Kumar Halder --- Changes from v1 :- 1. DBGDTR_EL0 does not emulate RXfull. This is to avoid giving the OS any indication that the RX buffer is full and is waiting to be read. 2. In Arm32, DBGOSLSR is emulated. Also DBGDTRTXINT is emulated at EL0 only. 3. Fixed the commit message and inline code comments. 
xen/arch/arm/arm64/vsysreg.c | 26 +++ xen/arch/arm/include/asm/arm64/hsr.h | 3 +++ xen/arch/arm/include/asm/cpregs.h| 2 ++ xen/arch/arm/include/asm/traps.h | 4 +++ xen/arch/arm/traps.c | 18 + xen/arch/arm/vcpreg.c| 38 +++- 6 files changed, 79 insertions(+), 12 deletions(-) diff --git a/xen/arch/arm/arm64/vsysreg.c b/xen/arch/arm/arm64/vsysreg.c index b5d54c569b..ebeb83dd65 100644 --- a/xen/arch/arm/arm64/vsysreg.c +++ b/xen/arch/arm/arm64/vsysreg.c @@ -159,9 +159,6 @@ void do_sysreg(struct cpu_user_regs *regs, * * Unhandled: *MDCCINT_EL1 - *DBGDTR_EL0 - *DBGDTRRX_EL0 - *DBGDTRTX_EL0 *OSDTRRX_EL1 *OSDTRTX_EL1 *OSECCR_EL1 @@ -172,11 +169,29 @@ void do_sysreg(struct cpu_user_regs *regs, case HSR_SYSREG_MDSCR_EL1: return handle_raz_wi(regs, regidx, hsr.sysreg.read, hsr, 1); case HSR_SYSREG_MDCCSR_EL0: +{ /* + * Xen doesn't expose a real (or emulated) Debug Communications Channel + * (DCC) to a domain. Yet the Arm ARM implies this is not an optional + * feature. So some domains may start to probe it. For instance, the + * HVC_DCC driver in Linux (since f35dc083 and at least up to v6.7), + * will try to write some characters and check if the transmit buffer + * has emptied. By setting TX status bit to indicate the transmit buffer + * is full. This we would hint the OS that the DCC is probably not + * working. + * + * Bit 29: TX full + * * Accessible at EL0 only if MDSCR_EL1.TDCC is set to 0. We emulate that * register as RAZ/WI above. So RO at both EL0 and EL1. */ -return handle_ro_raz(regs, regidx, hsr.sysreg.read, hsr, 0); +return handle_ro_read_val(regs, regidx, hsr.sysreg.read, hsr, 0, + 1U << 29); +} +case HSR_SYSREG_DBGDTR_EL0: +/* DBGDTR[TR]X_EL0 share the same encoding */ +case HSR_SYSREG_DBGDTRTX_EL0: +return handle_raz_wi(regs, regidx, hsr.sysreg.read, hsr, 0); HSR_SYSREG_DBG_CASES(DBGBVR): HSR_SYSREG_DBG_CASES(DBGBCR): HSR_SYSREG_DBG_CASES(DBGWVR): @@ -393,7 +408,8 @@ void do_sysreg(struct cpu_user_regs *regs, * * And all other unknown registers. 
*/ -default: +default: goto fail; +fail: { const struct hsr_sysreg sysreg = hsr.sysreg; diff --git a/xen/arch/arm/include/asm/arm64/hsr.h b/xen/arch/arm/include/asm/arm64/hsr.h index e691d41c17..1495ccddea 100644 --- a/xen/arch/arm/include/asm/arm64/hsr.h +++ b/xen/arch/arm/include/asm/arm64/hsr.h @@ -47,6 +47,9 @@ #define HSR_SYSREG_OSDLR_EL1 HSR_SYSREG(2,0,c1,c3,4) #define HSR_SYSREG_DBGPRCR_EL1HSR_SYSREG(2,0,c1,c4,4) #define HSR_SYSREG_MDCCSR_EL0 HSR_SYSREG(2,3,c0,c1,0) +#define HSR_SYSREG_DBGDTR_EL0 HSR_SYSREG(2,3,c0,c4,0) +#define HSR_SYSREG_DBGDTRTX_EL0 HSR_SYSREG(2,3,c0,c5,0) +#define HSR_SYSREG_DBGDTRRX_EL0 HSR_SYSREG(2,3,c0,c5,0) #define HSR_SYSREG_DBGBVRn_EL1(n) HSR_SYSREG(2,0,c0,c##n,4) #define HSR_SYSREG_DBGBCRn_EL1(n) HSR_SYSREG(2,0,c0,c##n,5) diff --git a/xen/arch/arm/include/asm/cpregs.h b/xen/arch/arm/include/asm/cpregs.h index 6b083de204..aec9e8f329 100644 --- a/xen/arch/arm/include/asm/cpregs.h +++ b/xen/arch/arm/include/asm/cpregs.h @@ -75,6 +75,8 @@ #define
[PATCH v2 0/3] xen/arm: Add emulation of Debug Data Transfer Registers
Hi, Refer https://lore.kernel.org/all/alpine.DEB.2.22.394.2312071341540.1265976@ubuntu-linux-20-04-desktop/T/ for the previous discussion on this issue. Also, the linux earlycon hvc driver has been fixed. See https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git/commit/?h=tty-next=0ec058ece2f933aed66b76bd5cb9b5e6764853c3 Changes from v1:- 1. Split the change across 3 patches as per the design consensus. Ayan Kumar Halder (3): xen/arm: Add emulation of Debug Data Transfer Registers xen: arm: Introduce CONFIG_PARTIAL_EMULATION xen/arm: Introduce "partial-emulation" xen command line option docs/misc/xen-command-line.pandoc| 7 + xen/arch/arm/Kconfig | 8 + xen/arch/arm/arm64/vsysreg.c | 32 --- xen/arch/arm/include/asm/arm64/hsr.h | 3 ++ xen/arch/arm/include/asm/cpregs.h| 2 ++ xen/arch/arm/include/asm/regs.h | 6 xen/arch/arm/include/asm/traps.h | 4 +++ xen/arch/arm/traps.c | 21 + xen/arch/arm/vcpreg.c| 47 +++- 9 files changed, 117 insertions(+), 13 deletions(-) -- 2.25.1
Re: [BUG]i2c_hid_acpi broken with 4.17.2 on Framework Laptop 13 AMD
Le lun. 18 déc. 2023 à 18:04, Sébastien Chaumat a écrit : > > > > Le lun. 18 déc. 2023, 17:44, Jan Beulich a écrit : >> >> On 18.12.2023 17:21, Sébastien Chaumat wrote: >> > On 05.12.2023 21:31, Sébastien Chaumat wrote: >> >>> This issue seems that IRQ 7 (the GPIO controller) is natively fasteoi >> >>> (so level type) while in xen it is mapped to oapic-edge instead of >> >>> oapic-level >> >>> as the SSDT indicates : >> >>> >> >>> Device (GPIO) >> >>> >> >>> { >> >>> Name (_HID, "AMDI0030") // _HID: Hardware ID >> >>> Name (_CID, "AMDI0030") // _CID: Compatible ID >> >>> Name (_UID, Zero) // _UID: Unique ID >> >>> Method (_CRS, 0, NotSerialized) // _CRS: Current Resource >> >>> Settings >> >>> { >> >>> Name (RBUF, ResourceTemplate () >> >>> { >> >>> Interrupt (ResourceConsumer, Level, ActiveLow, Shared, >> >>> ,, ) >> >>> { >> >>> 0x0007, >> >>>} >> >>> Any idea why ? >> >> >> >> Information coming from AML is required to be handed down by Dom0 to Xen. >> >> May want checking that (a) Dom0 properly does so and (b) Xen doesn't screw >> >> up in consuming that data. See PHYSDEVOP_setup_gsi. I wonder if this is >> >> specific to it being IRQ7 which GPIO uses, as at the (master) PIC IRQ7 is >> >> also the spurious vector. You may want to retry with the tip of the 4.17 >> >> branch (soon to become 4.17.3) - while it doesn't look very likely to me >> >> that recent backports there were related, it may still be that they make >> >> a difference. >> >> >> > >> > testing with 4.17.3: >> > >> > Adding some printk in PHYSDEVOP_setup_gsi, I see (in xl dmesg) that >> > (XEN) PHYSDEVOP_setup_gsi setup_gsi : gsi: 7 triggering: 1 polarity: 1 >> > >> > but later on in dmesg I see : >> > [1.747958] xen: registering gsi 7 triggering 0 polarity 1 >> > >> > So triggering is flipped from 1 to 0 (cannot find the definition for >> > those values). >> > Could this be the culprit ? >> >> Possibly. 
Since it would be the kernel to invoke PHYSDEVOP_setup_gsi, it >> looks as if the kernel was confused about which trigger mode to use. Have >> you figured from where the kernel takes the two different values? >> Side note: dom0 is PV.
xen | Successful pipeline for RELEASE-4.17.3 | 949a4aad
Pipeline #574461 has passed! Project: xen ( https://gitlab.com/xen-project/xen ) Branch: RELEASE-4.17.3 ( https://gitlab.com/xen-project/xen/-/commits/RELEASE-4.17.3 ) Commit: 949a4aad ( https://gitlab.com/xen-project/xen/-/commit/949a4aad417621638231e4cf9f1b35e8e0328463 ) Commit Message: update Xen version to 4.17.3 Commit Author: Jan Beulich ( https://gitlab.com/jbeulich ) Pipeline #574461 ( https://gitlab.com/xen-project/xen/-/pipelines/574461 ) triggered by Ganis ( https://gitlab.com/ganis ) successfully completed 84 jobs in 2 stages. -- You're receiving this email because of your account on gitlab.com.
[xen-unstable-smoke test] 184168: tolerable all pass - PUSHED
flight 184168 xen-unstable-smoke real [real] http://logs.test-lab.xenproject.org/osstest/logs/184168/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass version targeted for testing: xen 0cc74376d6823e0883f89556be2a267f2240a558 baseline version: xen a60067f39882a4f40547fc0cd8177bac89cb01b7 Last test of basis 184147 2023-12-15 22:03:52 Z2 days Testing same since 184168 2023-12-18 15:03:58 Z0 days1 attempts People who touched revisions under test: Jan Beulich Maria Celeste Cesario Maria Celeste Cesario Nicola Vetrini Oleksii Kurochko Simone Ballarin jobs: build-arm64-xsm pass build-amd64 pass build-armhf pass build-amd64-libvirt pass test-armhf-armhf-xl pass test-arm64-arm64-xl-xsm pass test-amd64-amd64-xl-qemuu-debianhvm-amd64pass test-amd64-amd64-libvirt pass sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Pushing revision : To xenbits.xen.org:/home/xen/git/xen.git a60067f398..0cc74376d6 0cc74376d6823e0883f89556be2a267f2240a558 -> smoke
Re: [PATCH v2 08/15] VMX: convert vmx_basic_msr
On 27/11/2023 12:44 pm, Jan Beulich wrote: > On 24.11.2023 23:41, Andrew Cooper wrote: >> On 24/11/2023 8:41 am, Jan Beulich wrote: >>> ... to a struct field, which is then going to be accompanied by other >>> capability/control data presently living in individual variables. As >>> this structure isn't supposed to be altered post-boot, put it in >>> .data.ro_after_init right away. >>> >>> Suggested-by: Roger Pau Monné >>> Signed-off-by: Jan Beulich >> For (usable) nested virt, we're going to need the VMX MSRs, in their >> architectural form, in struct cpu_policy. And just like CPUID features, >> I want it to end up with nice bitfields to use. >> >> Looking through the rest of this series, vmx_caps ends up almost in >> architectural form. >> >> Could I talk you into having a "struct vmx_msrs" (or similar - 'caps' >> doesn't feel quite right here) in the policy object, and also >> instantiating one instance of it for this purpose here? > I was actually wondering while doing the conversion. The main reason I > didn't go that route right away was that I wasn't really certain whether > what I'd put there would the really be the (largely) final shape it > wants to take there. (One thing you've likely noticed I didn't convert > is _vmx_misc_cap, which right now only exists as a local variable in > vmx_init_vmcs_config().) > >> AFAICT, it would only be a minor deviation to the latter half of this >> series, but it would be an excellent start to fixing nested virt - and >> getting this data in the policy really is the first task in getting the >> ball rolling on nested virt. > How much of a further change it would end up being (or where that change > would occur) depends on another aspect: When put in cpu-policy.h (and I > take it you mean the lib/ instance, not the asm/ one), it would seem > natural and perhaps even necessary to properly introduce bitfields for > each of the MSRs right away. That'll lead to a "raw" field as well. 
In > VMX code (mostly its cpu_has_* #define-s), I'd then either need to use > .raw (perhaps a little ugly here and there) or go with using the > individual bitfields right away (likely eliminating the need for many of > the constant #define-s), which increases the risk of inadvertent mistakes > (and their overlooking during review). > >> I don't mind about serialising/de-serialsing it - that still has a bit >> of userspace complexity to work out, and depends on some of the cleanup >> still needing a repost. >> >> If you don't want to take the added space in cpu_policy yet, how about >> having the declaration there and just forgo instantiating the subobject >> in the short term? > There's quite a bit of effectively dead space in the struct already; I > think I wouldn't mind instantiating the struct there right away. So long > as you're convinced it's going to be used there in not too distant a > future. > > But: If I go as far, why would I introduce a global instance of the new > struct? Wouldn't it then make more sense to use host_cpu_policy right > away? I probably would keep populating it in vmx_init_vmcs_config() to > limit churn for now, but consumers of the flags could then right away > use the host policy. George has stated an intent to pick nested virt up imminently. I'll have to defer to him on when this will actually start. But, sorting out this data in the policies is the next step, whenever that occurs. If you fancy going all the way to use the raw/host policy then great, but I expect that would be a large amount of extra work, hence the suggestion to just use the "inner" struct in the short term. Conversion to bitfields would want to be separate patch anyway, at which point an A/B compile can confirm whether there was no resulting change. 
I'm happy if you want to do all of this, but it's a lot of work, and simply having the data in plain architectural uint64_t in the host policy is something that I thought would be a very minor change to your current series, but with a useful step towards nested virt. One open question, before we get too far into this, is still whether to express half of these as MSR-features like ARCH_CAPS. Linux does, and there is a very complex set of dependencies between certain properties, although I have a sneaking suspicion that the dependency logic will needed at runtime as the L1 hypervisor changes the various controls. ~Andrew
Re: [XEN PATCH 3/5] xen/acpi: Use NULL as a null pointer constant
On 2023-12-18 18:05, Jan Beulich wrote: On 14.12.2023 12:44, Nicola Vetrini wrote: --- a/xen/include/acpi/acmacros.h +++ b/xen/include/acpi/acmacros.h @@ -111,7 +111,7 @@ #define ACPI_TO_POINTER(i) ACPI_ADD_PTR (void,(void *) NULL,(acpi_native_uint) i) #define ACPI_TO_INTEGER(p) ACPI_PTR_DIFF (p,(void *) NULL) -#define ACPI_OFFSET(d,f)(acpi_size) ACPI_PTR_DIFF (&(((d *)0)->f),(void *) NULL) +#define ACPI_OFFSET(d,f)(acpi_size) ACPI_PTR_DIFF (&(((d *)NULL)->f),(void *) NULL) #define ACPI_PHYSADDR_TO_PTR(i) ACPI_TO_POINTER(i) #define ACPI_PTR_TO_PHYSADDR(i) ACPI_TO_INTEGER(i) This again is an ACPI CA header, which I'm hesitant to see being changed. Jan Yes, I sent this before the discussion on R11.8. I'm ok with adding all these files derived from ACPI CA to exclude-list.json, so in that case the patch can be ignored. -- Nicola Vetrini, BSc Software Engineer, BUGSENG srl (https://bugseng.com)
Re: [PATCH v4 2/4] x86/spec-ctrl: defer context-switch IBPB until guest entry
On Mon, Dec 18, 2023 at 02:58:01PM +0100, Jan Beulich wrote: > On 18.12.2023 13:39, Roger Pau Monné wrote: > > On Tue, Feb 14, 2023 at 05:11:05PM +0100, Jan Beulich wrote: > >> In order to avoid clobbering Xen's own predictions, defer the barrier as > >> much as possible. Merely mark the CPU as needing a barrier issued the > >> next time we're exiting to guest context. > > > > While I understand that doing the flush in the middle of the guest > > context might not be ideal, as it's my understanding we also > > s/guest context/context switch/? Indeed, sorry. > > needlessly flush Xen predictions, I'm unsure whether this makes any > > difference in practice, and IMO just makes the exit to guest paths > > more complex. > > I need to redirect this question to Andrew, who suggested that doing so > can be expected to make a difference. When we were discussing this, I > could easily see it might make a difference, but I cannot provide hard > proof. That's fine, but with the added complexity in the return to guests paths I think we need some kind of justification for such a change. If it was the other way around I could easily see it as a benefits if just for code clarity reasons. Thanks, Roger.
Re: [PATCH v4 4/4] x86/PV: issue branch prediction barrier when switching 64-bit guest to kernel mode
On Tue, Feb 14, 2023 at 05:12:08PM +0100, Jan Beulich wrote: > Since both kernel and user mode run in ring 3, they run in the same > "predictor mode". That only true when IBRS is enabled, otherwise all CPU modes share the same predictor mode? > While the kernel could take care of this itself, doing > so would be yet another item distinguishing PV from native. Additionally > we're in a much better position to issue the barrier command, and we can > save a #GP (for privileged instruction emulation) this way. > > To allow to recover performance, introduce a new VM assist allowing the > guest kernel to suppress this barrier. Make availability of the assist > dependent upon the command line control, such that kernels have a way to > know whether their request actually took any effect. > > Note that because of its use in PV64_VM_ASSIST_MASK, the declaration of > opt_ibpb_mode_switch can't live in asm/spec_ctrl.h. > > Signed-off-by: Jan Beulich > --- > Is the placement of the clearing of opt_ibpb_ctxt_switch correct in > parse_spec_ctrl()? Shouldn't it live ahead of the "disable_common" > label, as being about guest protection, not Xen's? > > Adding setting of the variable to the "pv" sub-case in parse_spec_ctrl() > didn't seem quite right to me, considering that we default it to the > opposite of opt_ibpb_entry_pv. > --- > v4: Correct the print_details() change. Re-base in particular over > changes earlier in the series. > v3: Leverage exit-IBPB. Introduce separate command line control. > v2: Leverage entry-IBPB. Add VM assist. Re-base. 
> > --- a/docs/misc/xen-command-line.pandoc > +++ b/docs/misc/xen-command-line.pandoc > @@ -2320,8 +2320,8 @@ By default SSBD will be mitigated at run > ### spec-ctrl (x86) > > `= List of [ , xen=, {pv,hvm}=, > > {msr-sc,rsb,md-clear,ibpb-entry}=|{pv,hvm}=, > -> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, > -> eager-fpu,l1d-flush,branch-harden,srb-lock, > +> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ibpb-mode-switch, > +> ssbd,psfd,eager-fpu,l1d-flush,branch-harden,srb-lock, > > unpriv-mmio}= ]` > > Controls for speculative execution sidechannel mitigations. By default, Xen > @@ -2403,7 +2403,10 @@ default. > > On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=` > option can be used to force (the default) or prevent Xen from issuing branch > -prediction barriers on vcpu context switches. > +prediction barriers on vcpu context switches. On such hardware the > +`ibpb-mode-switch` option can be used to control whether, by default, Xen > +would issue branch prediction barriers when 64-bit PV guests switch from > +user to kernel mode. If enabled, guest kernels can op out of this behavior. > > On all hardware, the `eager-fpu=` option can be used to force or prevent Xen > from using fully eager FPU context switches. This is currently implemented > as > --- a/xen/arch/x86/include/asm/domain.h > +++ b/xen/arch/x86/include/asm/domain.h > @@ -742,6 +742,8 @@ static inline void pv_inject_sw_interrup > pv_inject_event(); > } > > +extern int8_t opt_ibpb_mode_switch; > + > #define PV32_VM_ASSIST_MASK ((1UL << VMASST_TYPE_4gb_segments)| \ > (1UL << VMASST_TYPE_4gb_segments_notify) | \ > (1UL << VMASST_TYPE_writable_pagetables) | \ > @@ -753,7 +755,9 @@ static inline void pv_inject_sw_interrup > * but we can't make such requests fail all of the sudden. 
> */ > #define PV64_VM_ASSIST_MASK (PV32_VM_ASSIST_MASK | \ > - (1UL << VMASST_TYPE_m2p_strict)) > + (1UL << VMASST_TYPE_m2p_strict) | \ > + ((opt_ibpb_mode_switch + 0UL) << \ > + VMASST_TYPE_mode_switch_no_ibpb)) I'm wondering that it's kind of weird to offer the option to PV domUs if opt_ibpb_entry_pv is set, as then the guest mode switch will always (implicitly) do a IBPB as requiring an hypercall and thus take an entry point into Xen. I guess it's worth having it just as a way to signal to Xen that the hypervisor does perform an IBPB, even if the guest cannot disable it. > #define HVM_VM_ASSIST_MASK (1UL << VMASST_TYPE_runstate_update_flag) > > #define arch_vm_assist_valid_mask(d) \ > --- a/xen/arch/x86/pv/domain.c > +++ b/xen/arch/x86/pv/domain.c > @@ -455,6 +455,7 @@ static void _toggle_guest_pt(struct vcpu > void toggle_guest_mode(struct vcpu *v) > { > const struct domain *d = v->domain; > +struct cpu_info *cpu_info = get_cpu_info(); > unsigned long gs_base; > > ASSERT(!is_pv_32bit_vcpu(v)); > @@ -467,15 +468,21 @@ void toggle_guest_mode(struct vcpu *v) > if ( v->arch.flags & TF_kernel_mode ) > v->arch.pv.gs_base_kernel = gs_base; > else > +{ > v->arch.pv.gs_base_user = gs_base; > + > +if (
Re: [XEN PATCH 3/5] xen/acpi: Use NULL as a null pointer constant
On 14.12.2023 12:44, Nicola Vetrini wrote: > --- a/xen/include/acpi/acmacros.h > +++ b/xen/include/acpi/acmacros.h > @@ -111,7 +111,7 @@ > > #define ACPI_TO_POINTER(i) ACPI_ADD_PTR (void,(void *) > NULL,(acpi_native_uint) i) > #define ACPI_TO_INTEGER(p) ACPI_PTR_DIFF (p,(void *) NULL) > -#define ACPI_OFFSET(d,f)(acpi_size) ACPI_PTR_DIFF (&(((d > *)0)->f),(void *) NULL) > +#define ACPI_OFFSET(d,f)(acpi_size) ACPI_PTR_DIFF (&(((d > *)NULL)->f),(void *) NULL) > #define ACPI_PHYSADDR_TO_PTR(i) ACPI_TO_POINTER(i) > #define ACPI_PTR_TO_PHYSADDR(i) ACPI_TO_INTEGER(i) > This again is an ACPI CA header, which I'm hesitant to see being changed. Jan
Re: [BUG]i2c_hid_acpi broken with 4.17.2 on Framework Laptop 13 AMD
Le lun. 18 déc. 2023, 17:44, Jan Beulich a écrit : > On 18.12.2023 17:21, Sébastien Chaumat wrote: > > On 05.12.2023 21:31, Sébastien Chaumat wrote: > >>> This issue seems that IRQ 7 (the GPIO controller) is natively fasteoi > >>> (so level type) while in xen it is mapped to oapic-edge instead of > >>> oapic-level > >>> as the SSDT indicates : > >>> > >>> Device (GPIO) > >>> > >>> { > >>> Name (_HID, "AMDI0030") // _HID: Hardware ID > >>> Name (_CID, "AMDI0030") // _CID: Compatible ID > >>> Name (_UID, Zero) // _UID: Unique ID > >>> Method (_CRS, 0, NotSerialized) // _CRS: Current Resource > Settings > >>> { > >>> Name (RBUF, ResourceTemplate () > >>> { > >>> Interrupt (ResourceConsumer, Level, ActiveLow, > Shared, ,, ) > >>> { > >>> 0x0007, > >>>} > >>> Any idea why ? > >> > >> Information coming from AML is required to be handed down by Dom0 to > Xen. > >> May want checking that (a) Dom0 properly does so and (b) Xen doesn't > screw > >> up in consuming that data. See PHYSDEVOP_setup_gsi. I wonder if this is > >> specific to it being IRQ7 which GPIO uses, as at the (master) PIC IRQ7 > is > >> also the spurious vector. You may want to retry with the tip of the 4.17 > >> branch (soon to become 4.17.3) - while it doesn't look very likely to me > >> that recent backports there were related, it may still be that they make > >> a difference. > >> > > > > testing with 4.17.3: > > > > Adding some printk in PHYSDEVOP_setup_gsi, I see (in xl dmesg) that > > (XEN) PHYSDEVOP_setup_gsi setup_gsi : gsi: 7 triggering: 1 polarity: 1 > > > > but later on in dmesg I see : > > [1.747958] xen: registering gsi 7 triggering 0 polarity 1 > > > > So triggering is flipped from 1 to 0 (cannot find the definition for > > those values). > > Could this be the culprit ? > > Possibly. Since it would be the kernel to invoke PHYSDEVOP_setup_gsi, it > looks as if the kernel was confused about which trigger mode to use. Have > you figured from where the kernel takes the two different values? 
> > Would you mind pointing me to the definition for those values first? I did not find what 0/1 means in this context. Thanks
Re: [XEN PATCH 2/5] x86/ioapic: use NULL as a null pointer constant
On 14.12.2023 22:30, Stefano Stabellini wrote: > On Thu, 14 Dec 2023, Nicola Vetrini wrote: >> Resolves violations of MISRA C Rule 11.9. >> No functional change. >> >> Signed-off-by: Nicola Vetrini > > Reviewed-by: Stefano Stabellini Acked-by: Jan Beulich
Re: [XEN PATCH 1/5] xen/hvm: use NULL as a null pointer constant
On 14.12.2023 22:29, Stefano Stabellini wrote: > On Thu, 14 Dec 2023, Nicola Vetrini wrote: >> No functional change. >> >> Signed-off-by: Nicola Vetrini > > Reviewed-by: Stefano Stabellini Acked-by: Jan Beulich Yet it would have been really nice if style had been tidied of the lines which are touched anyway. Jan
Re: [PATCH v2 37/39] xen/rirscv: add minimal amount of stubs to build full Xen
On 24.11.2023 11:30, Oleksii Kurochko wrote: > --- a/xen/arch/riscv/mm.c > +++ b/xen/arch/riscv/mm.c > @@ -1,19 +1,23 @@ > /* SPDX-License-Identifier: GPL-2.0-only */ > > +#include > #include > #include > #include > #include > #include > +#include > #include > > #include > #include > #include > -#include > #include > #include > > +unsigned long frametable_base_pdx __read_mostly; > +unsigned long frametable_virt_end __read_mostly; Nit (style): unsigned long __read_mostly frametable_base_pdx; unsigned long __read_mostly frametable_virt_end; (i.e. attributes generally between type and identifier). Plus __read_mostly or __ro_after_init? > @@ -294,3 +298,49 @@ unsigned long __init calc_phys_offset(void) > phys_offset = load_start - XEN_VIRT_START; > return phys_offset; > } > + > +void put_page(struct page_info *page) > +{ > +assert_failed(__func__); > +} > + > +unsigned long get_upper_mfn_bound(void) > +{ > +/* No memory hotplug yet, so current memory limit is the final one. */ > +return max_page - 1; > +} > + > +void arch_dump_shared_mem_info(void) > +{ > +WARN(); > +} > + > +int populate_pt_range(unsigned long virt, unsigned long nr_mfns) > +{ > +assert_failed(__func__); > +return -1; > +} Whats the pattern between picking WARN(), assert_failed() (which I don't think you should be using anyway; if an assertion, then ASSERT_UNREACHABLE()) and BUG() (as used earlier in stubs living in header files)? > --- /dev/null > +++ b/xen/arch/riscv/stubs.c > @@ -0,0 +1,426 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +#include > +#include > +#include > +#include > +#include > +#include > +#include I think I can see why you need the former of these last two, but do you really need the latter? > +#include > + > +/* smpboot.c */ > + > +cpumask_t cpu_online_map; > +cpumask_t cpu_present_map; > +cpumask_t cpu_possible_map; > + > +/* ID of the PCPU we're running on */ > +DEFINE_PER_CPU(unsigned int, cpu_id); > +/* XXX these seem awfully x86ish... 
*/ > +/* representing HT siblings of each logical CPU */ > +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_mask); > +/* representing HT and core siblings of each logical CPU */ > +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_mask); > + > +nodemask_t __read_mostly node_online_map = { { [0] = 1UL } }; > + > +/* time.c */ > + > +unsigned long __read_mostly cpu_khz; /* CPU clock frequency in kHz. */ > + > +s_time_t get_s_time(void) > +{ > +BUG(); > +} > + > +int reprogram_timer(s_time_t timeout) > +{ > +BUG(); > +} > + > +void send_timer_event(struct vcpu *v) > +{ > +BUG(); > +} > + > +void domain_set_time_offset(struct domain *d, int64_t time_offset_seconds) > +{ > +BUG(); > +} > + > +/* shutdown.c */ > + > +void machine_restart(unsigned int delay_millisecs) > +{ > +BUG(); > +} > + > +void machine_halt(void) > +{ > +BUG(); > +} > + > +/* vm_event.c */ > + > +void vm_event_fill_regs(vm_event_request_t *req) > +{ > +BUG(); > +} > + > +void vm_event_set_registers(struct vcpu *v, vm_event_response_t *rsp) > +{ > +BUG(); > +} > + > +void vm_event_monitor_next_interrupt(struct vcpu *v) > +{ > +/* Not supported on RISCV. */ > +} > + > +void vm_event_reset_vmtrace(struct vcpu *v) > +{ > +/* Not supported on RISCV. 
*/ > +} > + > +/* domctl.c */ > + > +long arch_do_domctl(struct xen_domctl *domctl, struct domain *d, > +XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) > +{ > +BUG(); > +} > + > +void arch_get_domain_info(const struct domain *d, > + struct xen_domctl_getdomaininfo *info) > +{ > +BUG(); > +} > + > +void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c) > +{ > +BUG(); > +} > + > +/* monitor.c */ > + > +int arch_monitor_domctl_event(struct domain *d, > + struct xen_domctl_monitor_op *mop) > +{ > +BUG(); > +} > + > +/* smp.c */ > + > +void arch_flush_tlb_mask(const cpumask_t *mask) > +{ > +BUG(); > +} > + > +void smp_send_event_check_mask(const cpumask_t *mask) > +{ > +BUG(); > +} > + > +void smp_send_call_function_mask(const cpumask_t *mask) > +{ > +BUG(); > +} > + > +/* irq.c */ > + > +struct pirq *alloc_pirq_struct(struct domain *d) > +{ > +BUG(); > +} > + > +int pirq_guest_bind(struct vcpu *v, struct pirq *pirq, int will_share) > +{ > +BUG(); > +} > + > +void pirq_guest_unbind(struct domain *d, struct pirq *pirq) > +{ > +BUG(); > +} > + > +void pirq_set_affinity(struct domain *d, int pirq, const cpumask_t *mask) > +{ > +BUG(); > +} > + > +static void ack_none(struct irq_desc *irq) > +{ > +BUG(); > +} > + > +static void end_none(struct irq_desc *irq) > +{ > +BUG(); > +} Much like I said for PPC - I don't think you need the two, as ... > +hw_irq_controller no_irq_type = { > +.typename = "none", > +.startup = irq_startup_none, > +.shutdown = irq_shutdown_none, > +.enable = irq_enable_none, > +
Re: [BUG]i2c_hid_acpi broken with 4.17.2 on Framework Laptop 13 AMD
On 18.12.2023 17:21, Sébastien Chaumat wrote: > On 05.12.2023 21:31, Sébastien Chaumat wrote: >>> This issue seems that IRQ 7 (the GPIO controller) is natively fasteoi >>> (so level type) while in xen it is mapped to oapic-edge instead of >>> oapic-level >>> as the SSDT indicates : >>> >>> Device (GPIO) >>> >>> { >>> Name (_HID, "AMDI0030") // _HID: Hardware ID >>> Name (_CID, "AMDI0030") // _CID: Compatible ID >>> Name (_UID, Zero) // _UID: Unique ID >>> Method (_CRS, 0, NotSerialized) // _CRS: Current Resource Settings >>> { >>> Name (RBUF, ResourceTemplate () >>> { >>> Interrupt (ResourceConsumer, Level, ActiveLow, Shared, ,, ) >>> { >>> 0x0007, >>>} >>> Any idea why ? >> >> Information coming from AML is required to be handed down by Dom0 to Xen. >> May want checking that (a) Dom0 properly does so and (b) Xen doesn't screw >> up in consuming that data. See PHYSDEVOP_setup_gsi. I wonder if this is >> specific to it being IRQ7 which GPIO uses, as at the (master) PIC IRQ7 is >> also the spurious vector. You may want to retry with the tip of the 4.17 >> branch (soon to become 4.17.3) - while it doesn't look very likely to me >> that recent backports there were related, it may still be that they make >> a difference. >> > > testing with 4.17.3: > > Adding some printk in PHYSDEVOP_setup_gsi, I see (in xl dmesg) that > (XEN) PHYSDEVOP_setup_gsi setup_gsi : gsi: 7 triggering: 1 polarity: 1 > > but later on in dmesg I see : > [1.747958] xen: registering gsi 7 triggering 0 polarity 1 > > So triggering is flipped from 1 to 0 (cannot find the definition for > those values). > Could this be the culprit ? Possibly. Since it would be the kernel to invoke PHYSDEVOP_setup_gsi, it looks as if the kernel was confused about which trigger mode to use. Have you figured from where the kernel takes the two different values? Jan
xen | Successful pipeline for staging | 0cc74376
Pipeline #343260 has passed! Project: xen ( https://gitlab.com/xen-project/xen ) Branch: staging ( https://gitlab.com/xen-project/xen/-/commits/staging ) Commit: 0cc74376 ( https://gitlab.com/xen-project/xen/-/commit/0cc74376d6823e0883f89556be2a267f2240a558 ) Commit Message: x86/hvm: address a violation of MISRA C:2012 Ru... Commit Author: Maria Celeste Cesario Committed by: Jan Beulich ( https://gitlab.com/jbeulich ) Pipeline #343260 ( https://gitlab.com/xen-project/xen/-/pipelines/343260 ) triggered by Ganis ( https://gitlab.com/ganis ) successfully completed 129 jobs in 3 stages. -- You're receiving this email because of your account on gitlab.com.
Re: [BUG]i2c_hid_acpi broken with 4.17.2 on Framework Laptop 13 AMD
> >>> On 05.12.2023 21:31, Sébastien Chaumat wrote: > > [2.464598] amd_gpio AMDI0030:00: failed to enable wake-up interrupt > > Is it expected that IRQ7 goes from fasteoi (kernel 6.6.4 ) to > ioapic-edge and IRQ9 to ioapic-level ? > > IR-IO-APIC7-fasteoi pinctrl_amd > IR-IO-APIC9-fasteoi acpi > > to (xen 4.18.0) > > xen-pirq -ioapic-edge pinctrl_amd > xen-pirq -ioapic-level acpi > > ? > > > > This look similar to > > https://yhbt.net/lore/all/20201006044941.fdjsp346kc5thyzy@Rk/t/ > > > > This issue seems that IRQ 7 (the GPIO controller) is natively fasteoi > > (so level type) while in xen it is mapped to oapic-edge instead of > > oapic-level > > as the SSDT indicates : > > > > Device (GPIO) > > > > { > > Name (_HID, "AMDI0030") // _HID: Hardware ID > > Name (_CID, "AMDI0030") // _CID: Compatible ID > > Name (_UID, Zero) // _UID: Unique ID > > Method (_CRS, 0, NotSerialized) // _CRS: Current Resource Settings > > { > > Name (RBUF, ResourceTemplate () > > { > > Interrupt (ResourceConsumer, Level, ActiveLow, Shared, ,, ) > > { > > 0x0007, > >} > > Any idea why ? > > Information coming from AML is required to be handed down by Dom0 to Xen. > May want checking that (a) Dom0 properly does so and (b) Xen doesn't screw > up in consuming that data. See PHYSDEVOP_setup_gsi. I wonder if this is > specific to it being IRQ7 which GPIO uses, as at the (master) PIC IRQ7 is > also the spurious vector. You may want to retry with the tip of the 4.17 > branch (soon to become 4.17.3) - while it doesn't look very likely to me > that recent backports there were related, it may still be that they make > a difference. > testing with 4.17.3: Adding some printk in PHYSDEVOP_setup_gsi, I see (in xl dmesg) that (XEN) PHYSDEVOP_setup_gsi setup_gsi : gsi: 7 triggering: 1 polarity: 1 but later on in dmesg I see : [1.747958] xen: registering gsi 7 triggering 0 polarity 1 So triggering is flipped from 1 to 0 (cannot find the definition for those values). Could this be the culprit ?
Re: [RFC PATCH v1 1/1] xen/Makefile: introduce ARCH_FIXED_CONFIG for randconfig
On 18.12.2023 17:07, Andrew Cooper wrote: > On 11/12/2023 3:56 pm, Jan Beulich wrote: >> On 07.12.2023 21:17, Andrew Cooper wrote: >>> On 07/12/2023 5:03 pm, Oleksii Kurochko wrote: ARCH_FIXED_CONFIG is required in the case of randconfig and CI for configs that aren't ready or are not supposed to be implemented for specific architecture. These configs should always be disabled to prevent randconfig related tests from failing. Signed-off-by: Oleksii Kurochko --- xen/Makefile | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xen/Makefile b/xen/Makefile index ca571103c8..8ae8fe1480 100644 --- a/xen/Makefile +++ b/xen/Makefile @@ -336,11 +336,14 @@ ifeq ($(config-build),y) # *config targets only - make sure prerequisites are updated, and descend # in tools/kconfig to make the *config target +ARCH_FORCED_CONFIG := $(srctree)/arch/$(SRCARCH)/configs/randomforced.config + # Create a file for KCONFIG_ALLCONFIG which depends on the environment. # This will be use by kconfig targets allyesconfig/allmodconfig/allnoconfig/randconfig filechk_kconfig_allconfig = \ $(if $(findstring n,$(XEN_HAS_CHECKPOLICY)), echo 'CONFIG_XSM_FLASK_POLICY=n';) \ -$(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG);) \ +$(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG); \ +$(if $(wildcard $(ARCH_FORCED_CONFIG)), cat $(ARCH_FORCED_CONFIG);) ) \ : .allconfig.tmp: FORCE >>> We already have infrastructure for this. What's wrong with >>> EXTRA_FIXED_RANDCONFIG? >> What I don't understand here is why dealing with the issue would want >> limiting to gitlab-CI. Anyone could run randconfig on their own, and >> imo it would be helpful if the same issue(s) could be prevented there, >> too. Hence my earlier suggestion to have a snippet which can be used >> by "interested" parties. And once dealt with in e.g. the makefile >> there should not be a need for any overrides in the CI config anymore. > > This is trying to find a solution to a problem which doesn't exist. 
> > RISC-V and PPC are experimental in Xen. Noone else is going to come and > randconfig them until they're rather more production ready, and a > prerequisite of that is removing this list of exclusions. > > Until you can actually find an interested party to comment, I think this > is just churn for no useful improvement. If nothing else, calling it > randomforced.config isn't appropriate given the explanation of what this > target is used for... "random" in the name can't possibly be right anyway. Such collection of fixed settings would also be relevant to e.g. all{yes,no}config. Yet that's still not the same as any kind of "default" config, which the two architectures presently kind of abuse for the purpose of defining required-fixed settings. Jan
Re: [PATCH v4 3/4] x86: limit issuing of IBPB during context switch
On 18.12.2023 16:19, Roger Pau Monné wrote: > On Tue, Feb 14, 2023 at 05:11:40PM +0100, Jan Beulich wrote: >> --- a/xen/arch/x86/domain.c >> +++ b/xen/arch/x86/domain.c >> @@ -2005,17 +2005,26 @@ void context_switch(struct vcpu *prev, s >> } >> else >> { >> +unsigned int feat_sc_rsb = X86_FEATURE_SC_RSB_HVM; >> + >> __context_switch(); >> >> /* Re-enable interrupts before restoring state which may fault. */ >> local_irq_enable(); >> >> if ( is_pv_domain(nextd) ) >> +{ >> load_segments(next); >> >> +feat_sc_rsb = X86_FEATURE_SC_RSB_PV; >> +} >> + >> ctxt_switch_levelling(next); >> >> -if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) ) >> +if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) && >> + (!(prevd->arch.spec_ctrl_flags & SCF_entry_ibpb) || >> + /* is_idle_domain(prevd) || */ > > I would rather add a comment to note that the idle domain always has > SCF_entry_ibpb clear, rather than leaving this commented check in the > condition. > >> + !boot_cpu_has(feat_sc_rsb)) ) Oh, for completeness: For v5 I have this @@ -2092,17 +2092,26 @@ void context_switch(struct vcpu *prev, s } else { +unsigned int feat_sc_rsb = X86_FEATURE_SC_RSB_HVM; + __context_switch(); /* Re-enable interrupts before restoring state which may fault. */ local_irq_enable(); if ( is_pv_domain(nextd) ) +{ load_segments(next); +feat_sc_rsb = X86_FEATURE_SC_RSB_PV; +} + ctxt_switch_levelling(next); -if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) ) +if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) && + (!(prevd->arch.spec_ctrl_flags & SCF_entry_ibpb) || + /* is_idle_domain(prevd) || */ + (!cpu_has_auto_ibrs && !boot_cpu_has(feat_sc_rsb))) ) { static DEFINE_PER_CPU(unsigned int, last); unsigned int *last_id = _cpu(last); i.e. with the cpu_has_auto_ibrs check added. Jan
Re: [PATCH v4 3/4] x86: limit issuing of IBPB during context switch
On 18.12.2023 16:19, Roger Pau Monné wrote: > On Tue, Feb 14, 2023 at 05:11:40PM +0100, Jan Beulich wrote: >> When the outgoing vCPU had IBPB issued and RSB overwritten upon entering >> Xen, then there's no need for a 2nd barrier during context switch. >> >> Note that SCF_entry_ibpb is always clear for the idle domain, so no >> explicit idle domain check is needed to augment the feature check >> (which is simply inapplicable to "idle"). >> >> Signed-off-by: Jan Beulich > > Acked-by: Roger Pau Monné Thanks. However, aiui the plan still is for Andrew to pick up this series and integrate it with other work he has in progress (or he is planning to do). >> --- a/xen/arch/x86/domain.c >> +++ b/xen/arch/x86/domain.c >> @@ -2005,17 +2005,26 @@ void context_switch(struct vcpu *prev, s >> } >> else >> { >> +unsigned int feat_sc_rsb = X86_FEATURE_SC_RSB_HVM; >> + >> __context_switch(); >> >> /* Re-enable interrupts before restoring state which may fault. */ >> local_irq_enable(); >> >> if ( is_pv_domain(nextd) ) >> +{ >> load_segments(next); >> >> +feat_sc_rsb = X86_FEATURE_SC_RSB_PV; >> +} >> + >> ctxt_switch_levelling(next); >> >> -if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) ) >> +if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) && >> + (!(prevd->arch.spec_ctrl_flags & SCF_entry_ibpb) || >> + /* is_idle_domain(prevd) || */ > > I would rather add a comment to note that the idle domain always has > SCF_entry_ibpb clear, rather than leaving this commented check in the > condition. While I think I can see your point, I like it this way to match the other !is_idle_domain() that's here. >> + !boot_cpu_has(feat_sc_rsb)) ) > > I do wonder if it would be more fail safe (and easier to expand going > forward) if we introduce a new cpu_info field to track the CPU state: > relevant here would be whether RSB has been overwritten and IBPB > executed. Such state would be cleared on each return from guest path. 
To be honest - I'm not sure whether that would help or make things more fragile. More state also means more things which can become incorrect / inconsistent. Jan
Re: [RFC PATCH v1 1/1] xen/Makefile: introduce ARCH_FIXED_CONFIG for randconfig
On 11/12/2023 3:56 pm, Jan Beulich wrote: > On 07.12.2023 21:17, Andrew Cooper wrote: >> On 07/12/2023 5:03 pm, Oleksii Kurochko wrote: >>> ARCH_FIXED_CONFIG is required in the case of randconfig >>> and CI for configs that aren't ready or are not >>> supposed to be implemented for specific architecture. >>> These configs should always be disabled to prevent randconfig >>> related tests from failing. >>> >>> Signed-off-by: Oleksii Kurochko >>> --- >>> xen/Makefile | 5 - >>> 1 file changed, 4 insertions(+), 1 deletion(-) >>> >>> diff --git a/xen/Makefile b/xen/Makefile >>> index ca571103c8..8ae8fe1480 100644 >>> --- a/xen/Makefile >>> +++ b/xen/Makefile >>> @@ -336,11 +336,14 @@ ifeq ($(config-build),y) >>> # *config targets only - make sure prerequisites are updated, and descend >>> # in tools/kconfig to make the *config target >>> >>> +ARCH_FORCED_CONFIG := >>> $(srctree)/arch/$(SRCARCH)/configs/randomforced.config >>> + >>> # Create a file for KCONFIG_ALLCONFIG which depends on the environment. >>> # This will be use by kconfig targets >>> allyesconfig/allmodconfig/allnoconfig/randconfig >>> filechk_kconfig_allconfig = \ >>> $(if $(findstring n,$(XEN_HAS_CHECKPOLICY)), echo >>> 'CONFIG_XSM_FLASK_POLICY=n';) \ >>> -$(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG);) \ >>> +$(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG); \ >>> +$(if $(wildcard $(ARCH_FORCED_CONFIG)), cat $(ARCH_FORCED_CONFIG);) ) \ >>> : >>> >>> .allconfig.tmp: FORCE >> We already have infrastructure for this. What's wrong with >> EXTRA_FIXED_RANDCONFIG? > What I don't understand here is why dealing with the issue would want > limiting to gitlab-CI. Anyone could run randconfig on their own, and > imo it would be helpful if the same issue(s) could be prevented there, > too. Hence my earlier suggestion to have a snippet which can be used > by "interested" parties. And once dealt with in e.g. the makefile > there should not be a need for any overrides in the CI config anymore. 
This is trying to find a solution to a problem which doesn't exist. RISC-V and PPC are experimental in Xen. No one else is going to come and randconfig them until they're rather more production ready, and a prerequisite of that is removing this list of exclusions. Until you can actually find an interested party to comment, I think this is just churn for no useful improvement. If nothing else, calling it randomforced.config isn't appropriate given the explanation of what this target is used for... ~Andrew
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On 18.12.2023 16:43, Roger Pau Monné wrote: > On Mon, Dec 18, 2023 at 02:50:27PM +0100, Jan Beulich wrote: >> On 18.12.2023 14:46, Jan Beulich wrote: >>> On 18.12.2023 13:11, Roger Pau Monné wrote: Hello, I'm not as expert as Andrew in all the speculation stuff, but I will try to provide some feedback. On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: > In order to be able to defer the context switch IBPB to the last > possible point, add logic to the exit-to-guest paths to issue the > barrier there, including the "IBPB doesn't flush the RSB/RAS" > workaround. Since alternatives, for now at least, can't nest, emit JMP > to skip past both constructs where both are needed. This may be more > efficient anyway, as the sequence of NOPs is pretty long. Could you elaborate on the reason why deferring the IBPB to the exit to guest path is helpful? So far it just seem to make the logic more complex without nay justification (at least in the changelog). >>> >>> I've added "(to leave behind as little as possible)" ahead of the 1st >>> comma - is that sufficient, do you think? >> >> Actually, the next patch supplies better context, i.e. is more / also >> about avoiding to clobber Xen's own predictions. > > Right, that I got from next patch, but given context switch is already > a quite heavy operation, does avoiding the cleaning of the branch > predictor make that much of a difference? > > IMO it needs good justification given it's a change that makes the > logic harder to follow, so if it turns out there's no difference I > would rather leave the IBPB at context switch. As per another reply, I guess we want to discuss this with Andrew, so maybe a good thing to try to remember to put up on the next x86 meeting we're going to have. Jan
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On 18.12.2023 16:40, Roger Pau Monné wrote: > On Mon, Dec 18, 2023 at 02:46:37PM +0100, Jan Beulich wrote: >> On 18.12.2023 13:11, Roger Pau Monné wrote: >>> Hello, >>> >>> I'm not as expert as Andrew in all the speculation stuff, but I will >>> try to provide some feedback. >>> >>> On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: In order to be able to defer the context switch IBPB to the last possible point, add logic to the exit-to-guest paths to issue the barrier there, including the "IBPB doesn't flush the RSB/RAS" workaround. Since alternatives, for now at least, can't nest, emit JMP to skip past both constructs where both are needed. This may be more efficient anyway, as the sequence of NOPs is pretty long. >>> >>> Could you elaborate on the reason why deferring the IBPB to the exit >>> to guest path is helpful? So far it just seem to make the logic more >>> complex without nay justification (at least in the changelog). >> >> I've added "(to leave behind as little as possible)" ahead of the 1st >> comma - is that sufficient, do you think? > > Please bear with me, but I'm still uncertain. > > Even if IBRS is not enabled, and such indirect branch predictions are > at the same predictor mode, how would that be of any use to a guest? > My understanding was that the attacker is the one that has to control > the indirect branch predictor contents in order to attack the > hypervisor or other guests. Right; see my later reply. --- I have to admit that I'm not really certain about the placement of the IBPB wrt the MSR_SPEC_CTRL writes. For now I've simply used "opposite of entry". >>> >>> Maybe it would easier to just add the MSR_PRED_CMD PRED_CMD_IBPB write >>> to the vmcs MSR load list? >>> >>> It's a one-time only AFAICT, as you would only want to do this for >>> context-switch AFAICT. >> >> That would be a back and forth of adding and removing the MSR to/from >> that list then, which I'm not convinced is helpful. 
With these special >> MSRs I would further be uncertain as to their effect when used via one >> of these lists. > > Hm, we do seem to already use MSR_PRED_CMD with such lists, so I would > assume they work just fine. Ah, yes, I forgot about that. Still it would be a back and forth if we wanted the MSR on the list only after a context switch, but not anymore for later VM entry. Also iirc these lists are VMX-only? Jan > Anyway, was mostly a recommendation towards clarity, because I think > the return to guest context assembly is getting rather convoluted, and > it's IMO critical for it to be easy to follow. > > Thanks, Roger.
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On Mon, Dec 18, 2023 at 02:50:27PM +0100, Jan Beulich wrote: > On 18.12.2023 14:46, Jan Beulich wrote: > > On 18.12.2023 13:11, Roger Pau Monné wrote: > >> Hello, > >> > >> I'm not as expert as Andrew in all the speculation stuff, but I will > >> try to provide some feedback. > >> > >> On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: > >>> In order to be able to defer the context switch IBPB to the last > >>> possible point, add logic to the exit-to-guest paths to issue the > >>> barrier there, including the "IBPB doesn't flush the RSB/RAS" > >>> workaround. Since alternatives, for now at least, can't nest, emit JMP > >>> to skip past both constructs where both are needed. This may be more > >>> efficient anyway, as the sequence of NOPs is pretty long. > >> > >> Could you elaborate on the reason why deferring the IBPB to the exit > >> to guest path is helpful? So far it just seem to make the logic more > >> complex without nay justification (at least in the changelog). > > > > I've added "(to leave behind as little as possible)" ahead of the 1st > > comma - is that sufficient, do you think? > > Actually, the next patch supplies better context, i.e. is more / also > about avoiding to clobber Xen's own predictions. Right, that I got from next patch, but given context switch is already a quite heavy operation, does avoiding the cleaning of the branch predictor make that much of a difference? IMO it needs good justification given it's a change that makes the logic harder to follow, so if it turns out there's no difference I would rather leave the IBPB at context switch. Thanks, Roger.
Re: Possible bug in Xen
On 18/12/2023 3:34 pm, Joe Tretter wrote: > Hello, > > I discussed the below problem with the QubesOS team on Github > (https://github.com/QubesOS/qubes-issues/issues/4493) and they suggest > that this seems to be a problem with Xen, and suggested that I post it > to this e-mail address. > > I have problems restoring backups in QubesOS release 4.1.2 on one of > my machines. > Other users reported the issue too, but no QubesOS developer seemed to > be able to reproduce it, therefore nothing happened for a while and > the assumption has been that it's some sort of hardware problem. > > I analyzed the problem down to the "scrypt" tool > (https://www.tarsnap.com/scrypt.html) falsely complaining that the > password would be wrong. > I proceeded and re-compiled the "scrypt" tool in its latest version > and found that the self-tests show intermittent erratic behavior. > > This issue only happens with QubesOS/Xen Kernel, if I boot into a > kernel without Xen, the problem can't be observed. > The problem can only be observed on one of my machines which is a Dell > Inspiron 5675 with an AMD Ryzen 7 1700 Eight-Core Processor. > > I hope you can help with this issue. So the issue is specific to your AMD Zen1 system, and only when Xen is in the mix. Can you try booting Xen with spec-ctrl=eager-fpu and see if that changes the behaviour ? ~Andrew
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On Mon, Dec 18, 2023 at 02:46:37PM +0100, Jan Beulich wrote: > On 18.12.2023 13:11, Roger Pau Monné wrote: > > Hello, > > > > I'm not as expert as Andrew in all the speculation stuff, but I will > > try to provide some feedback. > > > > On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: > >> In order to be able to defer the context switch IBPB to the last > >> possible point, add logic to the exit-to-guest paths to issue the > >> barrier there, including the "IBPB doesn't flush the RSB/RAS" > >> workaround. Since alternatives, for now at least, can't nest, emit JMP > >> to skip past both constructs where both are needed. This may be more > >> efficient anyway, as the sequence of NOPs is pretty long. > > > > Could you elaborate on the reason why deferring the IBPB to the exit > > to guest path is helpful? So far it just seem to make the logic more > > complex without nay justification (at least in the changelog). > > I've added "(to leave behind as little as possible)" ahead of the 1st > comma - is that sufficient, do you think? Please bear with me, but I'm still uncertain. Even if IBRS is not enabled, and such indirect branch predictions are at the same predictor mode, how would that be of any use to a guest? My understanding was that the attacker is the one that has to control the indirect branch predictor contents in order to attack the hypervisor or other guests. > >> --- > >> I have to admit that I'm not really certain about the placement of the > >> IBPB wrt the MSR_SPEC_CTRL writes. For now I've simply used "opposite of > >> entry". > > > > Maybe it would easier to just add the MSR_PRED_CMD PRED_CMD_IBPB write > > to the vmcs MSR load list? > > > > It's a one-time only AFAICT, as you would only want to do this for > > context-switch AFAICT. > > That would be a back and forth of adding and removing the MSR to/from > that list then, which I'm not convinced is helpful. 
With these special > MSRs I would further be uncertain as to their effect when used via one > of these lists. Hm, we do seem to already use MSR_PRED_CMD with such lists, so I would assume they work just fine. Anyway, was mostly a recommendation towards clarity, because I think the return to guest context assembly is getting rather convoluted, and it's IMO critical for it to be easy to follow. Thanks, Roger.
Re: [PATCH v2 10/14] aio: remove aio_context_acquire()/aio_context_release() API
Am 05.12.2023 um 19:20 hat Stefan Hajnoczi geschrieben: > Delete these functions because nothing calls these functions anymore. > > I introduced these APIs in commit 98563fc3ec44 ("aio: add > aio_context_acquire() and aio_context_release()") in 2014. It's with a > sigh of relief that I delete these APIs almost 10 years later. > > Thanks to Paolo Bonzini's vision for multi-queue QEMU, we got an > understanding of where the code needed to go in order to remove the > limitations that the original dataplane and the IOThread/AioContext > approach that followed it. > > Emanuele Giuseppe Esposito had the splendid determination to convert > large parts of the codebase so that they no longer needed the AioContext > lock. This was a painstaking process, both in the actual code changes > required and the iterations of code review that Emanuele eked out of > Kevin and me over many months. > > Kevin Wolf tackled multitudes of graph locking conversions to protect > in-flight I/O from run-time changes to the block graph as well as the > clang Thread Safety Analysis annotations that allow the compiler to > check whether the graph lock is being used correctly. > > And me, well, I'm just here to add some pizzazz to the QEMU multi-queue > block layer :). Thank you to everyone who helped with this effort, > including Eric Blake, code reviewer extraordinaire, and others who I've > forgotten to mention. > > Signed-off-by: Stefan Hajnoczi > Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf
Re: [PATCH v2 08/14] scsi: remove AioContext locking
Am 05.12.2023 um 19:20 hat Stefan Hajnoczi geschrieben: > The AioContext lock no longer has any effect. Remove it. > > Signed-off-by: Stefan Hajnoczi > Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf
Re: [PATCH v2 09/14] aio-wait: draw equivalence between AIO_WAIT_WHILE() and AIO_WAIT_WHILE_UNLOCKED()
Am 05.12.2023 um 19:20 hat Stefan Hajnoczi geschrieben: > Now that the AioContext lock no longer exists, AIO_WAIT_WHILE() and > AIO_WAIT_WHILE_UNLOCKED() are equivalent. > > A future patch will get rid of AIO_WAIT_WHILE_UNLOCKED(). > > Signed-off-by: Stefan Hajnoczi > Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf
Re: [PATCH v2 07/14] block: remove bdrv_co_lock()
Am 05.12.2023 um 19:20 hat Stefan Hajnoczi geschrieben: > The bdrv_co_lock() and bdrv_co_unlock() functions are already no-ops. > Remove them. > > Signed-off-by: Stefan Hajnoczi Reviewed-by: Kevin Wolf
Re: [PATCH v2 02/14] scsi: assert that callbacks run in the correct AioContext
Am 05.12.2023 um 19:19 hat Stefan Hajnoczi geschrieben: > Since the removal of AioContext locking, the correctness of the code > relies on running requests from a single AioContext at any given time. > > Add assertions that verify that callbacks are invoked in the correct > AioContext. > > Signed-off-by: Stefan Hajnoczi Reviewed-by: Kevin Wolf
Re: [PATCH v2 38/39] xen/riscv: enable full Xen build
On 24.11.2023 11:30, Oleksii Kurochko wrote: > Signed-off-by: Oleksii Kurochko Reviewed-by: Jan Beulich with ... > --- a/xen/arch/riscv/configs/tiny64_defconfig > +++ b/xen/arch/riscv/configs/tiny64_defconfig > @@ -24,7 +24,6 @@ > # CONFIG_COVERAGE is not set > # CONFIG_UBSAN is not set > # CONFIG_NEEDS_LIBELF is not set > - > CONFIG_RISCV_64=y > CONFIG_DEBUG=y > CONFIG_DEBUG_INFO=y ... this unrelated (and perhaps even unhelpful) change dropped. Jan
Re: [PATCH] x86: don't open-code max_page calculation nor pfn_to_paddr()
On 18/12/2023 3:13 pm, Jan Beulich wrote: > As observed by Roger while reviewing a somewhat related change, there's > no need here either to open-code the (largely, i.e. once setup_max_pdx() > was called) fixed relationship between max_pdx and max_page. Further we > can avoid open-coding pfn_to_paddr() here. > > Signed-off-by: Jan Beulich Acked-by: Andrew Cooper
Re: [PATCH v4 3/4] x86: limit issuing of IBPB during context switch
On Tue, Feb 14, 2023 at 05:11:40PM +0100, Jan Beulich wrote: > When the outgoing vCPU had IBPB issued and RSB overwritten upon entering > Xen, then there's no need for a 2nd barrier during context switch. > > Note that SCF_entry_ibpb is always clear for the idle domain, so no > explicit idle domain check is needed to augment the feature check > (which is simply inapplicable to "idle"). > > Signed-off-by: Jan Beulich Acked-by: Roger Pau Monné > --- > v4: Tighten the condition. > v3: Fold into series. > --- > I think in principle we could limit the impact from finding the idle > domain as "prevd", by having __context_switch() tell us what kind > domain's vCPU was switched out (it could still be "idle", but in fewer > cases). > > --- a/xen/arch/x86/domain.c > +++ b/xen/arch/x86/domain.c > @@ -2005,17 +2005,26 @@ void context_switch(struct vcpu *prev, s > } > else > { > +unsigned int feat_sc_rsb = X86_FEATURE_SC_RSB_HVM; > + > __context_switch(); > > /* Re-enable interrupts before restoring state which may fault. */ > local_irq_enable(); > > if ( is_pv_domain(nextd) ) > +{ > load_segments(next); > > +feat_sc_rsb = X86_FEATURE_SC_RSB_PV; > +} > + > ctxt_switch_levelling(next); > > -if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) ) > +if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) && > + (!(prevd->arch.spec_ctrl_flags & SCF_entry_ibpb) || > + /* is_idle_domain(prevd) || */ I would rather add a comment to note that the idle domain always has SCF_entry_ibpb clear, rather than leaving this commented check in the condition. > + !boot_cpu_has(feat_sc_rsb)) ) I do wonder if it would be more fail safe (and easier to expand going forward) if we introduce a new cpu_info field to track the CPU state: relevant here would be whether RSB has been overwritten and IBPB executed. Such state would be cleared on each return from guest path. Thanks, Roger.
[PATCH] x86: don't open-code max_page calculation nor pfn_to_paddr()
As observed by Roger while reviewing a somewhat related change, there's no need here either to open-code the (largely, i.e. once setup_max_pdx() was called) fixed relationship between max_pdx and max_page. Further we can avoid open-coding pfn_to_paddr() here. Signed-off-by: Jan Beulich --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -1569,7 +1569,7 @@ void asmlinkage __init noreturn __start_ continue; } map_e = e; -e = (pdx_to_pfn(max_pdx - 1) + 1ULL) << PAGE_SHIFT; +e = pfn_to_paddr(max_page); printk(XENLOG_WARNING "Ignoring inaccessible memory range" " %013"PRIx64"-%013"PRIx64"\n", e, map_e);
[XEN PATCH v2] xen: move declaration of first_valid_mfn to xen/numa.h
Such declaration is moved in order to provide it for Arm and PPC, whilst not violating MISRA C:2012 Rule 8.4 in common/page_alloc.c: "A compatible declaration shall be visible when an object or function with external linkage is defined". Signed-off-by: Julien Grall Signed-off-by: Nicola Vetrini --- Changes in v2: This patch is a rework of a previous one appeared in this series [1], of which patches 1 and 2 have been committed already. The updated patch was provided by Julien in this thread [2]. I added the commit message and the rest of the information. [1] https://lore.kernel.org/xen-devel/cover.1702285639.git.nicola.vetr...@bugseng.com/T/#mee6def855787d932fe2f10d5648d437dcb6f046c [2] https://lore.kernel.org/xen-devel/cover.1702285639.git.nicola.vetr...@bugseng.com/T/#m3c5b141b806530b5920bb5e9dd53631195560317 --- xen/arch/arm/include/asm/numa.h | 6 -- xen/arch/ppc/include/asm/numa.h | 6 -- xen/common/page_alloc.c | 6 -- xen/include/xen/numa.h | 2 ++ 4 files changed, 6 insertions(+), 14 deletions(-) diff --git a/xen/arch/arm/include/asm/numa.h b/xen/arch/arm/include/asm/numa.h index e2bee2bd8223..a2c1da4a82f7 100644 --- a/xen/arch/arm/include/asm/numa.h +++ b/xen/arch/arm/include/asm/numa.h @@ -11,12 +11,6 @@ typedef u8 nodeid_t; #define cpu_to_node(cpu) 0 #define node_to_cpumask(node) (cpu_online_map) -/* - * TODO: make first_valid_mfn static when NUMA is supported on Arm, this - * is required because the dummy helpers are using it. 
- */ -extern mfn_t first_valid_mfn; - /* XXX: implement NUMA support */ #define node_spanned_pages(nid) (max_page - mfn_x(first_valid_mfn)) #define node_start_pfn(nid) (mfn_x(first_valid_mfn)) diff --git a/xen/arch/ppc/include/asm/numa.h b/xen/arch/ppc/include/asm/numa.h index 7fdf66c3da74..204180ad5b98 100644 --- a/xen/arch/ppc/include/asm/numa.h +++ b/xen/arch/ppc/include/asm/numa.h @@ -10,12 +10,6 @@ typedef uint8_t nodeid_t; #define cpu_to_node(cpu) 0 #define node_to_cpumask(node) (cpu_online_map) -/* - * TODO: make first_valid_mfn static when NUMA is supported on PPC, this - * is required because the dummy helpers are using it. - */ -extern mfn_t first_valid_mfn; - /* XXX: implement NUMA support */ #define node_spanned_pages(nid) (max_page - mfn_x(first_valid_mfn)) #define node_start_pfn(nid) (mfn_x(first_valid_mfn)) diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 9b5df74fddab..d874525916ea 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -255,8 +255,10 @@ static PAGE_LIST_HEAD(page_broken_list); */ /* - * first_valid_mfn is exported because it is use in ARM specific NUMA - * helpers. See comment in arch/arm/include/asm/numa.h. + * first_valid_mfn is exported because it is used when !CONFIG_NUMA. + * + * TODO: Consider if we can conditionally export first_valid_mfn based + * on whether NUMA is selected. */ mfn_t first_valid_mfn = INVALID_MFN_INITIALIZER; diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h index 287e81ff..a10d4b1778a0 100644 --- a/xen/include/xen/numa.h +++ b/xen/include/xen/numa.h @@ -108,6 +108,8 @@ extern void numa_set_processor_nodes_parsed(nodeid_t node); #else +extern mfn_t first_valid_mfn; + static inline nodeid_t mfn_to_nid(mfn_t mfn) { return 0; -- 2.34.1
Re: [PATCH 4/9] ACPI: address violations of MISRA C:2012 Rule 11.8
On 14/12/23 17:36, Jan Beulich wrote: On 14.12.2023 13:07, Simone Ballarin wrote: --- a/xen/include/acpi/acmacros.h +++ b/xen/include/acpi/acmacros.h @@ -116,7 +116,7 @@ #define ACPI_PTR_TO_PHYSADDR(i) ACPI_TO_INTEGER(i) #ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED -#define ACPI_COMPARE_NAME(a,b) (*ACPI_CAST_PTR (u32,(a)) == *ACPI_CAST_PTR (u32,(b))) +#define ACPI_COMPARE_NAME(a,b) (*ACPI_CAST_PTR (const u32,(a)) == *ACPI_CAST_PTR (const u32,(b))) Hmm, I'm a little hesitant to take changes to this header. We've inherited it from Linux, who in turn inherited / imported it from ACPI CA. #else #define ACPI_COMPARE_NAME(a,b) (!ACPI_STRNCMP (ACPI_CAST_PTR (char,(a)), ACPI_CAST_PTR (char,(b)), ACPI_NAME_SIZE)) #endif What about this alternative code, btw? Jan If the file is not supposed to be changed, I would consider adding it in docs/misra/exclude-list*. For the moment, I think it is better to drop the change. -- Simone Ballarin, M.Sc. Field Application Engineer, BUGSENG (https://bugseng.com)
Re: [PATCH] x86: allow non-BIGMEM configs to boot on >= 16Tb systems
On 18.12.2023 12:13, Roger Pau Monné wrote: > On Mon, Dec 18, 2023 at 09:26:24AM +0100, Jan Beulich wrote: >> On 15.12.2023 15:54, Roger Pau Monné wrote: >>> On Wed, Jun 07, 2023 at 08:17:30AM +0200, Jan Beulich wrote: While frame table setup, directmap init, and boot allocator population respect all intended bounds, the logic passing memory to the heap allocator which wasn't passed to the boot allocator fails to respect max_{pdx,pfn}. This then typically triggers the BUG() in free_heap_pages() after checking page state, because of hitting a struct page_info instance which was set to all ~0. Of course all the memory above the 16Tb boundary is still going to remain unused; using it requires BIGMEM=y. And of course this fix similarly ought to help BIGMEM=y configurations on >= 123Tb systems (where all the memory beyond that boundary continues to be unused). Fixes: bac263ba ("x86-64: reduce range spanned by 1:1 mapping and frame table indexes") Signed-off-by: Jan Beulich >>> >>> Acked-by: Roger Pau Monné >> >> Thanks. >> --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -1722,15 +1722,16 @@ void __init noreturn __start_xen(unsigne if ( max_page - 1 > virt_to_mfn(HYPERVISOR_VIRT_END - 1) ) { -unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1); +unsigned long lo = virt_to_mfn(HYPERVISOR_VIRT_END - 1); +unsigned long hi = pdx_to_pfn(max_pdx - 1) + 1; >>> >>> Maybe use max_page to avoid the pdx_to_pfn() call? (And is also more >>> in context with the condition on the outside if). >> >> You mean >> >> unsigned long hi = min(pdx_to_pfn(max_pdx - 1) + 1, max_page); >> >> ? I could switch to that, yes. I wouldn't feel well switching to using >> just max_page, especially with me having nowhere to (reasonably) test. > > Isn't max_page derived from max_pdx (see setup_max_pdx()), and > hence we could avoid the pdx_to_pfn() conversion by just using it? > > max_page = pdx_to_pfn(max_pdx - 1) + 1; > > So hi == max_page in your proposed code. 
> > Maybe there are further restrictions applied to max_pdx that are not > propagated into max_page, the meaning of all those variables is very > opaque, and hard to follow in the source code. Looking more closely, the two appear to be properly in sync once setup_max_pdx() was called the first time. I guess I was in part mislead by e = (pdx_to_pfn(max_pdx - 1) + 1ULL) << PAGE_SHIFT; just a few lines past an update to both variables. I'll switch to max_page here, and I may also make a patch to tidy the line quoted above. Jan
[PATCH v2 3/3] x86: detect PIT aliasing on ports other than 0x4[0-3]
... in order to also deny Dom0 access through the alias ports. Without this it is only giving the impression of denying access to PIT. Unlike for CMOS/RTC, do detection pretty early, to avoid disturbing normal operation later on (even if typically we won't use much of the PIT). Like for CMOS/RTC a fundamental assumption of the probing is that reads from the probed alias port won't have side effects (beyond such that PIT reads have anyway) in case it does not alias the PIT's. As to the port 0x61 accesses: Unlike other accesses we do, this masks off the top four bits (in addition to the bottom two ones), following Intel chipset documentation saying that these (read-only) bits should only be written with zero. Signed-off-by: Jan Beulich --- If Xen was running on top of another instance of itself (in HVM mode, not PVH, i.e. not as a shim), prior to 14f42af3f52d ('x86/vPIT: account for "counter stopped" time') I'm afraid our vPIT logic would not have allowed the "Try to further make sure ..." check to pass in the Xen running on top: We don't respect the gate bit being clear when handling counter reads. (There are more unhandled [and unmentioned as being so] aspects of PIT behavior though, yet it's unclear in how far addressing at least some of them would be useful.) --- v2: Use new command line option. Re-base over changes to earlier patches. Use ISOLATE_LSB(). --- a/xen/arch/x86/dom0_build.c +++ b/xen/arch/x86/dom0_build.c @@ -495,7 +495,11 @@ int __init dom0_setup_permissions(struct rc |= ioports_deny_access(d, 0x4D0, 0x4D1); /* Interval Timer (PIT). */ -rc |= ioports_deny_access(d, 0x40, 0x43); +for ( offs = 0, i = ISOLATE_LSB(pit_alias_mask) ?: 4; + offs <= pit_alias_mask; offs += i ) +if ( !(offs & ~pit_alias_mask) ) +rc |= ioports_deny_access(d, 0x40 + offs, 0x43 + offs); + /* PIT Channel 2 / PC Speaker Control.
*/ rc |= ioports_deny_access(d, 0x61, 0x61); --- a/xen/arch/x86/include/asm/setup.h +++ b/xen/arch/x86/include/asm/setup.h @@ -47,6 +47,7 @@ extern unsigned long highmem_start; #endif extern unsigned int i8259A_alias_mask; +extern unsigned int pit_alias_mask; extern int8_t opt_smt; extern int8_t opt_probe_port_aliases; --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -425,6 +425,72 @@ static struct platform_timesource __init .resume = resume_pit, }; +unsigned int __initdata pit_alias_mask; + +static void __init probe_pit_alias(void) +{ +unsigned int mask = 0x1c; +uint8_t val = 0; + +if ( !opt_probe_port_aliases ) +return; + +/* + * Use channel 2 in mode 0 for probing. In this mode even a non-initial + * count is loaded independent of counting being / becoming enabled. Thus + * we have a 16-bit value fully under our control, to write and then check + * whether we can also read it back unaltered. + */ + +/* Turn off speaker output and disable channel 2 counting. */ +outb(inb(0x61) & 0x0c, 0x61); + +outb((2 << 6) | (3 << 4) | (0 << 1), PIT_MODE); /* Mode 0, LSB/MSB. */ + +do { +uint8_t val2; +unsigned int offs; + +outb(val, PIT_CH2); +outb(val ^ 0xff, PIT_CH2); + +/* Wait for the Null Count bit to clear. */ +do { +/* Latch status. */ +outb((3 << 6) | (1 << 5) | (1 << 3), PIT_MODE); + +/* Try to make sure we're actually having a PIT here. */ +val2 = inb(PIT_CH2); +if ( (val2 & ~(3 << 6)) != ((3 << 4) | (0 << 1)) ) +return; +} while ( val2 & (1 << 6) ); + +/* + * Try to further make sure we're actually having a PIT here. + * + * NB: Deliberately |, not ||, as we always want both reads. + */ +val2 = inb(PIT_CH2); +if ( (val2 ^ val) | (inb(PIT_CH2) ^ val ^ 0xff) ) +return; + +for ( offs = ISOLATE_LSB(mask); offs <= mask; offs <<= 1 ) +{ +if ( !(mask & offs) ) +continue; +val2 = inb(PIT_CH2 + offs); +if ( (val2 ^ val) | (inb(PIT_CH2 + offs) ^ val ^ 0xff) ) +mask &= ~offs; +} +} while ( mask && (val += 0x0b) ); /* Arbitrary uneven number. 
*/ + +if ( mask ) +{ +dprintk(XENLOG_INFO, "PIT aliasing mask: %02x\n", mask); +pit_alias_mask = mask; +} +} + /* PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET) */ @@ -2414,6 +2480,8 @@ void __init early_time_init(void) } preinit_pit(); +probe_pit_alias(); + tmp = init_platform_timer(); plt_tsc.frequency = tmp;
[PATCH v2 2/3] x86: detect PIC aliasing on ports other than 0x[2A][01]
... in order to also deny Dom0 access through the alias ports. Without this it is only giving the impression of denying access to both PICs. Unlike for CMOS/RTC, do detection very early, to avoid disturbing normal operation later on. Like for CMOS/RTC a fundamental assumption of the probing is that reads from the probed alias port won't have side effects in case it does not alias the respective PIC's one. Signed-off-by: Jan Beulich --- v2: Use new command line option. s/pic/8259A/. Re-base over new earlier patch. Use ISOLATE_LSB(). --- a/xen/arch/x86/dom0_build.c +++ b/xen/arch/x86/dom0_build.c @@ -467,7 +467,7 @@ static void __init process_dom0_ioports_ int __init dom0_setup_permissions(struct domain *d) { unsigned long mfn; -unsigned int i; +unsigned int i, offs; int rc; if ( pv_shim ) @@ -480,10 +480,16 @@ int __init dom0_setup_permissions(struct /* Modify I/O port access permissions. */ -/* Master Interrupt Controller (PIC). */ -rc |= ioports_deny_access(d, 0x20, 0x21); -/* Slave Interrupt Controller (PIC). */ -rc |= ioports_deny_access(d, 0xA0, 0xA1); +for ( offs = 0, i = ISOLATE_LSB(i8259A_alias_mask) ?: 2; + offs <= i8259A_alias_mask; offs += i ) +{ +if ( offs & ~i8259A_alias_mask ) +continue; +/* Master Interrupt Controller (PIC). */ +rc |= ioports_deny_access(d, 0x20 + offs, 0x21 + offs); +/* Slave Interrupt Controller (PIC). */ +rc |= ioports_deny_access(d, 0xA0 + offs, 0xA1 + offs); +} /* ELCR of both PICs. */ rc |= ioports_deny_access(d, 0x4D0, 0x4D1); --- a/xen/arch/x86/i8259.c +++ b/xen/arch/x86/i8259.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -333,6 +334,58 @@ void __init make_8259A_irq(unsigned int irq_to_desc(irq)->handler = _irq_type; } +unsigned int __initdata i8259A_alias_mask; + +static void __init probe_8259A_alias(void) +{ +unsigned int mask = 0x1e; +uint8_t val = 0; + +if ( !opt_probe_port_aliases ) +return; + +/* + * The only properly r/w register is OCW1.
While keeping the master + * fully masked (thus also masking anything coming through the slave), + * write all possible 256 values to the slave's base port, and check + * whether the same value can then be read back through any of the + * possible alias ports. Probing just the slave of course builds on the + * assumption that aliasing is identical for master and slave. + */ + +outb(0xff, 0x21); /* Fully mask master. */ + +do { +unsigned int offs; + +outb(val, 0xa1); + +/* Try to make sure we're actually having a PIC here. */ +if ( inb(0xa1) != val ) +{ +mask = 0; +break; +} + +for ( offs = ISOLATE_LSB(mask); offs <= mask; offs <<= 1 ) +{ +if ( !(mask & offs) ) +continue; +if ( inb(0xa1 + offs) != val ) +mask &= ~offs; +} +} while ( mask && (val += 0x0d) ); /* Arbitrary uneven number. */ + +outb(cached_A1, 0xa1); /* Restore slave IRQ mask. */ +outb(cached_21, 0x21); /* Restore master IRQ mask. */ + +if ( mask ) +{ +dprintk(XENLOG_INFO, "PIC aliasing mask: %02x\n", mask); +i8259A_alias_mask = mask; +} +} + static struct irqaction __read_mostly cascade = { no_action, "cascade", NULL}; void __init init_IRQ(void) @@ -343,6 +396,8 @@ void __init init_IRQ(void) init_8259A(0); +probe_8259A_alias(); + for (irq = 0; platform_legacy_irq(irq); irq++) { struct irq_desc *desc = irq_to_desc(irq); --- a/xen/arch/x86/include/asm/setup.h +++ b/xen/arch/x86/include/asm/setup.h @@ -46,6 +46,8 @@ extern uint8_t kbd_shift_flags; extern unsigned long highmem_start; #endif +extern unsigned int i8259A_alias_mask; + extern int8_t opt_smt; extern int8_t opt_probe_port_aliases;
[PATCH v2 1/3] x86: allow to suppress port-alias probing
By default there's already no use for this when we run in shim mode. Plus there may also be a need to suppress the probing in case of issues with it. Before introducing further port alias probing, introduce a command line option allowing to bypass it, default it to on when in shim mode, and gate RTC/CMOS port alias probing on it. Requested-by: Roger Pau Monné Signed-off-by: Jan Beulich --- While "probe-port-aliases" is longish, shorter forms (e.g. "port-probe") partially lose the intended meaning. --- v2: New. --- a/docs/misc/xen-command-line.pandoc +++ b/docs/misc/xen-command-line.pandoc @@ -2000,6 +2000,17 @@ INVPCID is supported and not disabled vi This is a mask of C-states which are to be used preferably. This option is applicable only on hardware were certain C-states are exclusive of one another. +### probe-port-aliases (x86) +> `= ` + +> Default: `true` outside of shim mode, `false` in shim mode + +Certain devices accessible by I/O ports may be accessible also through "alias" +ports (originally a result of incomplete address decoding). When such devices +are solely under Xen's control, Xen disallows even Dom0 access to the "primary" +ports. When alias probing is active and aliases are detected, "alias" ports +would then be treated similar to the "primary" ones. 
+ ### psr (Intel) > `= List of ( cmt: | rmid_max: | cat: | > cos_max: | cdp: )` --- a/xen/arch/x86/include/asm/setup.h +++ b/xen/arch/x86/include/asm/setup.h @@ -47,6 +47,7 @@ extern unsigned long highmem_start; #endif extern int8_t opt_smt; +extern int8_t opt_probe_port_aliases; #ifdef CONFIG_SHADOW_PAGING extern bool opt_dom0_shadow; --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -75,6 +75,9 @@ static bool __initdata opt_invpcid = tru boolean_param("invpcid", opt_invpcid); bool __read_mostly use_invpcid; +int8_t __initdata opt_probe_port_aliases = -1; +boolean_param("probe-port-aliases", opt_probe_port_aliases); + /* Only used in asm code and within this source file */ unsigned long asmlinkage __read_mostly cr4_pv32_mask; @@ -1844,6 +1847,9 @@ void asmlinkage __init noreturn __start_ /* Low mappings were only needed for some BIOS table parsing. */ zap_low_mappings(); +if ( opt_probe_port_aliases < 0 ) +opt_probe_port_aliases = !pv_shim; + init_apic_mappings(); normalise_cpu_order(); --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -1253,7 +1253,8 @@ static int __init cf_check probe_cmos_al { unsigned int offs; -if ( acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC ) +if ( (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) || + !opt_probe_port_aliases ) return 0; for ( offs = 2; offs < 8; offs <<= 1 )
[PATCH v2 0/3] x86: Dom0 I/O port access permissions
Following on from the CMOS/RTC port aliasing change, there are some more missing restrictions; in particular there's more port aliasing to be aware of. But first of all introduce a command line option to allow suppressing this probing of aliases, as was requested. Of course an alternative to all of this would be to do away with all policy-only ioports_deny_access() in dom0_setup_permissions(), leaving in place only ones which are truly required for functionality reasons. 1: allow to suppress port-alias probing 2: detect PIC aliasing on ports other than 0x[2A][01] 3: detect PIT aliasing on ports other than 0x4[0-3] Jan
Re: [PATCH V8 00/12] fix migration of suspended runstate
On 12/18/2023 12:14 AM, Peter Xu wrote: > On Wed, Dec 13, 2023 at 10:35:33AM -0500, Steven Sistare wrote: >> Hi Peter, all have RB's, with all i's dotted and t's crossed - steve > > Yes this seems to be more migration related so maybe good candidate for a > pull from migration submodule. > > But since this is still solving a generic issue, I'm copying a few more > people from get_maintainers.pl that this series touches, just in case > they'll have something to say before dev cycle starts. The key aspects are summarized by the cover letter and the commit messages pasted below for the first 6 patches: https://lore.kernel.org/qemu-devel/1702481421-375368-1-git-send-email-steven.sist...@oracle.com --- [PATCH V8 00/12] fix migration of suspended runstate Migration of a guest in the suspended runstate is broken. The incoming migration code automatically tries to wake the guest, which is wrong; the guest should end migration in the same runstate it started. Further, after saving a snapshot in the suspended state and loading it, the vm_start fails. The runstate is RUNNING, but the guest is not. --- [PATCH V8 01/12] cpus: vm_was_suspended Add a state variable to remember if a vm previously transitioned into a suspended state. --- [PATCH V8 02/12] cpus: stop vm in suspended runstate Currently, a vm in the suspended state is not completely stopped. The VCPUs have been paused, but the cpu clock still runs, and runstate notifiers for the transition to stopped have not been called. This causes problems for live migration. Stale cpu timers_state is saved to the migration stream, causing time errors in the guest when it wakes from suspend, and state that would have been modified by runstate notifiers is wrong. Modify vm_stop to completely stop the vm if the current state is suspended, transition to RUN_STATE_PAUSED, and remember that the machine was suspended. Modify vm_start to restore the suspended state. 
This affects all callers of vm_stop and vm_start, notably, the qapi stop and cont commands. For example: (qemu) info status VM status: paused (suspended) (qemu) stop (qemu) info status VM status: paused (qemu) system_wakeup Error: Unable to wake up: guest is not in suspended state (qemu) cont (qemu) info status VM status: paused (suspended) (qemu) system_wakeup (qemu) info status VM status: running --- [PATCH V8 03/12] cpus: check running not RUN_STATE_RUNNING When a vm transitions from running to suspended, runstate notifiers are not called, so the notifiers still think the vm is running. Hence, when we call vm_start to restore the suspended state, we call vm_state_notify with running=1. However, some notifiers check for RUN_STATE_RUNNING. They must check the running boolean instead. No functional change. --- [PATCH V8 04/12] cpus: vm_resume Define the vm_resume helper, for use in subsequent patches. --- [PATCH V8 05/12] migration: propagate suspended runstate If the outgoing machine was previously suspended, propagate that to the incoming side via global_state, so a subsequent vm_start restores the suspended state. To maintain backward and forward compatibility, reclaim some space from the runstate member. --- [PATCH V8 06/12] migration: preserve suspended runstate A guest that is migrated in the suspended state automatically wakes and continues execution. This is wrong; the guest should end migration in the same state it started. The root cause is that the outgoing migration code automatically wakes the guest, then saves the RUNNING runstate in global_state_store(), hence the incoming migration code thinks the guest is running and continues the guest if autostart is true. On the outgoing side, delete the call to qemu_system_wakeup_request(). Now that vm_stop completely stops a vm in the suspended state (from the preceding patches), the existing call to vm_stop_force_state is sufficient to correctly migrate all vmstate.
On the incoming side, call vm_start if the pre-migration state was running or suspended. For the latter, vm_start correctly restores the suspended state, and a future system_wakeup monitor request will cause the vm to resume running. ---
[PATCH v4 5/5] x86/vIRQ: split PCI link load state checking from actual loading
Move the checking into a check hook, and add checking of the padding fields as well. Signed-off-by: Jan Beulich --- v4: New. --- a/xen/arch/x86/hvm/irq.c +++ b/xen/arch/x86/hvm/irq.c @@ -749,6 +749,30 @@ static int cf_check irq_load_isa(struct return 0; } +static int cf_check irq_check_link(const struct domain *d, + hvm_domain_context_t *h) +{ +const struct hvm_hw_pci_link *pci_link = hvm_get_entry(PCI_LINK, h); +unsigned int link; + +if ( !pci_link ) +return -ENODATA; + +for ( link = 0; link < ARRAY_SIZE(pci_link->pad0); link++ ) +if ( pci_link->pad0[link] ) +return -EINVAL; + +for ( link = 0; link < ARRAY_SIZE(pci_link->route); link++ ) +if ( pci_link->route[link] > 15 ) +{ +printk(XENLOG_G_ERR + "HVM restore: PCI-ISA link %u out of range (%u)\n", + link, pci_link->route[link]); +return -EINVAL; +} + +return 0; +} static int cf_check irq_load_link(struct domain *d, hvm_domain_context_t *h) { @@ -759,16 +783,6 @@ static int cf_check irq_load_link(struct if ( hvm_load_entry(PCI_LINK, h, _irq->pci_link) != 0 ) return -EINVAL; -/* Sanity check */ -for ( link = 0; link < 4; link++ ) -if ( hvm_irq->pci_link.route[link] > 15 ) -{ -printk(XENLOG_G_ERR - "HVM restore: PCI-ISA link %u out of range (%u)\n", - link, hvm_irq->pci_link.route[link]); -return -EINVAL; -} - /* Adjust the GSI assert counts for the link outputs. * This relies on the PCI and ISA IRQ state being loaded first */ for ( link = 0; link < 4; link++ ) @@ -788,5 +802,5 @@ HVM_REGISTER_SAVE_RESTORE(PCI_IRQ, irq_s 1, HVMSR_PER_DOM); HVM_REGISTER_SAVE_RESTORE(ISA_IRQ, irq_save_isa, NULL, irq_load_isa, 1, HVMSR_PER_DOM); -HVM_REGISTER_SAVE_RESTORE(PCI_LINK, irq_save_link, NULL, irq_load_link, - 1, HVMSR_PER_DOM); +HVM_REGISTER_SAVE_RESTORE(PCI_LINK, irq_save_link, irq_check_link, + irq_load_link, 1, HVMSR_PER_DOM);
[PATCH v4 4/5] x86/vPIC: check values loaded from state save record
Loading is_master from the state save record can lead to out-of-bounds accesses via at least the two container_of() uses by vpic_domain() and __vpic_lock(). Make sure the value is consistent with the instance being loaded. For ->int_output (which for whatever reason isn't a 1-bit bitfield), besides bounds checking also take ->init_state into account. For ELCR follow what vpic_intercept_elcr_io()'s write path and vpic_reset() do, i.e. don't insist on the internal view of the value to be saved. Move the instance range check as well, leaving just an assertion in the load handler. Signed-off-by: Jan Beulich Reviewed-by: Roger Pau Monné --- v3: vpic_domain() fix and vpic_elcr_mask() adjustment split out. Re-base over rename in earlier patch. v2: Introduce separate checking function; switch to refusing to load bogus values. Re-base. --- a/xen/arch/x86/hvm/vpic.c +++ b/xen/arch/x86/hvm/vpic.c @@ -429,6 +429,38 @@ static int cf_check vpic_save(struct vcp return 0; } +static int cf_check vpic_check(const struct domain *d, hvm_domain_context_t *h) +{ +unsigned int inst = hvm_load_instance(h); +const struct hvm_hw_vpic *s; + +if ( !has_vpic(d) ) +return -ENODEV; + +/* Which PIC is this? */ +if ( inst >= ARRAY_SIZE(d->arch.hvm.vpic) ) +return -ENOENT; + +s = hvm_get_entry(PIC, h); +if ( !s ) +return -ENODATA; + +/* + * Check to-be-loaded values are within valid range, for them to represent + * actually reachable state. Uses of some of the values elsewhere assume + * this is the case. + */ +if ( s->int_output > 1 ) +return -EDOM; + +if ( s->is_master != !inst || + (s->int_output && s->init_state) || + (s->elcr & ~vpic_elcr_mask(s, 1)) ) +return -EINVAL; + +return 0; +} + static int cf_check vpic_load(struct domain *d, hvm_domain_context_t *h) { struct hvm_hw_vpic *s; @@ -438,18 +470,21 @@ static int cf_check vpic_load(struct dom return -ENODEV; /* Which PIC is this? 
*/ -if ( inst > 1 ) -return -ENOENT; +ASSERT(inst < ARRAY_SIZE(d->arch.hvm.vpic)); s = >arch.hvm.vpic[inst]; /* Load the state */ if ( hvm_load_entry(PIC, h, s) != 0 ) return -EINVAL; +if ( s->is_master ) +s->elcr |= 1 << 2; + return 0; } -HVM_REGISTER_SAVE_RESTORE(PIC, vpic_save, NULL, vpic_load, 2, HVMSR_PER_DOM); +HVM_REGISTER_SAVE_RESTORE(PIC, vpic_save, vpic_check, vpic_load, 2, + HVMSR_PER_DOM); void vpic_reset(struct domain *d) {
[PATCH v4 3/5] x86/vPIT: check values loaded from state save record
In particular pit_latch_status() and speaker_ioport_read() perform calculations which assume in-bounds values. Several of the state save record fields can hold wider ranges, though. Refuse to load values which cannot result from normal operation, except mode, the init state of which (see also below) cannot otherwise be reached. Note that ->gate should only be possible to be zero for channel 2; enforce that as well. Adjust pit_reset()'s writing of ->mode as well, to not unduly affect the value pit_latch_status() may calculate. The chosen mode of 7 is still one which cannot be established by writing the control word. Note that with or without this adjustment effectively all switch() statements using mode as the control expression aren't quite right when the PIT is still in that init state; there is an apparent assumption that before these can sensibly be invoked, the guest would init the PIT (i.e. in particular set the mode). Signed-off-by: Jan Beulich Reviewed-by: Roger Pau Monné --- For mode we could refuse to load values in the [0x08,0xfe] range; I'm not certain that's going to be overly helpful. For count I was considering to clip the saved value to 16 bits (i.e. to convert the internally used 0x10000 back to the architectural 0x0000), but pit_save() doesn't easily lend itself to such a "fixup". If desired perhaps better a separate change anyway. --- v3: Slightly adjust two comments. Re-base over rename in earlier patch. v2: Introduce separate checking function; switch to refusing to load bogus values. Re-base. --- a/xen/arch/x86/emul-i8254.c +++ b/xen/arch/x86/emul-i8254.c @@ -47,6 +47,7 @@ #define RW_STATE_MSB 2 #define RW_STATE_WORD0 3 #define RW_STATE_WORD1 4 +#define RW_STATE_NUM 5 #define get_guest_time(v) \ (is_hvm_vcpu(v) ?
hvm_get_guest_time(v) : (u64)get_s_time()) @@ -427,6 +428,47 @@ static int cf_check pit_save(struct vcpu return rc; } +static int cf_check pit_check(const struct domain *d, hvm_domain_context_t *h) +{ +const struct hvm_hw_pit *hw; +unsigned int i; + +if ( !has_vpit(d) ) +return -ENODEV; + +hw = hvm_get_entry(PIT, h); +if ( !hw ) +return -ENODATA; + +/* + * Check to-be-loaded values are within valid range, for them to represent + * actually reachable state. Uses of some of the values elsewhere assume + * this is the case. Note that the channels' mode fields aren't checked; + * Xen prior to 4.19 might save them as 0xff. + */ +if ( hw->speaker_data_on > 1 || hw->pad0 ) +return -EDOM; + +for ( i = 0; i < ARRAY_SIZE(hw->channels); ++i ) +{ +const struct hvm_hw_pit_channel *ch = >channels[i]; + +if ( ch->count > 0x1 || + ch->count_latched >= RW_STATE_NUM || + ch->read_state >= RW_STATE_NUM || + ch->write_state >= RW_STATE_NUM || + ch->rw_mode > RW_STATE_WORD0 || + ch->gate > 1 || + ch->bcd > 1 ) +return -EDOM; + +if ( i != 2 && !ch->gate ) +return -EINVAL; +} + +return 0; +} + static int cf_check pit_load(struct domain *d, hvm_domain_context_t *h) { PITState *pit = domain_vpit(d); @@ -443,6 +485,14 @@ static int cf_check pit_load(struct doma goto out; } +for ( i = 0; i < ARRAY_SIZE(pit->hw.channels); ++i ) +{ +struct hvm_hw_pit_channel *ch = >hw.channels[i]; + +if ( (ch->mode &= 7) > 5 ) +ch->mode -= 4; +} + /* * Recreate platform timers from hardware state. There will be some * time jitter here, but the wall-clock will have jumped massively, so @@ -458,7 +508,7 @@ static int cf_check pit_load(struct doma return rc; } -HVM_REGISTER_SAVE_RESTORE(PIT, pit_save, NULL, pit_load, 1, HVMSR_PER_DOM); +HVM_REGISTER_SAVE_RESTORE(PIT, pit_save, pit_check, pit_load, 1, HVMSR_PER_DOM); #endif /* The intercept action for PIT DM retval: 0--not handled; 1--handled. 
*/ @@ -575,7 +625,7 @@ void pit_reset(struct domain *d) for ( i = 0; i < 3; i++ ) { s = >hw.channels[i]; -s->mode = 0xff; /* the init mode */ +s->mode = 7; /* unreachable sentinel */ s->gate = (i != 2); pit_load_count(pit, i, 0); }
[PATCH v4 2/5] x86/HVM: adjust save/restore hook registration for optional check handler
Register NULL uniformly as a first step. Signed-off-by: Jan Beulich Acked-by: Andrew Cooper --- v2: New. --- a/xen/arch/x86/cpu/mcheck/vmce.c +++ b/xen/arch/x86/cpu/mcheck/vmce.c @@ -374,7 +374,7 @@ static int cf_check vmce_load_vcpu_ctxt( return err ?: vmce_restore_vcpu(v, ); } -HVM_REGISTER_SAVE_RESTORE(VMCE_VCPU, vmce_save_vcpu_ctxt, +HVM_REGISTER_SAVE_RESTORE(VMCE_VCPU, vmce_save_vcpu_ctxt, NULL, vmce_load_vcpu_ctxt, 1, HVMSR_PER_VCPU); #endif --- a/xen/arch/x86/emul-i8254.c +++ b/xen/arch/x86/emul-i8254.c @@ -458,7 +458,7 @@ static int cf_check pit_load(struct doma return rc; } -HVM_REGISTER_SAVE_RESTORE(PIT, pit_save, pit_load, 1, HVMSR_PER_DOM); +HVM_REGISTER_SAVE_RESTORE(PIT, pit_save, NULL, pit_load, 1, HVMSR_PER_DOM); #endif /* The intercept action for PIT DM retval: 0--not handled; 1--handled. */ --- a/xen/arch/x86/hvm/hpet.c +++ b/xen/arch/x86/hvm/hpet.c @@ -692,7 +692,7 @@ static int cf_check hpet_load(struct dom return 0; } -HVM_REGISTER_SAVE_RESTORE(HPET, hpet_save, hpet_load, 1, HVMSR_PER_DOM); +HVM_REGISTER_SAVE_RESTORE(HPET, hpet_save, NULL, hpet_load, 1, HVMSR_PER_DOM); static void hpet_set(HPETState *h) { --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -793,7 +793,7 @@ static int cf_check hvm_load_tsc_adjust( return 0; } -HVM_REGISTER_SAVE_RESTORE(TSC_ADJUST, hvm_save_tsc_adjust, +HVM_REGISTER_SAVE_RESTORE(TSC_ADJUST, hvm_save_tsc_adjust, NULL, hvm_load_tsc_adjust, 1, HVMSR_PER_VCPU); static int cf_check hvm_save_cpu_ctxt(struct vcpu *v, hvm_domain_context_t *h) @@ -1189,7 +1189,7 @@ static int cf_check hvm_load_cpu_ctxt(st return 0; } -HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt, 1, +HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, NULL, hvm_load_cpu_ctxt, 1, HVMSR_PER_VCPU); #define HVM_CPU_XSAVE_SIZE(xcr0) (offsetof(struct hvm_hw_cpu_xsave, \ @@ -1538,6 +1538,7 @@ static int __init cf_check hvm_register_ hvm_register_savevm(CPU_XSAVE_CODE, "CPU_XSAVE", hvm_save_cpu_xsave_states, +NULL, 
hvm_load_cpu_xsave_states, HVM_CPU_XSAVE_SIZE(xfeature_mask) + sizeof(struct hvm_save_descriptor), @@ -1546,6 +1547,7 @@ static int __init cf_check hvm_register_ hvm_register_savevm(CPU_MSR_CODE, "CPU_MSR", hvm_save_cpu_msrs, +NULL, hvm_load_cpu_msrs, HVM_CPU_MSR_SIZE(ARRAY_SIZE(msrs_to_send)) + sizeof(struct hvm_save_descriptor), --- a/xen/arch/x86/hvm/irq.c +++ b/xen/arch/x86/hvm/irq.c @@ -784,9 +784,9 @@ static int cf_check irq_load_link(struct return 0; } -HVM_REGISTER_SAVE_RESTORE(PCI_IRQ, irq_save_pci, irq_load_pci, +HVM_REGISTER_SAVE_RESTORE(PCI_IRQ, irq_save_pci, NULL, irq_load_pci, 1, HVMSR_PER_DOM); -HVM_REGISTER_SAVE_RESTORE(ISA_IRQ, irq_save_isa, irq_load_isa, +HVM_REGISTER_SAVE_RESTORE(ISA_IRQ, irq_save_isa, NULL, irq_load_isa, 1, HVMSR_PER_DOM); -HVM_REGISTER_SAVE_RESTORE(PCI_LINK, irq_save_link, irq_load_link, +HVM_REGISTER_SAVE_RESTORE(PCI_LINK, irq_save_link, NULL, irq_load_link, 1, HVMSR_PER_DOM); --- a/xen/arch/x86/hvm/mtrr.c +++ b/xen/arch/x86/hvm/mtrr.c @@ -773,7 +773,7 @@ static int cf_check hvm_load_mtrr_msr(st return 0; } -HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, hvm_load_mtrr_msr, 1, +HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, NULL, hvm_load_mtrr_msr, 1, HVMSR_PER_VCPU); void memory_type_changed(struct domain *d) --- a/xen/arch/x86/hvm/pmtimer.c +++ b/xen/arch/x86/hvm/pmtimer.c @@ -300,7 +300,7 @@ static int cf_check acpi_load(struct dom return 0; } -HVM_REGISTER_SAVE_RESTORE(PMTIMER, acpi_save, acpi_load, +HVM_REGISTER_SAVE_RESTORE(PMTIMER, acpi_save, NULL, acpi_load, 1, HVMSR_PER_DOM); int pmtimer_change_ioport(struct domain *d, uint64_t version) --- a/xen/arch/x86/hvm/rtc.c +++ b/xen/arch/x86/hvm/rtc.c @@ -797,7 +797,7 @@ static int cf_check rtc_load(struct doma return 0; } -HVM_REGISTER_SAVE_RESTORE(RTC, rtc_save, rtc_load, 1, HVMSR_PER_DOM); +HVM_REGISTER_SAVE_RESTORE(RTC, rtc_save, NULL, rtc_load, 1, HVMSR_PER_DOM); void rtc_reset(struct domain *d) { --- a/xen/arch/x86/hvm/save.c +++ b/xen/arch/x86/hvm/save.c @@ 
-88,6 +88,7 @@ static struct { void __init hvm_register_savevm(uint16_t typecode, const char *name, hvm_save_handler save_state, +hvm_check_handler check_state, hvm_load_handler load_state, size_t size, int kind) { @@
[PATCH v4 1/5] x86/HVM: split restore state checking from state loading
..., at least as reasonably feasible without making a check hook mandatory (in particular strict vs relaxed/zero-extend length checking can't be done early this way). Note that only one of the two uses of "real" hvm_load() is accompanied with a "checking" one. The other directly consumes hvm_save() output, which ought to be well-formed. This means that while input data related checks don't need repeating in the "load" function when already done by the "check" one (albeit assertions to this effect may be desirable), domain state related checks (e.g. has_xyz(d)) will be required in both places. Suggested-by: Roger Pau Monné Signed-off-by: Jan Beulich --- Now that this re-arranges hvm_load() anyway, wouldn't it be better to down the vCPU-s ahead of calling arch_hvm_load() (which is now easy to arrange for)? Do we really need all the copying involved in use of _hvm_read_entry() (backing hvm_load_entry())? Zero-extending loads are likely easier to handle that way, but for strict loads all we gain is a reduced risk of unaligned accesses (compared to simply pointing into h->data[]). --- v4: Fold hvm_check() into hvm_load(). v2: New. 
--- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -379,8 +379,12 @@ long arch_do_domctl( if ( copy_from_guest(c.data, domctl->u.hvmcontext.buffer, c.size) != 0 ) goto sethvmcontext_out; +ret = hvm_load(d, false, ); +if ( ret ) +goto sethvmcontext_out; + domain_pause(d); -ret = hvm_load(d, ); +ret = hvm_load(d, true, ); domain_unpause(d); sethvmcontext_out: --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -5397,7 +5397,7 @@ int hvm_copy_context_and_params(struct d } c.cur = 0; -rc = hvm_load(dst, ); +rc = hvm_load(dst, true, ); out: vfree(c.data); --- a/xen/arch/x86/hvm/save.c +++ b/xen/arch/x86/hvm/save.c @@ -30,7 +30,8 @@ static void arch_hvm_save(struct domain d->arch.hvm.sync_tsc = rdtsc(); } -static int arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr) +static int arch_hvm_check(const struct domain *d, + const struct hvm_save_header *hdr) { uint32_t eax, ebx, ecx, edx; @@ -55,6 +56,11 @@ static int arch_hvm_load(struct domain * "(%#"PRIx32") and restored on another (%#"PRIx32").\n", d->domain_id, hdr->cpuid, eax); +return 0; +} + +static void arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr) +{ /* Restore guest's preferred TSC frequency. */ if ( hdr->gtsc_khz ) d->arch.tsc_khz = hdr->gtsc_khz; @@ -66,13 +72,12 @@ static int arch_hvm_load(struct domain * /* VGA state is not saved/restored, so we nobble the cache. 
*/ d->arch.hvm.stdvga.cache = STDVGA_CACHE_DISABLED; - -return 0; } /* List of handlers for various HVM save and restore types */ static struct { hvm_save_handler save; +hvm_check_handler check; hvm_load_handler load; const char *name; size_t size; @@ -88,6 +93,7 @@ void __init hvm_register_savevm(uint16_t { ASSERT(typecode <= HVM_SAVE_CODE_MAX); ASSERT(hvm_sr_handlers[typecode].save == NULL); +ASSERT(hvm_sr_handlers[typecode].check == NULL); ASSERT(hvm_sr_handlers[typecode].load == NULL); hvm_sr_handlers[typecode].save = save_state; hvm_sr_handlers[typecode].load = load_state; @@ -275,12 +281,10 @@ int hvm_save(struct domain *d, hvm_domai return 0; } -int hvm_load(struct domain *d, hvm_domain_context_t *h) +int hvm_load(struct domain *d, bool real, hvm_domain_context_t *h) { const struct hvm_save_header *hdr; struct hvm_save_descriptor *desc; -hvm_load_handler handler; -struct vcpu *v; int rc; if ( d->is_dying ) @@ -291,50 +295,91 @@ int hvm_load(struct domain *d, hvm_domai if ( !hdr ) return -ENODATA; -rc = arch_hvm_load(d, hdr); -if ( rc ) -return rc; +rc = arch_hvm_check(d, hdr); +if ( real ) +{ +struct vcpu *v; + +ASSERT(!rc); +arch_hvm_load(d, hdr); -/* Down all the vcpus: we only re-enable the ones that had state saved. */ -for_each_vcpu(d, v) -if ( !test_and_set_bit(_VPF_down, >pause_flags) ) -vcpu_sleep_nosync(v); +/* + * Down all the vcpus: we only re-enable the ones that had state + * saved. + */ +for_each_vcpu(d, v) +if ( !test_and_set_bit(_VPF_down, >pause_flags) ) +vcpu_sleep_nosync(v); +} +else if ( rc ) +return rc; for ( ; ; ) { +const char *name; +hvm_load_handler load; + if ( h->size - h->cur < sizeof(struct hvm_save_descriptor) ) { /* Run out of data */ printk(XENLOG_G_ERR "HVM%d restore: save did not end with a null entry\n", d->domain_id); +ASSERT(!real);
[PATCH v4 0/5] x86/HVM: load state checking
With the request to convert bounding to actual refusal, and then doing so in new hooks, the two previously separate patches now need to be in a series, with infrastructure work done first. Clearly the checking in other load handlers could (and likely wants to be) moved to separate check handlers as well - one example of doing so is added anew in v4, the rest will want doing down the road. 1: HVM: split restore state checking from state loading 2: HVM: adjust save/restore hook registration for optional check handler 3: vPIT: check values loaded from state save record 4: vPIC: check values loaded from state save record 5: vIRQ: split PCI link load state checking from actual loading Jan
Re: [XEN PATCH] xen/arm: ffa: return fpi size from FFA_PARTITION_INFO_GET
Hi Jens, > On 13 Dec 2023, at 11:31, Jens Wiklander wrote: > > Until now has FFA_PARTITION_INFO_GET always returned zero in w3, but > FF-A v1.1 requires FFA_PARTITION_INFO_GET to return the size of each > partition information descriptor returned if > FFA_PARTITION_INFO_GET_COUNT_FLAG isn't set. > Good finding. > The SPMC queried with FFA_PARTITION_INFO_GET must also return the size of each > partition information descriptor returned so fix this by passing along > the same value. > > Fixes: caf6491e95a9 ("xen/arm: ffa: support guest FFA_PARTITION_INFO_GET") > Signed-off-by: Jens Wiklander Reviewed-by: Bertrand Marquis Cheers Bertrand > --- > xen/arch/arm/tee/ffa.c | 21 + > 1 file changed, 13 insertions(+), 8 deletions(-) > > diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c > index 183528d13388..1d4e0a083006 100644 > --- a/xen/arch/arm/tee/ffa.c > +++ b/xen/arch/arm/tee/ffa.c > @@ -514,7 +514,7 @@ static int32_t ffa_rxtx_map(paddr_t tx_addr, paddr_t > rx_addr, > > static int32_t ffa_partition_info_get(uint32_t w1, uint32_t w2, uint32_t w3, > uint32_t w4, uint32_t w5, > - uint32_t *count) > + uint32_t *count, uint32_t *fpi_size) > { > const struct arm_smccc_1_2_regs arg = { > .a0 = FFA_PARTITION_INFO_GET, > @@ -531,7 +531,10 @@ static int32_t ffa_partition_info_get(uint32_t w1, > uint32_t w2, uint32_t w3, > > ret = get_ffa_ret_code(&resp); > if ( !ret ) > +{ > *count = resp.a2; > +*fpi_size = resp.a3; > +} > > return ret; > } > @@ -784,7 +787,7 @@ static uint32_t handle_rxtx_unmap(void) > > static int32_t handle_partition_info_get(uint32_t w1, uint32_t w2, uint32_t > w3, > uint32_t w4, uint32_t w5, > - uint32_t *count) > + uint32_t *count, uint32_t *fpi_size) > { > int32_t ret = FFA_RET_DENIED; > struct domain *d = current->domain; > @@ -799,7 +802,7 @@ static int32_t handle_partition_info_get(uint32_t w1, > uint32_t w2, uint32_t w3, > */ > if ( w5 == FFA_PARTITION_INFO_GET_COUNT_FLAG && > ctx->guest_vers == FFA_VERSION_1_1 ) > -return ffa_partition_info_get(w1, w2, 
w3, w4, w5, count); > +return ffa_partition_info_get(w1, w2, w3, w4, w5, count, fpi_size); > if ( w5 ) > return FFA_RET_INVALID_PARAMETERS; > > @@ -812,7 +815,7 @@ static int32_t handle_partition_info_get(uint32_t w1, > uint32_t w2, uint32_t w3, > if ( !ctx->page_count || !ctx->rx_is_free ) > goto out; > spin_lock(_rx_buffer_lock); > -ret = ffa_partition_info_get(w1, w2, w3, w4, w5, count); > +ret = ffa_partition_info_get(w1, w2, w3, w4, w5, count, fpi_size); > if ( ret ) > goto out_rx_buf_unlock; > /* > @@ -842,7 +845,7 @@ static int32_t handle_partition_info_get(uint32_t w1, > uint32_t w2, uint32_t w3, > } > else > { > -size_t sz = *count * sizeof(struct ffa_partition_info_1_1); > +size_t sz = *count * *fpi_size; > > if ( ctx->page_count * FFA_PAGE_SIZE < sz ) > { > @@ -1409,6 +1412,7 @@ static bool ffa_handle_call(struct cpu_user_regs *regs) > uint32_t fid = get_user_reg(regs, 0); > struct domain *d = current->domain; > struct ffa_ctx *ctx = d->arch.tee; > +uint32_t fpi_size; > uint32_t count; > int e; > > @@ -1444,11 +1448,11 @@ static bool ffa_handle_call(struct cpu_user_regs > *regs) > get_user_reg(regs, 2), > get_user_reg(regs, 3), > get_user_reg(regs, 4), > - get_user_reg(regs, 5), ); > + get_user_reg(regs, 5), , > _size); > if ( e ) > set_regs_error(regs, e); > else > -set_regs_success(regs, count, 0); > +set_regs_success(regs, count, fpi_size); > return true; > case FFA_RX_RELEASE: > e = handle_rx_release(); > @@ -1629,10 +1633,11 @@ static bool init_subscribers(struct > ffa_partition_info_1_1 *fpi, uint16_t count) > static bool init_sps(void) > { > bool ret = false; > +uint32_t fpi_size; > uint32_t count; > int e; > > -e = ffa_partition_info_get(0, 0, 0, 0, 0, ); > +e = ffa_partition_info_get(0, 0, 0, 0, 0, , _size); > if ( e ) > { > printk(XENLOG_ERR "ffa: Failed to get list of SPs: %d\n", e); > -- > 2.34.1 >
Re: [PATCH 3/9] xen/efi: address violations of MISRA C:2012 Rule 11.8
On 14/12/23 17:32, Jan Beulich wrote: On 14.12.2023 13:07, Simone Ballarin wrote: --- a/xen/common/version.c +++ b/xen/common/version.c @@ -178,7 +178,7 @@ void __init xen_build_init(void) if ( [1] >= __note_gnu_build_id_end ) return; -sz = (void *)__note_gnu_build_id_end - (void *)n; +sz = (const void *)__note_gnu_build_id_end - (const void *)n; rc = xen_build_id_check(n, sz, _id_p, _id_len); How does this change fit the subject? I'm also inclined to ask that these casts be changed to unsigned long or uintptr_t. For the actual EFI change: Reviewed-by: Jan Beulich Jan Ok, v2 will use uintptr_t. -- Simone Ballarin, M.Sc. Field Application Engineer, BUGSENG (https://bugseng.com)
Re: [PATCH 0/9] xen: address violations of MISRA C:2012 Rule 11.8
On 14/12/23 13:36, Jan Beulich wrote: On 14.12.2023 13:07, Simone Ballarin wrote: From: Maria Celeste Cesario The xen sources contain violations of MISRA C:2012 Rule 11.8 whose headline states: "A conversion shall not remove any const, volatile or _Atomic qualification from the type pointed to by a pointer". This patch amends or removes casts that unnecessarily drop const and volatile qualifiers. Example: static always_inline bool int_##name(volatile void *p) { volatile uint32_t *ptr = (uint32_t *)p; /* Non-compliant */ volatile uint32_t *ptr = (volatile uint32_t *)p;/* Compliant, proposed change */ } Why would you further complicate things when here the cast can simply be dropped? Jan Of course, the example will be improved in v2. -- Simone Ballarin, M.Sc. Field Application Engineer, BUGSENG (https://bugseng.com)
xen | Successful pipeline for staging-4.17 | 949a4aad
Pipeline #177680 has passed! Project: xen ( https://gitlab.com/xen-project/xen ) Branch: staging-4.17 ( https://gitlab.com/xen-project/xen/-/commits/staging-4.17 ) Commit: 949a4aad ( https://gitlab.com/xen-project/xen/-/commit/949a4aad417621638231e4cf9f1b35e8e0328463 ) Commit Message: update Xen version to 4.17.3 Commit Author: Jan Beulich ( https://gitlab.com/jbeulich ) Pipeline #177680 ( https://gitlab.com/xen-project/xen/-/pipelines/177680 ) triggered by Ganis ( https://gitlab.com/ganis ) successfully completed 84 jobs in 2 stages. -- You're receiving this email because of your account on gitlab.com.
Re: [RFC PATCH v1 1/1] xen/Makefile: introduce ARCH_FIXED_CONFIG for randconfig
On Mon, 2023-12-11 at 16:56 +0100, Jan Beulich wrote: > On 07.12.2023 21:17, Andrew Cooper wrote: > > On 07/12/2023 5:03 pm, Oleksii Kurochko wrote: > > > ARCH_FIXED_CONFIG is required in the case of randconfig > > > and CI for configs that aren't ready or are not > > > supposed to be implemented for specific architecture. > > > These configs should always be disabled to prevent randconfig > > > related tests from failing. > > > > > > Signed-off-by: Oleksii Kurochko > > > --- > > > xen/Makefile | 5 - > > > 1 file changed, 4 insertions(+), 1 deletion(-) > > > > > > diff --git a/xen/Makefile b/xen/Makefile > > > index ca571103c8..8ae8fe1480 100644 > > > --- a/xen/Makefile > > > +++ b/xen/Makefile > > > @@ -336,11 +336,14 @@ ifeq ($(config-build),y) > > > # *config targets only - make sure prerequisites are updated, > > > and descend > > > # in tools/kconfig to make the *config target > > > > > > +ARCH_FORCED_CONFIG := > > > $(srctree)/arch/$(SRCARCH)/configs/randomforced.config > > > + > > > # Create a file for KCONFIG_ALLCONFIG which depends on the > > > environment. > > > # This will be use by kconfig targets > > > allyesconfig/allmodconfig/allnoconfig/randconfig > > > filechk_kconfig_allconfig = \ > > > $(if $(findstring n,$(XEN_HAS_CHECKPOLICY)), echo > > > 'CONFIG_XSM_FLASK_POLICY=n';) \ > > > - $(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG);) \ > > > + $(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG); \ > > > + $(if $(wildcard $(ARCH_FORCED_CONFIG)), cat > > > $(ARCH_FORCED_CONFIG);) ) \ > > > : > > > > > > .allconfig.tmp: FORCE > > > > We already have infrastructure for this. What's wrong with > > EXTRA_FIXED_RANDCONFIG? > > What I don't understand here is why dealing with the issue would want > limiting to gitlab-CI. Anyone could run randconfig on their own, and > imo it would be helpful if the same issue(s) could be prevented > there, > too. Hence my earlier suggestion to have a snippet which can be used > by "interested" parties. 
And once dealt with in e.g. the makefile > there should not be a need for any overrides in the CI config > anymore. I agree with Jan's point of view that having a more universal solution for cases where it is necessary to disable a config for randconfig would be beneficial. Sometimes, I test randconfig locally, especially during the RISC-V Xen full build patch series where numerous configs are disabled. It would be convenient to have only one place to disable configs instead of duplicating them in two different places. Does anyone have any objections? ~ Oleksii > > > ---8<--- > > > > CI: Revert "automation: Drop ppc64le-*randconfig jobs", fix > > Randconfig > > with existing infrastructure > > > > This reverts commit cbb71b95dd708b1e26899bbe1e7bf9a85081fd60. > > > > Signed-off-by: Andrew Cooper > > > > diff --git a/automation/gitlab-ci/build.yaml > > b/automation/gitlab-ci/build.yaml > > index 32af30ccedc9..346d0400ed09 100644 > > --- a/automation/gitlab-ci/build.yaml > > +++ b/automation/gitlab-ci/build.yaml > > @@ -538,6 +538,7 @@ archlinux-current-gcc-riscv64-randconfig: > > RANDCONFIG: y > > EXTRA_FIXED_RANDCONFIG: > > CONFIG_COVERAGE=n > > + CONFIG_GRANT_TABLE=n > > > > archlinux-current-gcc-riscv64-debug-randconfig: > > extends: .gcc-riscv64-cross-build-debug > > @@ -547,6 +548,7 @@ archlinux-current-gcc-riscv64-debug-randconfig: > > RANDCONFIG: y > > EXTRA_FIXED_RANDCONFIG: > > CONFIG_COVERAGE=n > > + CONFIG_GRANT_TABLE=n > > > > # Power cross-build > > debian-bullseye-gcc-ppc64le: > > @@ -563,6 +565,26 @@ debian-bullseye-gcc-ppc64le-debug: > > KBUILD_DEFCONFIG: ppc64_defconfig > > HYPERVISOR_ONLY: y > > > > +debian-bullseye-gcc-ppc64le-randconfig: > > + extends: .gcc-ppc64le-cross-build > > + variables: > > + CONTAINER: debian:bullseye-ppc64le > > + KBUILD_DEFCONFIG: ppc64_defconfig > > + RANDCONFIG: y > > + EXTRA_FIXED_RANDCONFIG: > > + CONFIG_COVERAGE=n > > + CONFIG_GRANT_TABLE=n > > + > > +debian-bullseye-gcc-ppc64le-debug-randconfig: > > + extends: 
.gcc-ppc64le-cross-build-debug > > + variables: > > + CONTAINER: debian:bullseye-ppc64le > > + KBUILD_DEFCONFIG: ppc64_defconfig > > + RANDCONFIG: y > > + EXTRA_FIXED_RANDCONFIG: > > + CONFIG_COVERAGE=n > > + CONFIG_GRANT_TABLE=n > > + > > # Yocto test jobs > > yocto-qemuarm64: > > extends: .yocto-test-arm64 > > >
Re: [XEN PATCH v3 1/3] xen/x86: add missing instances of asmlinkage attributes
On 12.12.2023 02:48, Stefano Stabellini wrote: > On Mon, 11 Dec 2023, Nicola Vetrini wrote: >> No functional change. >> >> Signed-off-by: Nicola Vetrini > > Reviewed-by: Stefano Stabellini Acked-by: Jan Beulich
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On 18.12.2023 14:46, Jan Beulich wrote: > On 18.12.2023 13:11, Roger Pau Monné wrote: >> On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: >>> --- >>> I have to admit that I'm not really certain about the placement of the >>> IBPB wrt the MSR_SPEC_CTRL writes. For now I've simply used "opposite of >>> entry". >> >> Maybe it would easier to just add the MSR_PRED_CMD PRED_CMD_IBPB write >> to the vmcs MSR load list? >> >> It's a one-time only AFAICT, as you would only want to do this for >> context-switch AFAICT. > > That would be a back and forth of adding and removing the MSR to/from > that list then, which I'm not convinced is helpful. With these special > MSRs I would further be uncertain as to their effect when used via one > of these lists. Plus (as only a secondary argument, but still) it would make PV and HVM mechanisms entirely different. Jan
Re: [PATCH v4 2/4] x86/spec-ctrl: defer context-switch IBPB until guest entry
On 18.12.2023 13:39, Roger Pau Monné wrote: > On Tue, Feb 14, 2023 at 05:11:05PM +0100, Jan Beulich wrote: >> In order to avoid clobbering Xen's own predictions, defer the barrier as >> much as possible. Merely mark the CPU as needing a barrier issued the >> next time we're exiting to guest context. > > While I understand that doing the flush in the middle of the guest > context might not be ideal, as it's my understanding we also s/guest context/context switch/? > needlessly flush Xen predictions, I'm unsure whether this makes any > difference in practice, and IMO just makes the exit to guest paths > more complex. I need to redirect this question to Andrew, who suggested that doing so can be expected to make a difference. When we were discussing this, I could easily see it might make a difference, but I cannot provide hard proof. Jan
[linux-linus test] 184162: tolerable FAIL - PUSHED
flight 184162 linux-linus real [real] flight 184166 linux-linus real-retest [real] http://logs.test-lab.xenproject.org/osstest/logs/184162/ http://logs.test-lab.xenproject.org/osstest/logs/184166/ Failures :-/ but no regressions. Tests which are failing intermittently (not blocking): test-armhf-armhf-examine 8 reboot fail pass in 184166-retest test-armhf-armhf-libvirt 8 xen-bootfail pass in 184166-retest Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt-raw 15 saverestore-support-check fail blocked in 184159 test-armhf-armhf-libvirt 16 saverestore-support-check fail in 184166 like 184159 test-armhf-armhf-libvirt15 migrate-support-check fail in 184166 never pass test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 184159 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 184159 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 184159 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check fail like 184159 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 184159 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 184159 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never 
pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-qcow2 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-armhf-armhf-xl-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-raw 14 migrate-support-checkfail never pass version targeted for testing: linuxceb6a6f023fd3e8b07761ed900352ef574010bcb baseline version: linux0e389834672c723435a44818ed2cabc4dad24429 Last test of basis 184159 2023-12-17 17:42:14 Z0 days Testing same since 184162 2023-12-18 02:29:46 Z0 days1 attempts People who touched revisions under test: Linus Torvalds Mark 
Rutland Peter Zijlstra (Intel) jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm pass build-amd64 pass build-arm64
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On 18.12.2023 14:46, Jan Beulich wrote: > On 18.12.2023 13:11, Roger Pau Monné wrote: >> Hello, >> >> I'm not as expert as Andrew in all the speculation stuff, but I will >> try to provide some feedback. >> >> On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: >>> In order to be able to defer the context switch IBPB to the last >>> possible point, add logic to the exit-to-guest paths to issue the >>> barrier there, including the "IBPB doesn't flush the RSB/RAS" >>> workaround. Since alternatives, for now at least, can't nest, emit JMP >>> to skip past both constructs where both are needed. This may be more >>> efficient anyway, as the sequence of NOPs is pretty long. >> >> Could you elaborate on the reason why deferring the IBPB to the exit >> to guest path is helpful? So far it just seem to make the logic more >> complex without nay justification (at least in the changelog). > > I've added "(to leave behind as little as possible)" ahead of the 1st > comma - is that sufficient, do you think? Actually, the next patch supplies better context, i.e. is more / also about avoiding to clobber Xen's own predictions. Jan
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On 18.12.2023 13:11, Roger Pau Monné wrote: > Hello, > > I'm not as expert as Andrew in all the speculation stuff, but I will > try to provide some feedback. > > On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: >> In order to be able to defer the context switch IBPB to the last >> possible point, add logic to the exit-to-guest paths to issue the >> barrier there, including the "IBPB doesn't flush the RSB/RAS" >> workaround. Since alternatives, for now at least, can't nest, emit JMP >> to skip past both constructs where both are needed. This may be more >> efficient anyway, as the sequence of NOPs is pretty long. > > Could you elaborate on the reason why deferring the IBPB to the exit > to guest path is helpful? So far it just seem to make the logic more > complex without nay justification (at least in the changelog). I've added "(to leave behind as little as possible)" ahead of the 1st comma - is that sufficient, do you think? >> --- >> I have to admit that I'm not really certain about the placement of the >> IBPB wrt the MSR_SPEC_CTRL writes. For now I've simply used "opposite of >> entry". > > Maybe it would easier to just add the MSR_PRED_CMD PRED_CMD_IBPB write > to the vmcs MSR load list? > > It's a one-time only AFAICT, as you would only want to do this for > context-switch AFAICT. That would be a back and forth of adding and removing the MSR to/from that list then, which I'm not convinced is helpful. With these special MSRs I would further be uncertain as to their effect when used via one of these lists. Jan
Re: [PATCH v4 2/4] x86/spec-ctrl: defer context-switch IBPB until guest entry
On Tue, Feb 14, 2023 at 05:11:05PM +0100, Jan Beulich wrote: > In order to avoid clobbering Xen's own predictions, defer the barrier as > much as possible. Merely mark the CPU as needing a barrier issued the > next time we're exiting to guest context. While I understand that doing the flush in the middle of the guest context might not be ideal, as it's my understanding we also needlessly flush Xen predictions, I'm unsure whether this makes any difference in practice, and IMO just makes the exit to guest paths more complex. Thanks, Roger.
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
Hello, I'm not as expert as Andrew in all the speculation stuff, but I will try to provide some feedback. On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: > In order to be able to defer the context switch IBPB to the last > possible point, add logic to the exit-to-guest paths to issue the > barrier there, including the "IBPB doesn't flush the RSB/RAS" > workaround. Since alternatives, for now at least, can't nest, emit JMP > to skip past both constructs where both are needed. This may be more > efficient anyway, as the sequence of NOPs is pretty long. Could you elaborate on the reason why deferring the IBPB to the exit to guest path is helpful? So far it just seem to make the logic more complex without nay justification (at least in the changelog). > > As with all other conditional blocks on exit-to-guest paths, no > Spectre-v1 protections are necessary as execution will imminently be > hitting a serialising event. > > Signed-off-by: Jan Beulich > --- > I have to admit that I'm not really certain about the placement of the > IBPB wrt the MSR_SPEC_CTRL writes. For now I've simply used "opposite of > entry". Maybe it would easier to just add the MSR_PRED_CMD PRED_CMD_IBPB write to the vmcs MSR load list? It's a one-time only AFAICT, as you would only want to do this for context-switch AFAICT. Thanks, Roger.
Re: [PATCH v2 32/39] xen/riscv: add minimal stuff to asm/page.h to build full Xen
On 18.12.2023 12:57, Oleksii wrote: > On Mon, 2023-12-18 at 12:36 +0100, Jan Beulich wrote: >> On 18.12.2023 11:45, Oleksii wrote: >>> On Thu, 2023-12-14 at 16:57 +0100, Jan Beulich wrote: On 24.11.2023 11:30, Oleksii Kurochko wrote: > Signed-off-by: Oleksii Kurochko Acked-by: Jan Beulich I wonder though ... > --- a/xen/arch/riscv/include/asm/page.h > +++ b/xen/arch/riscv/include/asm/page.h > @@ -6,6 +6,7 @@ > #ifndef __ASSEMBLY__ > > #include > +#include > #include > > #include > @@ -32,6 +33,9 @@ > #define PTE_LEAF_DEFAULT (PTE_VALID | PTE_READABLE > | > PTE_WRITABLE) > #define PTE_TABLE (PTE_VALID) > > +/* TODO */ > +#define PAGE_HYPERVISOR 0 ... whether this couldn't be defined properly right away. >>> It can be introduced now but it requires some additional defines to >>> be >>> introduced in the same time: >>> >>> #define _PAGE_W_BIT 2 >>> #define _PAGE_XN_BIT 3 >>> #define _PAGE_RO_BIT 1 >>> #define _PAGE_XN (1U << _PAGE_XN_BIT) >>> #define _PAGE_RO (1U << _PAGE_RO_BIT) >>> #define _PAGE_W (1U << _PAGE_W_BIT) >> >> Why would you need these, when you already have >> PTE_{READABLE,WRITABLE,EXECUTABLE} just out of context from above? > I thought that the hypervisor page table is fully software-related, and > that's why a separate PAGE*BIT was introduced. These bits can be > different from PTE* bits, which are hardware-related > https://gitlab.com/xen-project/xen/-/blob/staging/xen/arch/arm/include/asm/page.h?ref_type=heads#L66 > > It looks like I misunderstood that, and PTE* can be used everywhere > instead of _PAGE*. Alternatively, we could consider renaming everything > to PAGE* to maintain consistency across architectures. > > Does it make sense? Sure. Whether renaming makes sense is harder to tell though. It would be only the name prefix that's uniform, as how exactly e.g. permissions are controlled is arch-specific anyway. 
The one place I'm aware where PAGE_* (or, sadly, still _PAGE_*) would matter for common code is _PAGE_NONE (not sure though whether that's something that can / wants to be expressed for RISC-V). Jan
Community Manager Update
Hi all, Merry Christmas! Just a quick update from myself. Whilst I can’t speak for everyone in the community, I know we have valuable members who are committed to making the Xen Project successful. We are a strong and passionate community made up of individuals who consistently seek improvement in ways of working to ensure our open source project is welcoming and friendly. Our project is not only advancing the industry in many ways, but there are policies and governance practices that the community has helped shape to make it a better environment for all. As such, I strongly condemn activities or communications that diminish our efforts on how great the community is. We may not be perfect, but collectively we are all working towards a common goal on how to be better. This is a reminder to everyone to keep up the great work you are already doing, and for other individuals to refrain from making comments that could be seen as harmful to the community. Our communications can easily be misperceived and interpreted by others negatively, so if your messaging is not beneficial, please do not send these. Our next steps will be to look at creating a community working or technical group of some sort, to help address concerns and drive direction within the community. Given this will be something that affects everyone, I’d like to ask for your patience as we scope out the specifics before asking for your feedback. I want to make sure we do this properly to ensure the process is something that benefits the community in the long run. As soon as I have the details, I will be sharing them with you all. I wish you all a great holiday and a Happy New Year! Many thanks, Kelly Choi Community Manager Xen Project
Re: [PATCH v2 32/39] xen/riscv: add minimal stuff to asm/page.h to build full Xen
On Mon, 2023-12-18 at 12:36 +0100, Jan Beulich wrote: > On 18.12.2023 11:45, Oleksii wrote: > > On Thu, 2023-12-14 at 16:57 +0100, Jan Beulich wrote: > > > On 24.11.2023 11:30, Oleksii Kurochko wrote: > > > > Signed-off-by: Oleksii Kurochko > > > > > > Acked-by: Jan Beulich > > > > > > I wonder though ... > > > > > > > --- a/xen/arch/riscv/include/asm/page.h > > > > +++ b/xen/arch/riscv/include/asm/page.h > > > > @@ -6,6 +6,7 @@ > > > > #ifndef __ASSEMBLY__ > > > > > > > > #include > > > > +#include > > > > #include > > > > > > > > #include > > > > @@ -32,6 +33,9 @@ > > > > #define PTE_LEAF_DEFAULT (PTE_VALID | PTE_READABLE > > > > | > > > > PTE_WRITABLE) > > > > #define PTE_TABLE (PTE_VALID) > > > > > > > > +/* TODO */ > > > > +#define PAGE_HYPERVISOR 0 > > > > > > ... whether this couldn't be defined properly right away. > > It can be introduced now but it requires some additional defines to > > be > > introduced in the same time: > > > > #define _PAGE_W_BIT 2 > > #define _PAGE_XN_BIT 3 > > #define _PAGE_RO_BIT 1 > > #define _PAGE_XN (1U << _PAGE_XN_BIT) > > #define _PAGE_RO (1U << _PAGE_RO_BIT) > > #define _PAGE_W (1U << _PAGE_W_BIT) > > Why would you need these, when you already have > PTE_{READABLE,WRITABLE,EXECUTABLE} just out of context from above? I thought that the hypervisor page table is fully software-related, and that's why a separate PAGE*BIT was introduced. These bits can be different from PTE* bits, which are hardware-related https://gitlab.com/xen-project/xen/-/blob/staging/xen/arch/arm/include/asm/page.h?ref_type=heads#L66 It looks like I misunderstood that, and PTE* can be used everywhere instead of _PAGE*. Alternatively, we could consider renaming everything to PAGE* to maintain consistency across architectures. Does it make sense? > (And > that's aside from me (a) not following the naming (vs their purpose) > and > (b) not seeing what _PAGE_*_BIT are needed for, not even thinking > about > the leading underscores in these identifiers again.) 
> > > ... > > /* > > * _PAGE_DEVICE and _PAGE_NORMAL are convenience defines. They are > > not > > * meant to be used outside of this header. > > */ > > // #define _PAGE_DEVICE _PAGE_XN > > #define _PAGE_NORMAL _PAGE_PRESENT > > > > #define PAGE_HYPERVISOR_RW (_PAGE_NORMAL | _PAGE_RO | _PAGE_XN > > | > > _PAGE_W) > > > > #define PAGE_HYPERVISOR PAGE_HYPERVISOR_RW > > > > And _PAGE_PRESENT in pgtbl-bits.h: > > > > #define _PAGE_PRESENT (1 << 0) > > > > I prefer to introduce all these things when they will be really used. > > I understand that, yet for easy things it may help doing them right > away, rather than leaving them to be touched (in a straightforward > way) > again very soon. > ~ Oleksii
[XEN PATCH] docs/misra: add entries to exclude-list
Exclude efibind.h for all the architectures: it is used to build the efi stub, which is a separate entry point for Xen when booted from EFI firmware. Remove redundant entries from out_of_scope.ecl. Exclude common/coverage: it is code to support gcov, hence it is part of the testing machinery. Exclude decompress.h: file ported from Linux that defines a unique and documented interface towards all the (adopted) decompress functions. Signed-off-by: Federico Serafini --- .../eclair_analysis/ECLAIR/out_of_scope.ecl | 5 - docs/misra/exclude-list.json | 16 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/automation/eclair_analysis/ECLAIR/out_of_scope.ecl b/automation/eclair_analysis/ECLAIR/out_of_scope.ecl index fd870716cf..9bcec4c69d 100644 --- a/automation/eclair_analysis/ECLAIR/out_of_scope.ecl +++ b/automation/eclair_analysis/ECLAIR/out_of_scope.ecl @@ -17,11 +17,6 @@ -file_tag+={out_of_scope,"^xen/arch/x86/include/asm/intel-family\\.h$"} -doc_end --doc_begin="Files imported from the gnu-efi package" --file_tag+={adopted,"^xen/include/efi/.*$"} --file_tag+={adopted,"^xen/arch/x86/include/asm/x86_64/efibind\\.h$"} --doc_end - -doc_begin="Build tools are out of scope." 
-file_tag+={out_of_scope_tools,"^xen/tools/.*$"} -file_tag+={out_of_scope_tools,"^xen/arch/x86/efi/mkreloc\\.c$"} diff --git a/docs/misra/exclude-list.json b/docs/misra/exclude-list.json index 48f671c548..83fae0b4a4 100644 --- a/docs/misra/exclude-list.json +++ b/docs/misra/exclude-list.json @@ -1,6 +1,10 @@ { "version": "1.0", "content": [ +{ +"rel_path": "arch/*/include/asm/*/efibind.h", +"comment": "Imported from gnu-efi package, ignore for now" +}, { "rel_path": "arch/arm/arm64/cpufeature.c", "comment": "Imported from Linux, ignore for now" @@ -97,6 +101,10 @@ "rel_path": "arch/x86/efi/check.c", "comment": "The resulting code is not included in the final Xen binary, ignore for now" }, +{ +"rel_path": "common/coverage/*", +"comment": "Files to support gcov, ignore for now" +}, { "rel_path": "common/bitmap.c", "comment": "Imported from Linux, ignore for now" @@ -213,6 +221,14 @@ "rel_path": "include/xen/acpi.h", "comment": "Imported from Linux, ignore for now" }, +{ +"rel_path": "include/efi/*", +"comment": "Imported from gnu-efi package, ignore for now" +}, +{ +"rel_path": "include/xen/decompress.h", +"comment": "Imported from Linux, ignore for now" +}, { "rel_path": "lib/list-sort.c", "comment": "Imported from Linux, ignore for now" -- 2.34.1
Re: [PATCH v2 31/39] xen/riscv: add required things to asm/current.h
On Mon, 2023-12-18 at 12:28 +0100, Jan Beulich wrote: > On 18.12.2023 11:39, Oleksii wrote: > > On Thu, 2023-12-14 at 16:55 +0100, Jan Beulich wrote: > > > On 24.11.2023 11:30, Oleksii Kurochko wrote: > > > > --- a/xen/arch/riscv/include/asm/current.h > > > > +++ b/xen/arch/riscv/include/asm/current.h > > > > @@ -3,6 +3,22 @@ > > > > #ifndef __ASM_CURRENT_H > > > > #define __ASM_CURRENT_H > > > > > > > > +#include > > > > +#include > > > > + > > > > +#ifndef __ASSEMBLY__ > > > > + > > > > +struct vcpu; > > > > > > I don't think you need this here? > > Shouldn't forward declaration be for the case of curr_vcpu > > declaration > > in the next line ? > > I don't think so. In C forward decls are needed only when an > otherwise > undeclared type is used as type of a function parameter. (C++ is > different > in this regard.) Thanks for clarifying; in this case, it can be dropped. ~ Oleksii > > > > +/* Which VCPU is "current" on this PCPU. */ > > > > +DECLARE_PER_CPU(struct vcpu *, curr_vcpu); > > > > + > > > > +#define current (this_cpu(curr_vcpu)) >
Re: [PATCH v2 24/39] xen/riscv: introduce asm/irq.h
On Mon, 2023-12-18 at 11:12 +0100, Jan Beulich wrote: > On 18.12.2023 11:04, Oleksii wrote: > > On Thu, 2023-12-14 at 15:09 +0100, Jan Beulich wrote: > > > On 24.11.2023 11:30, Oleksii Kurochko wrote: > > > > Signed-off-by: Oleksii Kurochko > > > > --- > > > > Changes in V2: > > > > - add ifdef CONFIG_HAS_DEVICE_TREE for things that > > > > shouldn't be > > > > in case !CONFIG_HAS_DEVICE_TREE > > > > > > Is there going to be a RISC-V build without this enabled > > > (selected)? > > > If > > > not, I'd recommend against such pointless #ifdef-ary. > > For this stage (Xen RISC-V full build), CONFIG_HAS_DEVICE_TREE will > > not > > be selected, but it will be in the near future. > > And from then on it'll always be selected, or only conditionally? In > the > former case it would still feel odd if #ifdef-s were introduced. It will always be selected until ACPI support is provided. I've seen patches that add ACPI support for the Linux kernel, but I'm not sure if it is really needed at this point. So, I've planned to go with Device Tree for quite a while ~ Oleksii
Re: [PATCH v2 33/39] xen/riscv: add minimal stuff to asm/processor.h to build full Xen
On 18.12.2023 11:49, Oleksii wrote: > On Thu, 2023-12-14 at 17:04 +0100, Jan Beulich wrote: >> On 24.11.2023 11:30, Oleksii Kurochko wrote: >>> +static inline void cpu_relax(void) >>> +{ >>> + int dummy; >>> + /* In lieu of a halt instruction, induce a long-latency >>> stall. */ >>> + __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy)); >> >> Any reason for this, when Arm's is just barrier(), and apparently >> they got >> away with this quite fine? Also isn't this causing a division by >> zero, >> which I'd expect to cause some kind of exception? (Terminology-wise >> I'm of >> course biased by x86, where "halt instruction" wouldn't be suitable >> to use >> here. But if that terminology is fine on RISC-V, then obviously no >> objection.) > It was based on Linux kernel code: > https://elixir.bootlin.com/linux/latest/source/arch/riscv/include/asm/vdso/processor.h#L9 > > But looks I missed barrier()... > Probably it will be better update cpu_relax() to: > > /* Encoding of the pause instruction */ > __asm__ __volatile__ (".4byte 0x10F"); > > barrier(); But definitely without .4byte, which defines a piece of data. If for whatever reason you don't want to use "pause" directly, please use .insn. Jan
Re: [PATCH v2 32/39] xen/riscv: add minimal stuff to asm/page.h to build full Xen
On 18.12.2023 11:45, Oleksii wrote: > On Thu, 2023-12-14 at 16:57 +0100, Jan Beulich wrote: >> On 24.11.2023 11:30, Oleksii Kurochko wrote: >>> Signed-off-by: Oleksii Kurochko >> >> Acked-by: Jan Beulich >> >> I wonder though ... >> >>> --- a/xen/arch/riscv/include/asm/page.h >>> +++ b/xen/arch/riscv/include/asm/page.h >>> @@ -6,6 +6,7 @@ >>> #ifndef __ASSEMBLY__ >>> >>> #include >>> +#include >>> #include >>> >>> #include >>> @@ -32,6 +33,9 @@ >>> #define PTE_LEAF_DEFAULT (PTE_VALID | PTE_READABLE | >>> PTE_WRITABLE) >>> #define PTE_TABLE (PTE_VALID) >>> >>> +/* TODO */ >>> +#define PAGE_HYPERVISOR 0 >> >> ... whether this couldn't be defined properly right away. > It can be introduced now but it requires some additional defines to be > introduced in the same time: > > #define _PAGE_W_BIT 2 > #define _PAGE_XN_BIT3 > #define _PAGE_RO_BIT1 > #define _PAGE_XN(1U << _PAGE_XN_BIT) > #define _PAGE_RO(1U << _PAGE_RO_BIT) > #define _PAGE_W (1U << _PAGE_W_BIT) Why would you need these, when you already have PTE_{READABLE,WRITABLE,EXECUTABLE} just out of context from above? (And that's aside from me (a) not following the naming (vs their purpose) and (b) not seeing what _PAGE_*_BIT are needed for, not even thinking about the leading underscores in these identifiers again.) > ... > /* > * _PAGE_DEVICE and _PAGE_NORMAL are convenience defines. They are not > * meant to be used outside of this header. > */ > // #define _PAGE_DEVICE_PAGE_XN > #define _PAGE_NORMAL_PAGE_PRESENT > > #define PAGE_HYPERVISOR_RW (_PAGE_NORMAL | _PAGE_RO | _PAGE_XN | > _PAGE_W) > > #define PAGE_HYPERVISOR PAGE_HYPERVISOR_RW > > And _PAGE_PRESENT in pgtbl-bits.h: > > #define _PAGE_PRESENT (1 << 0) > > I prefer to introduce all this things when it will be really used. I understand that, yet for easy things it may help doing them right away, rather than leaving them to be touched (in a straightforward way) again very soon. Jan