[PATCH v6 07/11] virtio-gpu: Handle resource blob commands
From: Antonio Caggiano Support BLOB resources creation, mapping and unmapping by calling the new stable virglrenderer 0.10 interface. Only enabled when available and via the blob config. E.g. -device virtio-vga-gl,blob=true Signed-off-by: Antonio Caggiano Signed-off-by: Dmitry Osipenko Signed-off-by: Xenia Ragiadakou Signed-off-by: Huang Rui --- Changes in v6: - Use new struct virgl_gpu_resource. - Unmap, unref and destroy the resource only after the memory region has been completely removed. - In unref check whether the resource is still mapped. - In unmap_blob check whether the resource has been already unmapped. - Fix coding style hw/display/virtio-gpu-virgl.c | 274 +- hw/display/virtio-gpu.c | 4 +- meson.build | 4 + 3 files changed, 276 insertions(+), 6 deletions(-) diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index faab374336..5a3a292f79 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -17,6 +17,7 @@ #include "trace.h" #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-gpu.h" +#include "hw/virtio/virtio-gpu-bswap.h" #include "ui/egl-helpers.h" @@ -24,8 +25,62 @@ struct virgl_gpu_resource { struct virtio_gpu_simple_resource res; +uint32_t ref; +VirtIOGPU *g; + +#ifdef HAVE_VIRGL_RESOURCE_BLOB +/* only blob resource needs this region to be mapped as guest mmio */ +MemoryRegion *region; +#endif }; +static void vres_get_ref(struct virgl_gpu_resource *vres) +{ +uint32_t ref; + +ref = qatomic_fetch_inc(>ref); +g_assert(ref < INT_MAX); +} + +static void virgl_resource_destroy(struct virgl_gpu_resource *vres) +{ +struct virtio_gpu_simple_resource *res; +VirtIOGPU *g; + +if (!vres) { +return; +} + +g = vres->g; +res = >res; +QTAILQ_REMOVE(>reslist, res, next); +virtio_gpu_cleanup_mapping(g, res); +g_free(vres); +} + +static void virgl_resource_unref(struct virgl_gpu_resource *vres) +{ +struct virtio_gpu_simple_resource *res; + +if (!vres) { +return; +} + +res = >res; 
+virgl_renderer_resource_detach_iov(res->resource_id, NULL, NULL); +virgl_renderer_resource_unref(res->resource_id); +} + +static void vres_put_ref(struct virgl_gpu_resource *vres) +{ +g_assert(vres->ref > 0); + +if (qatomic_fetch_dec(>ref) == 1) { +virgl_resource_unref(vres); +virgl_resource_destroy(vres); +} +} + static struct virgl_gpu_resource * virgl_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id) { @@ -59,6 +114,8 @@ static void virgl_cmd_create_resource_2d(VirtIOGPU *g, c2d.width, c2d.height); vres = g_new0(struct virgl_gpu_resource, 1); +vres_get_ref(vres); +vres->g = g; vres->res.width = c2d.width; vres->res.height = c2d.height; vres->res.format = c2d.format; @@ -91,6 +148,8 @@ static void virgl_cmd_create_resource_3d(VirtIOGPU *g, c3d.width, c3d.height, c3d.depth); vres = g_new0(struct virgl_gpu_resource, 1); +vres_get_ref(vres); +vres->g = g; vres->res.width = c3d.width; vres->res.height = c3d.height; vres->res.format = c3d.format; @@ -126,12 +185,21 @@ static void virgl_cmd_resource_unref(VirtIOGPU *g, return; } -virgl_renderer_resource_detach_iov(unref.resource_id, NULL, NULL); -virgl_renderer_resource_unref(unref.resource_id); +#ifdef HAVE_VIRGL_RESOURCE_BLOB +if (vres->region) { +VirtIOGPUBase *b = VIRTIO_GPU_BASE(g); +MemoryRegion *mr = vres->region; + +warn_report("%s: blob resource %d not unmapped", +__func__, unref.resource_id); +vres->region = NULL; +memory_region_set_enabled(mr, false); +memory_region_del_subregion(>hostmem, mr); +object_unparent(OBJECT(mr)); +} +#endif /* HAVE_VIRGL_RESOURCE_BLOB */ -QTAILQ_REMOVE(>reslist, >res, next); -virtio_gpu_cleanup_mapping(g, >res); -g_free(vres); +vres_put_ref(vres); } static void virgl_cmd_context_create(VirtIOGPU *g, @@ -470,6 +538,191 @@ static void virgl_cmd_get_capset(VirtIOGPU *g, g_free(resp); } +#ifdef HAVE_VIRGL_RESOURCE_BLOB + +static void virgl_resource_unmap(struct virgl_gpu_resource *vres) +{ +if (!vres) { +return; +} + +virgl_renderer_resource_unmap(vres->res.resource_id); + 
+vres_put_ref(vres); +} + +static void virgl_resource_blob_async_unmap(void *obj) +{ +MemoryRegion *mr = MEMORY_REGION(obj); +struct virgl_gpu_resource *vres = mr->opaque; + +virgl_resource_unmap(vres); + +g_free(obj); +} + +static void virgl_cmd_resource_create_blob(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ +struct virgl_gpu_resource *vres; +struct virtio_gpu_resource_create_blob cblob; +struct
[PATCH v6 08/11] virtio-gpu: Resource UUID
From: Antonio Caggiano Enable resource UUID feature and implement command resource assign UUID. This is done by introducing a hash table to map resource IDs to their UUIDs. Signed-off-by: Antonio Caggiano Signed-off-by: Huang Rui --- Changes in v6: - Set resource uuid as option. - Implement optional subsection of vmstate_virtio_gpu_resource_uuid_state for virtio live migration. - Use g_int_hash/g_int_equal instead of the default. - Move virtio_gpu_simple_resource initialization in the earlier new patch "virtio-gpu: Introduce virgl_gpu_resource structure" hw/display/trace-events| 1 + hw/display/virtio-gpu-base.c | 4 ++ hw/display/virtio-gpu-virgl.c | 3 + hw/display/virtio-gpu.c| 119 + include/hw/virtio/virtio-gpu.h | 7 ++ 5 files changed, 134 insertions(+) diff --git a/hw/display/trace-events b/hw/display/trace-events index 2336a0ca15..54d6894c59 100644 --- a/hw/display/trace-events +++ b/hw/display/trace-events @@ -41,6 +41,7 @@ virtio_gpu_cmd_res_create_blob(uint32_t res, uint64_t size) "res 0x%x, size %" P virtio_gpu_cmd_res_unref(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_back_attach(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_back_detach(uint32_t res) "res 0x%x" +virtio_gpu_cmd_res_assign_uuid(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_xfer_toh_2d(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_xfer_toh_3d(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_xfer_fromh_3d(uint32_t res) "res 0x%x" diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 37af256219..6bcee3882f 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -236,6 +236,10 @@ virtio_gpu_base_get_features(VirtIODevice *vdev, uint64_t features, features |= (1 << VIRTIO_GPU_F_CONTEXT_INIT); } +if (virtio_gpu_resource_uuid_enabled(g->conf)) { +features |= (1 << VIRTIO_GPU_F_RESOURCE_UUID); +} + return features; } diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index 5a3a292f79..be9da6e780 100644 --- a/hw/display/virtio-gpu-virgl.c 
+++ b/hw/display/virtio-gpu-virgl.c @@ -777,6 +777,9 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, /* TODO add security */ virgl_cmd_ctx_detach_resource(g, cmd); break; +case VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID: +virtio_gpu_resource_assign_uuid(g, cmd); +break; case VIRTIO_GPU_CMD_GET_CAPSET_INFO: virgl_cmd_get_capset_info(g, cmd); break; diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 8189c392dc..466debb256 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -958,6 +958,37 @@ virtio_gpu_resource_detach_backing(VirtIOGPU *g, virtio_gpu_cleanup_mapping(g, res); } +void virtio_gpu_resource_assign_uuid(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ +struct virtio_gpu_simple_resource *res; +struct virtio_gpu_resource_assign_uuid assign; +struct virtio_gpu_resp_resource_uuid resp; +QemuUUID *uuid; + +VIRTIO_GPU_FILL_CMD(assign); +virtio_gpu_bswap_32(, sizeof(assign)); +trace_virtio_gpu_cmd_res_assign_uuid(assign.resource_id); + +res = virtio_gpu_find_check_resource(g, assign.resource_id, false, __func__, >error); +if (!res) { +return; +} + +memset(, 0, sizeof(resp)); +resp.hdr.type = VIRTIO_GPU_RESP_OK_RESOURCE_UUID; + +uuid = g_hash_table_lookup(g->resource_uuids, _id); +if (!uuid) { +uuid = g_new(QemuUUID, 1); +qemu_uuid_generate(uuid); +g_hash_table_insert(g->resource_uuids, _id, uuid); +} + +memcpy(resp.uuid, uuid, sizeof(QemuUUID)); +virtio_gpu_ctrl_response(g, cmd, , sizeof(resp)); +} + void virtio_gpu_simple_process_cmd(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { @@ -1006,6 +1037,9 @@ void virtio_gpu_simple_process_cmd(VirtIOGPU *g, case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: virtio_gpu_resource_detach_backing(g, cmd); break; +case VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID: +virtio_gpu_resource_assign_uuid(g, cmd); +break; default: cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; break; @@ -1400,6 +1434,57 @@ static int virtio_gpu_blob_load(QEMUFile *f, void *opaque, size_t size, return 0; } +static int 
virtio_gpu_resource_uuid_save(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, + JSONWriter *vmdesc) +{ +VirtIOGPU *g = opaque; +struct virtio_gpu_simple_resource *res; +QemuUUID *uuid; + +/* in 2d mode we should never find unprocessed commands here */ +assert(QTAILQ_EMPTY(>cmdq)); + +QTAILQ_FOREACH(res, >reslist, next) { +qemu_put_be32(f, res->resource_id); +uuid = g_hash_table_lookup(g->resource_uuids,
[PATCH v6 09/11] virtio-gpu: Support Venus capset
From: Antonio Caggiano Add support for the Venus capset, which enables Vulkan support through the Venus Vulkan driver for virtio-gpu. Signed-off-by: Antonio Caggiano Signed-off-by: Huang Rui --- No change in v6. hw/display/virtio-gpu-virgl.c | 21 + 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index be9da6e780..f35a751824 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -506,6 +506,11 @@ static void virgl_cmd_get_capset_info(VirtIOGPU *g, virgl_renderer_get_cap_set(resp.capset_id, _max_version, _max_size); +} else if (info.capset_index == 2) { +resp.capset_id = VIRTIO_GPU_CAPSET_VENUS; +virgl_renderer_get_cap_set(resp.capset_id, + _max_version, + _max_size); } else { resp.capset_max_version = 0; resp.capset_max_size = 0; @@ -978,10 +983,18 @@ int virtio_gpu_virgl_init(VirtIOGPU *g) int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g) { -uint32_t capset2_max_ver, capset2_max_size; +uint32_t capset2_max_ver, capset2_max_size, num_capsets; +num_capsets = 1; + virgl_renderer_get_cap_set(VIRTIO_GPU_CAPSET_VIRGL2, - _max_ver, - _max_size); + _max_ver, + _max_size); +num_capsets += capset2_max_ver ? 1 : 0; + +virgl_renderer_get_cap_set(VIRTIO_GPU_CAPSET_VENUS, + _max_ver, + _max_size); +num_capsets += capset2_max_size ? 1 : 0; -return capset2_max_ver ? 2 : 1; +return num_capsets; } -- 2.25.1
[PATCH v6 05/11] virtio-gpu: Introduce virgl_gpu_resource structure
Introduce a new virgl_gpu_resource data structure and helper functions for virgl. It's used to add new member which is specific for virgl in following patches of blob memory support. Signed-off-by: Huang Rui --- New patch: - Introduce new struct virgl_gpu_resource to store virgl specific members. - Move resource initialization from path "virtio-gpu: Resource UUID" here. - Remove error handling of g_new0, because glib will abort() on OOM. - Set iov and iov_cnt in struct virtio_gpu_simple_resource for all types of resources. hw/display/virtio-gpu-virgl.c | 84 ++- 1 file changed, 64 insertions(+), 20 deletions(-) diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index 5bbc8071b2..faab374336 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -22,6 +22,23 @@ #include +struct virgl_gpu_resource { +struct virtio_gpu_simple_resource res; +}; + +static struct virgl_gpu_resource * +virgl_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id) +{ +struct virtio_gpu_simple_resource *res; + +res = virtio_gpu_find_resource(g, resource_id); +if (!res) { +return NULL; +} + +return container_of(res, struct virgl_gpu_resource, res); +} + #if VIRGL_RENDERER_CALLBACKS_VERSION >= 4 static void * virgl_get_egl_display(G_GNUC_UNUSED void *cookie) @@ -35,11 +52,19 @@ static void virgl_cmd_create_resource_2d(VirtIOGPU *g, { struct virtio_gpu_resource_create_2d c2d; struct virgl_renderer_resource_create_args args; +struct virgl_gpu_resource *vres; VIRTIO_GPU_FILL_CMD(c2d); trace_virtio_gpu_cmd_res_create_2d(c2d.resource_id, c2d.format, c2d.width, c2d.height); +vres = g_new0(struct virgl_gpu_resource, 1); +vres->res.width = c2d.width; +vres->res.height = c2d.height; +vres->res.format = c2d.format; +vres->res.resource_id = c2d.resource_id; +QTAILQ_INSERT_HEAD(>reslist, >res, next); + args.handle = c2d.resource_id; args.target = 2; args.format = c2d.format; @@ -59,11 +84,19 @@ static void virgl_cmd_create_resource_3d(VirtIOGPU *g, { struct 
virtio_gpu_resource_create_3d c3d; struct virgl_renderer_resource_create_args args; +struct virgl_gpu_resource *vres; VIRTIO_GPU_FILL_CMD(c3d); trace_virtio_gpu_cmd_res_create_3d(c3d.resource_id, c3d.format, c3d.width, c3d.height, c3d.depth); +vres = g_new0(struct virgl_gpu_resource, 1); +vres->res.width = c3d.width; +vres->res.height = c3d.height; +vres->res.format = c3d.format; +vres->res.resource_id = c3d.resource_id; +QTAILQ_INSERT_HEAD(>reslist, >res, next); + args.handle = c3d.resource_id; args.target = c3d.target; args.format = c3d.format; @@ -82,19 +115,23 @@ static void virgl_cmd_resource_unref(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { struct virtio_gpu_resource_unref unref; -struct iovec *res_iovs = NULL; -int num_iovs = 0; +struct virgl_gpu_resource *vres; VIRTIO_GPU_FILL_CMD(unref); trace_virtio_gpu_cmd_res_unref(unref.resource_id); -virgl_renderer_resource_detach_iov(unref.resource_id, - _iovs, - _iovs); -if (res_iovs != NULL && num_iovs != 0) { -virtio_gpu_cleanup_mapping_iov(g, res_iovs, num_iovs); +vres = virgl_gpu_find_resource(g, unref.resource_id); +if (!vres) { +cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +return; } + +virgl_renderer_resource_detach_iov(unref.resource_id, NULL, NULL); virgl_renderer_resource_unref(unref.resource_id); + +QTAILQ_REMOVE(>reslist, >res, next); +virtio_gpu_cleanup_mapping(g, >res); +g_free(vres); } static void virgl_cmd_context_create(VirtIOGPU *g, @@ -310,44 +347,51 @@ static void virgl_resource_attach_backing(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { struct virtio_gpu_resource_attach_backing att_rb; -struct iovec *res_iovs; -uint32_t res_niov; +struct virgl_gpu_resource *vres; int ret; VIRTIO_GPU_FILL_CMD(att_rb); trace_virtio_gpu_cmd_res_back_attach(att_rb.resource_id); +vres = virgl_gpu_find_resource(g, att_rb.resource_id); +if (!vres) { +cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +return; +} + ret = virtio_gpu_create_mapping_iov(g, att_rb.nr_entries, sizeof(att_rb), 
-cmd, NULL, _iovs, _niov); +cmd, NULL, >res.iov, +>res.iov_cnt); if (ret != 0) { cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; return; } ret = virgl_renderer_resource_attach_iov(att_rb.resource_id, -
[PATCH v6 06/11] softmmu/memory: enable automatic deallocation of memory regions
From: Xenia Ragiadakou When the memory region has a different life-cycle from that of its parent, it could be automatically released, once it has been unparented and once all of its references have gone away, via the object's free callback. However, currently, the address space subsystem keeps references to the memory region without first incrementing its object's reference count. As a result, the automatic deallocation of the object, not taking into account those references, results in use-after-free memory corruption. More specifically, reference to the memory region is kept in flatview ranges. If the reference count of the memory region is not incremented, flatview_destroy(), that is asynchronous, may be called after memory region's destruction. If the reference count of the memory region is incremented, memory region's destruction will take place after flatview_destroy() has released its references. This patch increases the reference count of an owned memory region object on each memory_region_ref() and decreases it on each memory_region_unref(). Signed-off-by: Xenia Ragiadakou Signed-off-by: Huang Rui --- Changes in v6: - remove in-code comment because it is confusing and explain the issue, that the patch attempts to fix, with more details in commit message system/memory.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/system/memory.c b/system/memory.c index 304fa843ea..4d5e7e7a4c 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1824,6 +1824,7 @@ void memory_region_ref(MemoryRegion *mr) * we do not ref/unref them because it slows down DMA sensibly. */ if (mr && mr->owner) { +object_ref(OBJECT(mr)); object_ref(mr->owner); } } @@ -1832,6 +1833,7 @@ void memory_region_unref(MemoryRegion *mr) { if (mr && mr->owner) { object_unref(mr->owner); +object_unref(OBJECT(mr)); } } -- 2.25.1
[PATCH v6 03/11] virtio-gpu: Support context init feature with virglrenderer
Patch "virtio-gpu: CONTEXT_INIT feature" has added the context_init feature flags. We would like to enable the feature with virglrenderer, so add to create virgl renderer context with flags using context_id when valid. Originally-by: Antonio Caggiano Signed-off-by: Huang Rui --- Changes in v6: - Handle the case while context_init is disabled. - Enable context_init by default. hw/display/virtio-gpu-virgl.c | 13 +++-- hw/display/virtio-gpu.c | 4 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index 8bb7a2c21f..5bbc8071b2 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -106,8 +106,17 @@ static void virgl_cmd_context_create(VirtIOGPU *g, trace_virtio_gpu_cmd_ctx_create(cc.hdr.ctx_id, cc.debug_name); -virgl_renderer_context_create(cc.hdr.ctx_id, cc.nlen, - cc.debug_name); +#ifdef HAVE_VIRGL_CONTEXT_CREATE_WITH_FLAGS +if (cc.context_init && virtio_gpu_context_init_enabled(g->parent_obj.conf)) { +virgl_renderer_context_create_with_flags(cc.hdr.ctx_id, + cc.context_init, + cc.nlen, + cc.debug_name); +return; +} +#endif + +virgl_renderer_context_create(cc.hdr.ctx_id, cc.nlen, cc.debug_name); } static void virgl_cmd_context_destroy(VirtIOGPU *g, diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index b016d3bac8..8b2f4c6be3 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -1619,6 +1619,10 @@ static Property virtio_gpu_properties[] = { DEFINE_PROP_BIT("blob", VirtIOGPU, parent_obj.conf.flags, VIRTIO_GPU_FLAG_BLOB_ENABLED, false), DEFINE_PROP_SIZE("hostmem", VirtIOGPU, parent_obj.conf.hostmem, 0), +#ifdef HAVE_VIRGL_CONTEXT_CREATE_WITH_FLAGS +DEFINE_PROP_BIT("context_init", VirtIOGPU, parent_obj.conf.flags, +VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED, true), +#endif DEFINE_PROP_END_OF_LIST(), }; -- 2.25.1
[PATCH v6 04/11] virtio-gpu: Don't require udmabuf when blobs and virgl are enabled
From: Dmitry Osipenko The udmabuf usage is mandatory when virgl is disabled and blobs feature enabled in the Qemu machine configuration. If virgl and blobs are enabled, then udmabuf requirement is optional. Since udmabuf isn't widely supported by a popular Linux distros today, let's relax the udmabuf requirement for blobs=on,virgl=on. Now, a full-featured virtio-gpu acceleration is available to Qemu users without a need to have udmabuf available in the system. Reviewed-by: Antonio Caggiano Signed-off-by: Dmitry Osipenko Signed-off-by: Huang Rui --- No change in v6. hw/display/virtio-gpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 8b2f4c6be3..4c3ec9d0ea 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -1443,6 +1443,7 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) if (virtio_gpu_blob_enabled(g->parent_obj.conf)) { if (!virtio_gpu_rutabaga_enabled(g->parent_obj.conf) && +!virtio_gpu_virgl_enabled(g->parent_obj.conf) && !virtio_gpu_have_udmabuf()) { error_setg(errp, "need rutabaga or udmabuf for blob resources"); return; -- 2.25.1
[PATCH v6 02/11] virtio-gpu: Configure new feature flag context_create_with_flags for virglrenderer
Configure a new feature flag (context_create_with_flags) for virglrenderer. Originally-by: Antonio Caggiano Signed-off-by: Huang Rui --- Changes in v6: - Move macros configurations under virgl.found() and rename HAVE_VIRGL_CONTEXT_CREATE_WITH_FLAGS. meson.build | 4 1 file changed, 4 insertions(+) diff --git a/meson.build b/meson.build index ec01f8b138..ea52ef1b9c 100644 --- a/meson.build +++ b/meson.build @@ -1050,6 +1050,10 @@ if not get_option('virglrenderer').auto() or have_system or have_vhost_user_gpu cc.has_member('struct virgl_renderer_resource_info_ext', 'd3d_tex2d', prefix: '#include ', dependencies: virgl)) +config_host_data.set('HAVE_VIRGL_CONTEXT_CREATE_WITH_FLAGS', + cc.has_function('virgl_renderer_context_create_with_flags', + prefix: '#include ', + dependencies: virgl)) endif endif rutabaga = not_found -- 2.25.1
[PATCH v6 00/11] Support blob memory and venus on qemu
Hi all, Sorry to late for V6, I was occupied by other stuff last two months, and right now resume the submission. Antonio Caggiano made the venus with QEMU on KVM platform last September[1]. This series are inherited from his original work to support the features of context init, hostmem, resource uuid, and blob resources for venus. At March of this year, we sent out the V1 version[2] for the review. But those series are included both xen and virtio gpu. Right now, we would like to divide into two parts, one is to continue the Antonio's work to upstream virtio-gpu support for blob memory and venus, and another is to upstream xen specific patches. This series is focusing on virtio-gpu, so we are marking as V4 version here to continue Antonio's patches[1]. And we will send xen specific patches separately, because they are hypervisor specific. Besides of QEMU, these supports also included virglrenderer[3][4] and mesa[5][6] as well. Right now, virglrenderer and mesa parts are all accepted by upstream. In this qemu version, we try to address the concerns around not proper cleanup during blob resource unmap and unref. Appreciate it if you have any commments. [1] https://lore.kernel.org/qemu-devel/20220926142422.22325-1-antonio.caggi...@collabora.com/ [2] V1: https://lore.kernel.org/qemu-devel/20230312092244.451465-1-ray.hu...@amd.com [3] https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1068 [4] https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1180 [5] https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22108 [6] https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23680 Please note the first 4 patches 1 -> 4 are inlcuded in these series because the series depends on them and not because we want them to be reviewed since they are already in the process of review through the "rutabaga_gfx + gfxstream" series. 
- https://lore.kernel.org/qemu-devel/20230829003629.410-1-gurchetansi...@chromium.org/ V4: https://lore.kernel.org/qemu-devel/20230831093252.2461282-1-ray.hu...@amd.com V5: https://lore.kernel.org/qemu-devel/2023091530.24064-1-ray.hu...@amd.com Changes from V5 to V6 - Move macros configurations under virgl.found() and rename HAVE_VIRGL_CONTEXT_CREATE_WITH_FLAGS. - Handle the case while context_init is disabled. - Enable context_init by default. - Move virtio_gpu_virgl_resource_unmap() into virgl_cmd_resource_unmap_blob(). - Introduce new struct virgl_gpu_resource to store virgl specific members. - Remove error handling of g_new0, because glib will abort() on OOM. - Set resource uuid as option. - Implement optional subsection of vmstate_virtio_gpu_resource_uuid_state for virtio live migration. - Use g_int_hash/g_int_equal instead of the default - Add scanout_blob function for virtio-gpu-virgl - Resolve the memory leak on virtio-gpu-virgl - Remove the unstable API flags check because virglrenderer is already 1.0 - Squash the render server flag support into "Initialize Venus" Changes from V4 (virtio gpu V4) to V5 - Inverted patch 5 and 6 because we should configure HAVE_VIRGL_CONTEXT_INIT firstly. - Validate owner of memory region to avoid slowing down DMA. - Use memory_region_init_ram_ptr() instead of memory_region_init_ram_device_ptr(). - Adjust sequence to allocate gpu resource before virglrenderer resource creation - Add virtio migration handling for uuid. - Send kernel patch to define VIRTIO_GPU_CAPSET_VENUS. https://lore.kernel.org/lkml/20230915105918.3763061-1-ray.hu...@amd.com/ - Add meson check to make sure unstable APIs defined from 0.9.0. Changes from V1 to V2 (virtio gpu V4) - Remove unused #include "hw/virtio/virtio-iommu.h" - Add a local function, called virgl_resource_destroy(), that is used to release a vgpu resource on error paths and in resource_unref. 
- Remove virtio_gpu_virgl_resource_unmap from virtio_gpu_cleanup_mapping(), since this function won't be called on blob resources and also because blob resources are unmapped via virgl_cmd_resource_unmap_blob(). - In virgl_cmd_resource_create_blob(), do proper cleanup in error paths and move QTAILQ_INSERT_HEAD(>reslist, res, next) after the resource has been fully initialized. - Memory region has a different life-cycle from virtio gpu resources i.e. cannot be released synchronously along with the vgpu resource. So, here the field "region" was changed to a pointer and is allocated dynamically when the blob is mapped. Also, since the pointer can be used to indicate whether the blob is mapped, the explicite field "mapped" was removed. - In virgl_cmd_resource_map_blob(), add check on the value of res->region, to prevent beeing called twice on the same resource. - Add a patch to enable automatic deallocation of memory regions to resolve use-after-free memory corruption with a reference. References Demo with Venus: - https://static.sched.com/hosted_files/xen2023/3f/xen_summit_2023_virtgpu_demo.mp4 QEMU repository: -
[PATCH v6 01/11] linux-headers: Update to kernel headers to add venus capset
Sync up kernel headers to update venus macro till they are merged into mainline. Signed-off-by: Huang Rui --- Changes in v6: - Venus capset is applied in kernel, so update it in qemu for future use. https://lore.kernel.org/lkml/b79dcf75-c9e8-490e-644f-3b97d95f7...@collabora.com/ https://cgit.freedesktop.org/drm-misc/commit/?id=216d86b9a430f3280e5b631c51e6fd1a7774cfa0 include/standard-headers/linux/virtio_gpu.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h index 2da48d3d4c..2db643ed8f 100644 --- a/include/standard-headers/linux/virtio_gpu.h +++ b/include/standard-headers/linux/virtio_gpu.h @@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit { #define VIRTIO_GPU_CAPSET_VIRGL 1 #define VIRTIO_GPU_CAPSET_VIRGL2 2 +/* 3 is reserved for gfxstream */ +#define VIRTIO_GPU_CAPSET_VENUS 4 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ struct virtio_gpu_get_capset_info { -- 2.25.1
[linux-linus test] 184170: tolerable FAIL - PUSHED
flight 184170 linux-linus real [real] http://logs.test-lab.xenproject.org/osstest/logs/184170/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt 16 saverestore-support-check fail blocked in 184162 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 184162 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 184162 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 184162 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check fail like 184162 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 184162 test-armhf-armhf-libvirt-raw 15 saverestore-support-checkfail like 184162 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 184162 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass 
test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-qcow2 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-armhf-armhf-xl-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-armhf-armhf-libvirt-raw 14 migrate-support-checkfail never pass version targeted for testing: linux2cf4f94d8e8646803f8fb0facf134b0cd7fb691a baseline version: linuxceb6a6f023fd3e8b07761ed900352ef574010bcb Last test of basis 184162 2023-12-18 02:29:46 Z1 days Testing same since 184170 2023-12-18 19:42:38 Z0 days1 attempts People who touched revisions under test: Benjamin Bigler Linus Torvalds Louis Chauvet Manish Pandey Mark Brown Martin K. Petersen Miquel Raynal Nam Cao Nitin Rawat Ronald Wahl jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm pass build-amd64 pass build-arm64 pass build-armhf pass build-i386 pass build-amd64-libvirt
[xen-unstable test] 184169: tolerable FAIL - PUSHED
flight 184169 xen-unstable real [real] flight 184171 xen-unstable real-retest [real] http://logs.test-lab.xenproject.org/osstest/logs/184169/ http://logs.test-lab.xenproject.org/osstest/logs/184171/ Failures :-/ but no regressions. Tests which are failing intermittently (not blocking): test-armhf-armhf-xl-credit1 19 guest-start.2 fail pass in 184171-retest test-armhf-armhf-libvirt-qcow2 13 guest-start fail pass in 184171-retest Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check fail in 184171 like 184161 test-armhf-armhf-libvirt-qcow2 14 migrate-support-check fail in 184171 never pass test-armhf-armhf-libvirt 16 saverestore-support-checkfail like 184161 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 184161 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 184161 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 184161 test-amd64-i386-xl-qemut-ws16-amd64 19 guest-stop fail like 184161 test-armhf-armhf-libvirt-raw 15 saverestore-support-checkfail like 184161 test-amd64-i386-xl-qemut-win7-amd64 19 guest-stop fail like 184161 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 184161 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 184161 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 184161 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 184161 test-amd64-i386-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-i386-libvirt 15 migrate-support-checkfail never pass test-amd64-i386-xl-pvshim14 guest-start fail never pass test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass 
test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-amd64-i386-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl 
15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd 15 saverestore-support-checkfail never pass version targeted for testing: xen 0cc74376d6823e0883f89556be2a267f2240a558 baseline version: xen
Re: [RFC PATCH v1 1/1] xen/Makefile: introduce ARCH_FIXED_CONFIG for randconfig
On Mon, 18 Dec 2023, Jan Beulich wrote: > On 18.12.2023 17:07, Andrew Cooper wrote: > > On 11/12/2023 3:56 pm, Jan Beulich wrote: > >> On 07.12.2023 21:17, Andrew Cooper wrote: > >>> On 07/12/2023 5:03 pm, Oleksii Kurochko wrote: > ARCH_FIXED_CONFIG is required in the case of randconfig > and CI for configs that aren't ready or are not > supposed to be implemented for specific architecture. > These configs should always be disabled to prevent randconfig > related tests from failing. > > Signed-off-by: Oleksii Kurochko > --- > xen/Makefile | 5 - > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/xen/Makefile b/xen/Makefile > index ca571103c8..8ae8fe1480 100644 > --- a/xen/Makefile > +++ b/xen/Makefile > @@ -336,11 +336,14 @@ ifeq ($(config-build),y) > # *config targets only - make sure prerequisites are updated, and > descend > # in tools/kconfig to make the *config target > > +ARCH_FORCED_CONFIG := > $(srctree)/arch/$(SRCARCH)/configs/randomforced.config > + > # Create a file for KCONFIG_ALLCONFIG which depends on the environment. > # This will be use by kconfig targets > allyesconfig/allmodconfig/allnoconfig/randconfig > filechk_kconfig_allconfig = \ > $(if $(findstring n,$(XEN_HAS_CHECKPOLICY)), echo > 'CONFIG_XSM_FLASK_POLICY=n';) \ > -$(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG);) \ > +$(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG); \ > +$(if $(wildcard $(ARCH_FORCED_CONFIG)), cat $(ARCH_FORCED_CONFIG);) > ) \ > : > > .allconfig.tmp: FORCE > >>> We already have infrastructure for this. What's wrong with > >>> EXTRA_FIXED_RANDCONFIG? > >> What I don't understand here is why dealing with the issue would want > >> limiting to gitlab-CI. Anyone could run randconfig on their own, and > >> imo it would be helpful if the same issue(s) could be prevented there, > >> too. Hence my earlier suggestion to have a snippet which can be used > >> by "interested" parties. And once dealt with in e.g. 
the makefile > >> there should not be a need for any overrides in the CI config anymore. > > > > This is trying to find a solution to a problem which doesn't exist. > > > > RISC-V and PPC are experimental in Xen. Noone else is going to come and > > randconfig them until they're rather more production ready, and a > > prerequisite of that is removing this list of exclusions. > > > > Until you can actually find an interested party to comment, I think this > > is just churn for no useful improvement. If nothing else, calling it > > randomforced.config isn't appropriate given the explanation of what this > > target is used for... > > "random" in the name can't possibly be right anyway. Such collection of > fixed settings would also be relevant to e.g. all{yes,no}config. Yet > that's still not the same as any kind of "default" config, which the > two architectures presently kind of abuse for the purpose of defining > required-fixed settings. One thing for sure, I don't think it would be a good idea to add extra temporary Kconfig changes like these: [1] https://lore.kernel.org/xen-devel/cdc20255540a66ba0b6946ac6d48c11029cd3385.1701453087.git.oleksii.kuroc...@gmail.com/ [2] https://lore.kernel.org/xen-devel/d42a34866edc70a12736b5c6976aa1b44b4ebd8a.1701453087.git.oleksii.kuroc...@gmail.com/ I agree with Andrew that RISC-V and PPC are experimental so whatever works to enable them to make progress on this issue with a small effort is sufficient. I would be happy with a quick respin of this series following the gitlab-ci approach. This is good enough. And I think that having some sort of fixed seed (seed.config?) for randconfig would also be fine and potentially more reusable. But I think Oleksii should go forward with whatever approach he prefers and he is more comfortable with, as long as it is not [1] and [2].
Re: [XEN PATCH v2] xen: move declaration of first_valid_mfn to xen/numa.h
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > Such declaration is moved in order to provide it for Arm and PPC, > whilst not violating MISRA C:2012 Rule 8.4 in common/page_alloc.c: > "A compatible declaration shall be visible when an object or > function with external linkage is defined". > > Signed-off-by: Julien Grall > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini > --- > Changes in v2: > This patch is a rework of a previous one appeared in this series [1], of which > patches 1 and 2 have been committed already. > > The updated patch was provided by Julien in this thread [2]. I added the > commit > message and the rest of the information. > > [1] > https://lore.kernel.org/xen-devel/cover.1702285639.git.nicola.vetr...@bugseng.com/T/#mee6def855787d932fe2f10d5648d437dcb6f046c > [2] > https://lore.kernel.org/xen-devel/cover.1702285639.git.nicola.vetr...@bugseng.com/T/#m3c5b141b806530b5920bb5e9dd53631195560317 > --- > xen/arch/arm/include/asm/numa.h | 6 -- > xen/arch/ppc/include/asm/numa.h | 6 -- > xen/common/page_alloc.c | 6 -- > xen/include/xen/numa.h | 2 ++ > 4 files changed, 6 insertions(+), 14 deletions(-) > > diff --git a/xen/arch/arm/include/asm/numa.h b/xen/arch/arm/include/asm/numa.h > index e2bee2bd8223..a2c1da4a82f7 100644 > --- a/xen/arch/arm/include/asm/numa.h > +++ b/xen/arch/arm/include/asm/numa.h > @@ -11,12 +11,6 @@ typedef u8 nodeid_t; > #define cpu_to_node(cpu) 0 > #define node_to_cpumask(node) (cpu_online_map) > > -/* > - * TODO: make first_valid_mfn static when NUMA is supported on Arm, this > - * is required because the dummy helpers are using it. 
> - */ > -extern mfn_t first_valid_mfn; > - > /* XXX: implement NUMA support */ > #define node_spanned_pages(nid) (max_page - mfn_x(first_valid_mfn)) > #define node_start_pfn(nid) (mfn_x(first_valid_mfn)) > diff --git a/xen/arch/ppc/include/asm/numa.h b/xen/arch/ppc/include/asm/numa.h > index 7fdf66c3da74..204180ad5b98 100644 > --- a/xen/arch/ppc/include/asm/numa.h > +++ b/xen/arch/ppc/include/asm/numa.h > @@ -10,12 +10,6 @@ typedef uint8_t nodeid_t; > #define cpu_to_node(cpu) 0 > #define node_to_cpumask(node) (cpu_online_map) > > -/* > - * TODO: make first_valid_mfn static when NUMA is supported on PPC, this > - * is required because the dummy helpers are using it. > - */ > -extern mfn_t first_valid_mfn; > - > /* XXX: implement NUMA support */ > #define node_spanned_pages(nid) (max_page - mfn_x(first_valid_mfn)) > #define node_start_pfn(nid) (mfn_x(first_valid_mfn)) > diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c > index 9b5df74fddab..d874525916ea 100644 > --- a/xen/common/page_alloc.c > +++ b/xen/common/page_alloc.c > @@ -255,8 +255,10 @@ static PAGE_LIST_HEAD(page_broken_list); > */ > > /* > - * first_valid_mfn is exported because it is use in ARM specific NUMA > - * helpers. See comment in arch/arm/include/asm/numa.h. > + * first_valid_mfn is exported because it is used when !CONFIG_NUMA. > + * > + * TODO: Consider if we can conditionally export first_valid_mfn based > + * on whether NUMA is selected. > */ > mfn_t first_valid_mfn = INVALID_MFN_INITIALIZER; > > diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h > index 287e81ff..a10d4b1778a0 100644 > --- a/xen/include/xen/numa.h > +++ b/xen/include/xen/numa.h > @@ -108,6 +108,8 @@ extern void numa_set_processor_nodes_parsed(nodeid_t > node); > > #else > > +extern mfn_t first_valid_mfn; > + > static inline nodeid_t mfn_to_nid(mfn_t mfn) > { > return 0; > -- > 2.34.1 >
Re: [XEN PATCH v2 7/7] automation/eclair_analysis: avoid violation of MISRA Rule 2.1
On Mon, 18 Dec 2023, Stefano Stabellini wrote: > On Mon, 18 Dec 2023, Nicola Vetrini wrote: > > The presence of an unlinked object file triggers a violation > > of MISRA C Rule 2.1, which is deviated, as it's not part of > > the final Xen binary. > > > > No functional change. > > > > Signed-off-by: Nicola Vetrini > > Acked-by: Stefano Stabellini > > > > --- > > automation/eclair_analysis/ECLAIR/deviations.ecl | 5 + > > 1 file changed, 5 insertions(+) > > > > diff --git a/automation/eclair_analysis/ECLAIR/deviations.ecl > > b/automation/eclair_analysis/ECLAIR/deviations.ecl > > index 85741a2c01a9..e3de0fb2adf8 100644 > > --- a/automation/eclair_analysis/ECLAIR/deviations.ecl > > +++ b/automation/eclair_analysis/ECLAIR/deviations.ecl > > @@ -28,6 +28,11 @@ not executable, and therefore it is safe for them to be > > unreachable." > > -config=MC3R1.R2.1,ignored_stmts+={"any()", "pure_decl()"} > > -doc_end > > > > ++-doc_begin="The following autogenerated file is not linked deliberately." > > ++-file_tag+={C_runtime_failures,"^automation/eclair_analysis/C-runtime-failures\\.rst\\.c$"} > > ++-config=MC3R1.R2.1,reports+={deliberate, > > "any_area(any_loc(file(C_runtime_failures)))"} > > ++-doc_end Would it make sense to add it to exclude-list instead?
Re: [XEN PATCH] docs/misra: add entries to exclude-list
On Mon, 18 Dec 2023, Federico Serafini wrote: > Exclude efibind.h for all the architectures: it is used to build the > efi stub, which is a separate entry point for Xen when booted from EFI > firmware. > Remove redundant entries from out_of_scope.ecl. > > Exclude common/coverage: it is code to support gcov, hence it is part > of the testing machinery. > > Exclude decompress.h: file ported from Linux that defines a unique and > documented interface towards all the (adopted) decompress functions. > > Signed-off-by: Federico Serafini Acked-by: Stefano Stabellini > --- > .../eclair_analysis/ECLAIR/out_of_scope.ecl | 5 - > docs/misra/exclude-list.json | 16 > 2 files changed, 16 insertions(+), 5 deletions(-) > > diff --git a/automation/eclair_analysis/ECLAIR/out_of_scope.ecl > b/automation/eclair_analysis/ECLAIR/out_of_scope.ecl > index fd870716cf..9bcec4c69d 100644 > --- a/automation/eclair_analysis/ECLAIR/out_of_scope.ecl > +++ b/automation/eclair_analysis/ECLAIR/out_of_scope.ecl > @@ -17,11 +17,6 @@ > -file_tag+={out_of_scope,"^xen/arch/x86/include/asm/intel-family\\.h$"} > -doc_end > > --doc_begin="Files imported from the gnu-efi package" > --file_tag+={adopted,"^xen/include/efi/.*$"} > --file_tag+={adopted,"^xen/arch/x86/include/asm/x86_64/efibind\\.h$"} > --doc_end > - > -doc_begin="Build tools are out of scope." 
> -file_tag+={out_of_scope_tools,"^xen/tools/.*$"} > -file_tag+={out_of_scope_tools,"^xen/arch/x86/efi/mkreloc\\.c$"} > diff --git a/docs/misra/exclude-list.json b/docs/misra/exclude-list.json > index 48f671c548..83fae0b4a4 100644 > --- a/docs/misra/exclude-list.json > +++ b/docs/misra/exclude-list.json > @@ -1,6 +1,10 @@ > { > "version": "1.0", > "content": [ > +{ > +"rel_path": "arch/*/include/asm/*/efibind.h", > +"comment": "Imported from gnu-efi package, ignore for now" > +}, > { > "rel_path": "arch/arm/arm64/cpufeature.c", > "comment": "Imported from Linux, ignore for now" > @@ -97,6 +101,10 @@ > "rel_path": "arch/x86/efi/check.c", > "comment": "The resulting code is not included in the final Xen > binary, ignore for now" > }, > +{ > +"rel_path": "common/coverage/*", > +"comment": "Files to support gcov, ignore for now" > +}, > { > "rel_path": "common/bitmap.c", > "comment": "Imported from Linux, ignore for now" > @@ -213,6 +221,14 @@ > "rel_path": "include/xen/acpi.h", > "comment": "Imported from Linux, ignore for now" > }, > +{ > +"rel_path": "include/efi/*", > +"comment": "Imported from gnu-efi package, ignore for now" > +}, > +{ > +"rel_path": "include/xen/decompress.h", > +"comment": "Imported from Linux, ignore for now" > +}, > { > "rel_path": "lib/list-sort.c", > "comment": "Imported from Linux, ignore for now" > -- > 2.34.1 >
Re: [XEN PATCH v2 7/7] automation/eclair_analysis: avoid violation of MISRA Rule 2.1
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > The presence of an unlinked object file triggers a violation > of MISRA C Rule 2.1, which is deviated, as it's not part of > the final Xen binary. > > No functional change. > > Signed-off-by: Nicola Vetrini Acked-by: Stefano Stabellini > --- > automation/eclair_analysis/ECLAIR/deviations.ecl | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/automation/eclair_analysis/ECLAIR/deviations.ecl > b/automation/eclair_analysis/ECLAIR/deviations.ecl > index 85741a2c01a9..e3de0fb2adf8 100644 > --- a/automation/eclair_analysis/ECLAIR/deviations.ecl > +++ b/automation/eclair_analysis/ECLAIR/deviations.ecl > @@ -28,6 +28,11 @@ not executable, and therefore it is safe for them to be > unreachable." > -config=MC3R1.R2.1,ignored_stmts+={"any()", "pure_decl()"} > -doc_end > > ++-doc_begin="The following autogenerated file is not linked deliberately." > ++-file_tag+={C_runtime_failures,"^automation/eclair_analysis/C-runtime-failures\\.rst\\.c$"} > ++-config=MC3R1.R2.1,reports+={deliberate, > "any_area(any_loc(file(C_runtime_failures)))"} > ++-doc_end > + > -doc_begin="Proving compliance with respect to Rule 2.2 is generally > impossible: > see https://arxiv.org/abs/2212.13933 for details. Moreover, peer review > gives us > confidence that no evidence of errors in the program's logic has been missed > due > -- > 2.34.1 >
Re: [XEN PATCH v2 6/7] xen/arm: vcpreg: address violation of MISRA C Rule 2.1
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > There is no path that reaches the call to 'advance_pc', thus violating MISRA C > Rule 2.1. > A call to ASSERT_UNREACHABLE() is added after the switch, despite this being > useful to detect errors only in debug builds; if that marker is ever reached, > a domain crash is triggered, as a defensive coding measure. > > No functional change. > > Signed-off-by: Julien Grall > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini > --- > The code changes (including the comment) were made by Julien in [1]; I added > the > commit text and all other informations. > > All the switch clauses, when expanded, end with a return statement > and the default clause has an unconditional return, therefore > advance_pc() is never reached. > > However, it has been deemed safer to crash the domain if the switch is ever > exited. > > [1] > https://lore.kernel.org/xen-devel/alpine.DEB.2.22.394.2312151232580.3175268@ubuntu-linux-20-04-desktop/T/#maa91d8025532455a6317119a1e4affa00a99e1ce > --- > xen/arch/arm/vcpreg.c | 10 -- > 1 file changed, 8 insertions(+), 2 deletions(-) > > diff --git a/xen/arch/arm/vcpreg.c b/xen/arch/arm/vcpreg.c > index 39aeda9dab62..a2d050070473 100644 > --- a/xen/arch/arm/vcpreg.c > +++ b/xen/arch/arm/vcpreg.c > @@ -707,8 +707,14 @@ void do_cp10(struct cpu_user_regs *regs, const union hsr > hsr) > inject_undef_exception(regs, hsr); > return; > } > - > -advance_pc(regs, hsr); > + > +/* > + * All the cases in the switch should return. If this is not the > + * case, then something went wrong and it is best to crash the > + * domain. > + */ > +ASSERT_UNREACHABLE(); > +domain_crash(current->domain); > } > > void do_cp(struct cpu_user_regs *regs, const union hsr hsr) > -- > 2.34.1 >
Re: [XEN PATCH v2 5/7] x86/platform: removed break to address MISRA C:2012 Rule 2.1
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > The break statement is redundant, hence it can be removed. > > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini
Re: [XEN PATCH v2 4/7] xen/arm: traps: add ASSERT_UNREACHABLE() where needed
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > The statements after a call to the noreturn function 'do_unexpected_trap' > can't be reached, thus violating MISRA C:2012 Rule 2.1 > ("A project shall not contain unreachable code."). > ASSERT_UNREACHABLE() is used to signal that the unreachable break-s are used > as > a defensive coding measure to prevent inadvertent fallthrough. > > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini > --- > Changes in v2: > - Use ASSERT_UNREACHABLE() to prevent mistakes. > --- > xen/arch/arm/traps.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c > index 3784e8276ef6..77220ba0927a 100644 > --- a/xen/arch/arm/traps.c > +++ b/xen/arch/arm/traps.c > @@ -2152,6 +2152,7 @@ void do_trap_hyp_sync(struct cpu_user_regs *regs) > case HSR_EC_SVE: > /* An SVE exception is a bug somewhere in hypervisor code */ > do_unexpected_trap("SVE trap at EL2", regs); > +ASSERT_UNREACHABLE(); > break; > #endif > case HSR_EC_DATA_ABORT_CURR_EL: > @@ -2171,7 +2172,7 @@ void do_trap_hyp_sync(struct cpu_user_regs *regs) > dump_hyp_walk(get_hfar(is_data)); > > do_unexpected_trap(fault, regs); > - > +ASSERT_UNREACHABLE(); > break; > } > default: > -- > 2.34.1 >
Re: [XEN PATCH v2 3/7] xen/arm: address MISRA C:2012 Rule 2.1
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > There are no paths that can reach the last return statement > of function 'vgic_v3_its_mmio_write' in 'vgic-v3-its.c' and > 'arch_memory_op' in 'arch/arm/mm.c', thus violating > MISRA C:2012 Rule 2.1: > "A project shall not contain unreachable code". > > Therefore, an ASSERT_UNREACHABLE() is inserted to remove the unreachable > return statement and protect against possible mistakes. > > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini > --- > Changes in v2: > - Changed resolution strategy to have an ASSERT_UNREACHABLE() before > the return. > --- > xen/arch/arm/mm.c | 1 + > xen/arch/arm/vgic-v3-its.c | 1 + > 2 files changed, 2 insertions(+) > > diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c > index eeb65ca6bb79..b15a18a49412 100644 > --- a/xen/arch/arm/mm.c > +++ b/xen/arch/arm/mm.c > @@ -283,6 +283,7 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) > arg) > return -ENOSYS; > } > > +ASSERT_UNREACHABLE(); > return 0; > } > > diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c > index 05429030b539..70b5aeb82219 100644 > --- a/xen/arch/arm/vgic-v3-its.c > +++ b/xen/arch/arm/vgic-v3-its.c > @@ -1409,6 +1409,7 @@ static int vgic_v3_its_mmio_write(struct vcpu *v, > mmio_info_t *info, > return 0; > } > > +ASSERT_UNREACHABLE(); > return 1; > > write_ignore_64: > -- > 2.34.1 >
Re: [XEN PATCH v2 2/7] x86/mm: address MISRA C:2012 Rule 2.1
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > The "return 0" after the swich statement in 'xen/arch/x86/mm.c' > is unreachable because all switch clauses end with returns, and > can thus be dropped. > > No functional changes. > > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini
Re: [XEN PATCH v2 1/7] xen/shutdown: address MISRA C:2012 Rule 2.1
On Mon, 18 Dec 2023, Nicola Vetrini wrote: > Given that 'hwdom_shutdown' is a noreturn function, unreachable > breaks can be eliminated to resolve violations of Rule 2.1. > > The rename s/maybe_reboot/reboot_or_halt/ is done to clarify > that the function is noreturn. > > No functional change. > > Signed-off-by: Nicola Vetrini Reviewed-by: Stefano Stabellini
Re: [XEN PATCH v3] automation/eclair: add deviations for MISRA C:2012 Rule 16.3
On Mon, 18 Dec 2023, Federico Serafini wrote: > MISRA C:2012 Rule 16.3 states that an unconditional break statement > shall terminate every switch-clause. > > Update ECLAIR configuration to take into account: > - continue, goto, return statements; > - functions with attribute noreturn; > - pseudo-keyword fallthrough; > - macro BUG(); > - comments. > > Update docs/misra/deviations.rst accordingly. > > Signed-off-by: Federico Serafini Reviewed-by: Stefano Stabellini
Re: [XEN PATCH v2] automation/eclair: update configuration of MISRA C:2012 Rule 5.6
On Mon, 18 Dec 2023, Federico Serafini wrote: > Deviate typedef names that are deliberately defined multiple times. > > Update docs/misra/deviations.rst accordingly. > > Tag Rule 5.6 as clean. > > Signed-off-by: Federico Serafini Reviewed-by: Stefano Stabellini
Re: [PATCH 9/9] xen: add SAF deviation for safe cast removal.
On Mon, 18 Dec 2023, Jan Beulich wrote: > On 15.12.2023 22:02, Stefano Stabellini wrote: > > On Fri, 15 Dec 2023, Jan Beulich wrote: > >> On 14.12.2023 23:04, Stefano Stabellini wrote: > >>> On Thu, 14 Dec 2023, Jan Beulich wrote: > On 14.12.2023 13:07, Simone Ballarin wrote: > > --- a/docs/misra/safe.json > > +++ b/docs/misra/safe.json > > @@ -28,6 +28,14 @@ > > }, > > { > > "id": "SAF-3-safe", > > +"analyser": { > > +"eclair": "MC3R1.R11.8" > > +}, > > +"name": "MC3R1.R11.8: removal of const qualifier to comply > > with function signature", > > +"text": "It is safe to cast away const qualifiers to > > comply with function signature if the function does not modify the > > pointee." > > I'm not happy with this description, as it invites for all sorts of > abuse. > Yet I'm also puzzled that ... > >>> > >>> We can improve the language but the concept would still be the same. For > >>> instance: > >>> > >>> A single function might or might not modify the pointee depending on > >>> other function parameters (for instance a single function that could > >>> either read or write depending on how it is called). It is safe to cast > >>> away const qualifiers when passing a parameter to a function of this > >>> type when the other parameters are triggering a read-only operation. > >> > >> Right, but I think the next here needs to be setting as tight boundaries > >> as possible: It should cover only this one specific pattern. Anything > >> else would better get its own deviation, imo. > > > > OK. What about: > > > > A single function might or might not modify the pointee depending on > > other function parameters, for instance a common pattern is to implement > > one function that could either read or write depending on how it is > > called. It is safe to cast away const qualifiers when passing a > > parameter to a function following this pattern when the other parameters > > are triggering a read-only operation. > > > > Feel free to suggest a better wording. 
> > Well, my point was to get rid of "for instance" and "common pattern" (and > anything alike). E.g.: > > "A single function could either read or write through a passed in pointer, > depending on how it is called. It is deemed safe to cast away a const > qualifier when passing a pointer to such a function, when the other > parameters guarantee read-only operation." That's fine by me
Re: Possible bug in Xen
On 18/12/2023 11:36 pm, Joe Tretter wrote: > Hi Andrew, > > I have tried it with your suggested command line, and it "feels" like > it makes the issue occur less often (but one never knows with sparse > intermittent issues). > The first time I ran my test, it took 23 minutes before the error > occurred. > However, I noticed that when running two processes simultaneously, the > issue occurs reasonably fast. Do you mean running the unit tests twice simultaneously? Or something else. > > I have, however, not been able to verify during runtime that the > parameter really arrived in Xen because /proc/cmdline didn't show the > Xen parameters. But I have taken a screen shot of what I have done to > submit the parameter. > > If you tell me how I can view the boot options in a running system to > be certain the option is effective, I'm happy to run the test again. > > Side remark: To make reasonably sure the issue really only happens > with Xen, I have re-verified that without Xen the issue does not > exist, even with two instances for more than 1.5 hours. Running `xl info` in dom0 will tell you various things, one of which is xen_cmdline. This is the command line that Xen is currently booted with. ~Andrew
[xen-4.17-testing test] 184165: tolerable FAIL - PUSHED
flight 184165 xen-4.17-testing real [real] http://logs.test-lab.xenproject.org/osstest/logs/184165/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt 16 saverestore-support-checkfail like 184109 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 184109 test-amd64-i386-xl-qemut-win7-amd64 19 guest-stop fail like 184109 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 184109 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 184109 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check fail like 184109 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 184109 test-armhf-armhf-libvirt-raw 15 saverestore-support-checkfail like 184109 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 184109 test-amd64-i386-xl-qemut-ws16-amd64 19 guest-stop fail like 184109 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 184109 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 184109 test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-amd64-i386-xl-pvshim14 guest-start fail never pass test-amd64-i386-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-i386-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 
saverestore-support-checkfail never pass test-armhf-armhf-xl-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd 15 saverestore-support-checkfail never pass test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-amd64-i386-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-qcow2 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt-raw 14 migrate-support-checkfail never pass version targeted for testing: xen 
949a4aad417621638231e4cf9f1b35e8e0328463 baseline version: xen 958706fd2e178ffe8e5597b05b694b494e24258b Last test of basis 184109 2023-12-12 14:07:07 Z6 days Testing same since 184165 2023-12-18 12:38:50 Z0 days1 attempts People who touched revisions under test: Jan Beulich jobs: build-amd64-xsm
[PATCH v2 3/3] xen/arm: Introduce "partial-emulation" xen command line option
This option is used to enable/disable partial emulation of registers at runtime. While CONFIG_PARTIAL_EMULATION enables support for partial emulation at compile time (ie adds code for partial emulation), this option may be enabled or disabled by Yocto or other build systems. However, customers can use scripts like Imagebuilder to generate uboot script for booting Xen. These scripts can use "partial-emulation=true" to support this at runtime. This option is set to false by default so that customers are fully aware when they enable partial emulation. They can also disable it without the need to rebuild Xen. Signed-off-by: Ayan Kumar Halder --- Changes from v1:- 1. New patch introduced in v2. docs/misc/xen-command-line.pandoc | 7 +++ xen/arch/arm/arm64/vsysreg.c | 5 - xen/arch/arm/include/asm/regs.h | 6 ++ xen/arch/arm/traps.c | 3 +++ xen/arch/arm/vcpreg.c | 17 +++-- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc index 8e65f8bd18..dd2a76fb19 100644 --- a/docs/misc/xen-command-line.pandoc +++ b/docs/misc/xen-command-line.pandoc @@ -1949,6 +1949,13 @@ This option is ignored in **pv-shim** mode. 
> Default: `on` +### partial-emulation (arm) +> `= ` + +> Default: `false` + +Flag to enable or disable partial emulation of registers + ### pci = List of [ serr=, perr= ] diff --git a/xen/arch/arm/arm64/vsysreg.c b/xen/arch/arm/arm64/vsysreg.c index 0fa8716884..02497c9fef 100644 --- a/xen/arch/arm/arm64/vsysreg.c +++ b/xen/arch/arm/arm64/vsysreg.c @@ -192,7 +192,10 @@ void do_sysreg(struct cpu_user_regs *regs, case HSR_SYSREG_DBGDTR_EL0: /* DBGDTR[TR]X_EL0 share the same encoding */ case HSR_SYSREG_DBGDTRTX_EL0: -return handle_raz_wi(regs, regidx, hsr.sysreg.read, hsr, 0); +if ( opt_partial_emulation ) +return handle_raz_wi(regs, regidx, hsr.sysreg.read, hsr, 0); +else +goto fail; #endif HSR_SYSREG_DBG_CASES(DBGBVR): diff --git a/xen/arch/arm/include/asm/regs.h b/xen/arch/arm/include/asm/regs.h index f998aedff5..b71fa20f91 100644 --- a/xen/arch/arm/include/asm/regs.h +++ b/xen/arch/arm/include/asm/regs.h @@ -13,6 +13,12 @@ #define psr_mode(psr,m) (((psr) & PSR_MODE_MASK) == (m)) +/* + * opt_partial_emulation: If true, partial emulation for registers will be + * enabled. 
+ */ +extern bool opt_partial_emulation; + static inline bool regs_mode_is_32bit(const struct cpu_user_regs *regs) { #ifdef CONFIG_ARM_32 diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c index f5ab555b19..c8c00d2dd5 100644 --- a/xen/arch/arm/traps.c +++ b/xen/arch/arm/traps.c @@ -42,6 +42,9 @@ #include #include +bool opt_partial_emulation = false; +boolean_param("partial-emulation", opt_partial_emulation); + /* The base of the stack must always be double-word aligned, which means * that both the kernel half of struct cpu_user_regs (which is pushed in * entry.S) and struct cpu_info (which lives at the bottom of a Xen diff --git a/xen/arch/arm/vcpreg.c b/xen/arch/arm/vcpreg.c index 52a8732423..6bf417487a 100644 --- a/xen/arch/arm/vcpreg.c +++ b/xen/arch/arm/vcpreg.c @@ -578,12 +578,17 @@ void do_cp14_32(struct cpu_user_regs *regs, const union hsr hsr) #ifdef CONFIG_PARTIAL_EMULATION case HSR_CPREG32(DBGDTRTXINT): { -/* - * As DBGDSCRINT is emulated which is architecturally mapped to AArch64 - * register MDCCSR_EL0. MDSCR_EL1 is not emulated. So DBGDTR[TR]XINT can - * only be accessed as EL0 level. - */ -return handle_raz_wi(regs, regidx, cp32.read, hsr, 0); +if ( opt_partial_emulation ) +{ +/* + * As DBGDSCRINT is emulated which is architecturally mapped to + * AArch64 register MDCCSR_EL0. MDSCR_EL1 is not emulated. So + * DBGDTR[TR]XINT can only be accessed as EL0 level. + */ +return handle_raz_wi(regs, regidx, cp32.read, hsr, 0); +} +else +goto fail; } #endif -- 2.25.1
[PATCH v2 2/3] xen: arm: Introduce CONFIG_PARTIAL_EMULATION
There can be situations when the registers cannot be emulated to their full functionality. This can be due to the complexity involved. In such cases, we can emulate those registers as RAZ/WI. A suitable example of this is DBGDTRTX_EL0 (on Arm64) and DBGDTRTXINT (on Arm32). As this register is not optional, guests may try to access this. Currently, this would result in a crash. With this patch, Xen will emulate this as RAZ/WI and the crash will be avoided. Such partial emulations will be enclosed within CONFIG_PARTIAL_EMULATION. Also, "CONFIG_PARTIAL_EMULATION" defaults to y, so that Xen does not need to be rebuilt in order to prevent guests from crashing while accessing registers like DBGDTRTX_EL0. Signed-off-by: Ayan Kumar Halder --- Changes from v1:- 1. New patch introduced in v2. xen/arch/arm/Kconfig | 8 xen/arch/arm/arm64/vsysreg.c | 3 +++ xen/arch/arm/vcpreg.c| 2 ++ 3 files changed, 13 insertions(+) diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig index 50e9bfae1a..8f25d9cba0 100644 --- a/xen/arch/arm/Kconfig +++ b/xen/arch/arm/Kconfig @@ -225,6 +225,14 @@ config STATIC_EVTCHN This option enables establishing static event channel communication between domains on a dom0less system (domU-domU as well as domU-dom0). +config PARTIAL_EMULATION +bool "Enable partial emulation for registers" +default y +help + This option enables partial emulation for registers to avoid guests + crashing when accessing registers which are not optional but have not been + emulated to their complete functionality. 
+ endmenu menu "ARM errata workaround via the alternative framework" diff --git a/xen/arch/arm/arm64/vsysreg.c b/xen/arch/arm/arm64/vsysreg.c index ebeb83dd65..0fa8716884 100644 --- a/xen/arch/arm/arm64/vsysreg.c +++ b/xen/arch/arm/arm64/vsysreg.c @@ -188,10 +188,13 @@ void do_sysreg(struct cpu_user_regs *regs, return handle_ro_read_val(regs, regidx, hsr.sysreg.read, hsr, 0, 1U << 29); } +#ifdef CONFIG_PARTIAL_EMULATION case HSR_SYSREG_DBGDTR_EL0: /* DBGDTR[TR]X_EL0 share the same encoding */ case HSR_SYSREG_DBGDTRTX_EL0: return handle_raz_wi(regs, regidx, hsr.sysreg.read, hsr, 0); +#endif + HSR_SYSREG_DBG_CASES(DBGBVR): HSR_SYSREG_DBG_CASES(DBGBCR): HSR_SYSREG_DBG_CASES(DBGWVR): diff --git a/xen/arch/arm/vcpreg.c b/xen/arch/arm/vcpreg.c index 5087125111..52a8732423 100644 --- a/xen/arch/arm/vcpreg.c +++ b/xen/arch/arm/vcpreg.c @@ -575,6 +575,7 @@ void do_cp14_32(struct cpu_user_regs *regs, const union hsr hsr) case HSR_CPREG32(DBGOSLSR): return handle_ro_read_val(regs, regidx, cp32.read, hsr, 1, 1 << 3); +#ifdef CONFIG_PARTIAL_EMULATION case HSR_CPREG32(DBGDTRTXINT): { /* @@ -584,6 +585,7 @@ void do_cp14_32(struct cpu_user_regs *regs, const union hsr hsr) */ return handle_raz_wi(regs, regidx, cp32.read, hsr, 0); } +#endif case HSR_CPREG32(DBGVCR): case HSR_CPREG32(DBGBVR0): -- 2.25.1
[PATCH v2 1/3] xen/arm: Add emulation of Debug Data Transfer Registers
From: Michal Orzel Currently if user enables HVC_DCC config option in Linux, it invokes access to debug data transfer registers (ie MDCCSR_EL0 on arm64, DBGDTRTXINT on arm32). As these registers are not emulated, Xen injects an undefined exception to the guest. And Linux crashes. We wish to avoid this crash by adding a "partial" emulation. DBGDTR_EL0 is emulated as TXfull. Refer ARM DDI 0487I.a ID081822, D17.3.8, DBGDTRTX_EL0 "If TXfull is set to 1, set DTRRX and DTRTX to UNKNOWN" Thus, any OS is expected to read DBGDTR_EL0 and check for TXfull before using DBGDTRTX_EL0. Linux does it via hvc_dcc_init() ---> hvc_dcc_check() , it returns -ENODEV. In this way, we are preventing the guest to be aborted. We also emulate DBGDTRTX_EL0 as RAZ/WI. We have added emulation for AArch32 variant of these registers as well. Also in the case of AArch32, DBGOSLSR is emulated in the same way as its AArch64 variant (ie OSLSR_EL1). This is to ensure that DBGOSLSR.OSLK is 0, thus MDSCR_EL1.TXfull is treated as UNK/SBZP. Thus only MDCCSR_EL0 can be emulated (which is DBGDTRTXINT on arm32). So, DBGDTR[TR]XINT is emulated as RAZ/WI. Signed-off-by: Michal Orzel Signed-off-by: Ayan Kumar Halder --- Changes from v1 :- 1. DBGDTR_EL0 does not emulate RXfull. This is to avoid giving the OS any indication that the RX buffer is full and is waiting to be read. 2. In Arm32, DBGOSLSR is emulated. Also DBGDTRTXINT is emulated at EL0 only. 3. Fixed the commit message and inline code comments. 
xen/arch/arm/arm64/vsysreg.c | 26 +++ xen/arch/arm/include/asm/arm64/hsr.h | 3 +++ xen/arch/arm/include/asm/cpregs.h| 2 ++ xen/arch/arm/include/asm/traps.h | 4 +++ xen/arch/arm/traps.c | 18 + xen/arch/arm/vcpreg.c| 38 +++- 6 files changed, 79 insertions(+), 12 deletions(-) diff --git a/xen/arch/arm/arm64/vsysreg.c b/xen/arch/arm/arm64/vsysreg.c index b5d54c569b..ebeb83dd65 100644 --- a/xen/arch/arm/arm64/vsysreg.c +++ b/xen/arch/arm/arm64/vsysreg.c @@ -159,9 +159,6 @@ void do_sysreg(struct cpu_user_regs *regs, * * Unhandled: *MDCCINT_EL1 - *DBGDTR_EL0 - *DBGDTRRX_EL0 - *DBGDTRTX_EL0 *OSDTRRX_EL1 *OSDTRTX_EL1 *OSECCR_EL1 @@ -172,11 +169,29 @@ void do_sysreg(struct cpu_user_regs *regs, case HSR_SYSREG_MDSCR_EL1: return handle_raz_wi(regs, regidx, hsr.sysreg.read, hsr, 1); case HSR_SYSREG_MDCCSR_EL0: +{ /* + * Xen doesn't expose a real (or emulated) Debug Communications Channel + * (DCC) to a domain. Yet the Arm ARM implies this is not an optional + * feature. So some domains may start to probe it. For instance, the + * HVC_DCC driver in Linux (since f35dc083 and at least up to v6.7), + * will try to write some characters and check if the transmit buffer + * has emptied. By setting TX status bit to indicate the transmit buffer + * is full. This we would hint the OS that the DCC is probably not + * working. + * + * Bit 29: TX full + * * Accessible at EL0 only if MDSCR_EL1.TDCC is set to 0. We emulate that * register as RAZ/WI above. So RO at both EL0 and EL1. */ -return handle_ro_raz(regs, regidx, hsr.sysreg.read, hsr, 0); +return handle_ro_read_val(regs, regidx, hsr.sysreg.read, hsr, 0, + 1U << 29); +} +case HSR_SYSREG_DBGDTR_EL0: +/* DBGDTR[TR]X_EL0 share the same encoding */ +case HSR_SYSREG_DBGDTRTX_EL0: +return handle_raz_wi(regs, regidx, hsr.sysreg.read, hsr, 0); HSR_SYSREG_DBG_CASES(DBGBVR): HSR_SYSREG_DBG_CASES(DBGBCR): HSR_SYSREG_DBG_CASES(DBGWVR): @@ -393,7 +408,8 @@ void do_sysreg(struct cpu_user_regs *regs, * * And all other unknown registers. 
*/ -default: +default: goto fail; +fail: { const struct hsr_sysreg sysreg = hsr.sysreg; diff --git a/xen/arch/arm/include/asm/arm64/hsr.h b/xen/arch/arm/include/asm/arm64/hsr.h index e691d41c17..1495ccddea 100644 --- a/xen/arch/arm/include/asm/arm64/hsr.h +++ b/xen/arch/arm/include/asm/arm64/hsr.h @@ -47,6 +47,9 @@ #define HSR_SYSREG_OSDLR_EL1 HSR_SYSREG(2,0,c1,c3,4) #define HSR_SYSREG_DBGPRCR_EL1HSR_SYSREG(2,0,c1,c4,4) #define HSR_SYSREG_MDCCSR_EL0 HSR_SYSREG(2,3,c0,c1,0) +#define HSR_SYSREG_DBGDTR_EL0 HSR_SYSREG(2,3,c0,c4,0) +#define HSR_SYSREG_DBGDTRTX_EL0 HSR_SYSREG(2,3,c0,c5,0) +#define HSR_SYSREG_DBGDTRRX_EL0 HSR_SYSREG(2,3,c0,c5,0) #define HSR_SYSREG_DBGBVRn_EL1(n) HSR_SYSREG(2,0,c0,c##n,4) #define HSR_SYSREG_DBGBCRn_EL1(n) HSR_SYSREG(2,0,c0,c##n,5) diff --git a/xen/arch/arm/include/asm/cpregs.h b/xen/arch/arm/include/asm/cpregs.h index 6b083de204..aec9e8f329 100644 --- a/xen/arch/arm/include/asm/cpregs.h +++ b/xen/arch/arm/include/asm/cpregs.h @@ -75,6 +75,8 @@ #define
[PATCH v2 0/3] xen/arm: Add emulation of Debug Data Transfer Registers
Hi, Refer https://lore.kernel.org/all/alpine.DEB.2.22.394.2312071341540.1265976@ubuntu-linux-20-04-desktop/T/ for the previous discussion on this issue. Also, the linux earlycon hvc driver has been fixed. See https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git/commit/?h=tty-next=0ec058ece2f933aed66b76bd5cb9b5e6764853c3 Changes from v1:- 1. Split the change across 3 patches as per the design consensus. Ayan Kumar Halder (3): xen/arm: Add emulation of Debug Data Transfer Registers xen: arm: Introduce CONFIG_PARTIAL_EMULATION xen/arm: Introduce "partial-emulation" xen command line option docs/misc/xen-command-line.pandoc| 7 + xen/arch/arm/Kconfig | 8 + xen/arch/arm/arm64/vsysreg.c | 32 --- xen/arch/arm/include/asm/arm64/hsr.h | 3 ++ xen/arch/arm/include/asm/cpregs.h| 2 ++ xen/arch/arm/include/asm/regs.h | 6 xen/arch/arm/include/asm/traps.h | 4 +++ xen/arch/arm/traps.c | 21 + xen/arch/arm/vcpreg.c| 47 +++- 9 files changed, 117 insertions(+), 13 deletions(-) -- 2.25.1
Re: [BUG]i2c_hid_acpi broken with 4.17.2 on Framework Laptop 13 AMD
Le lun. 18 déc. 2023 à 18:04, Sébastien Chaumat a écrit : > > > > Le lun. 18 déc. 2023, 17:44, Jan Beulich a écrit : >> >> On 18.12.2023 17:21, Sébastien Chaumat wrote: >> > On 05.12.2023 21:31, Sébastien Chaumat wrote: >> >>> This issue seems that IRQ 7 (the GPIO controller) is natively fasteoi >> >>> (so level type) while in xen it is mapped to oapic-edge instead of >> >>> oapic-level >> >>> as the SSDT indicates : >> >>> >> >>> Device (GPIO) >> >>> >> >>> { >> >>> Name (_HID, "AMDI0030") // _HID: Hardware ID >> >>> Name (_CID, "AMDI0030") // _CID: Compatible ID >> >>> Name (_UID, Zero) // _UID: Unique ID >> >>> Method (_CRS, 0, NotSerialized) // _CRS: Current Resource >> >>> Settings >> >>> { >> >>> Name (RBUF, ResourceTemplate () >> >>> { >> >>> Interrupt (ResourceConsumer, Level, ActiveLow, Shared, >> >>> ,, ) >> >>> { >> >>> 0x0007, >> >>>} >> >>> Any idea why ? >> >> >> >> Information coming from AML is required to be handed down by Dom0 to Xen. >> >> May want checking that (a) Dom0 properly does so and (b) Xen doesn't screw >> >> up in consuming that data. See PHYSDEVOP_setup_gsi. I wonder if this is >> >> specific to it being IRQ7 which GPIO uses, as at the (master) PIC IRQ7 is >> >> also the spurious vector. You may want to retry with the tip of the 4.17 >> >> branch (soon to become 4.17.3) - while it doesn't look very likely to me >> >> that recent backports there were related, it may still be that they make >> >> a difference. >> >> >> > >> > testing with 4.17.3: >> > >> > Adding some printk in PHYSDEVOP_setup_gsi, I see (in xl dmesg) that >> > (XEN) PHYSDEVOP_setup_gsi setup_gsi : gsi: 7 triggering: 1 polarity: 1 >> > >> > but later on in dmesg I see : >> > [1.747958] xen: registering gsi 7 triggering 0 polarity 1 >> > >> > So triggering is flipped from 1 to 0 (cannot find the definition for >> > those values). >> > Could this be the culprit ? >> >> Possibly. 
Since it would be the kernel to invoke PHYSDEVOP_setup_gsi, it >> looks as if the kernel was confused about which trigger mode to use. Have >> you figured from where the kernel takes the two different values? >> Side note: dom0 is PV.
xen | Successful pipeline for RELEASE-4.17.3 | 949a4aad
Pipeline #574461 has passed! Project: xen ( https://gitlab.com/xen-project/xen ) Branch: RELEASE-4.17.3 ( https://gitlab.com/xen-project/xen/-/commits/RELEASE-4.17.3 ) Commit: 949a4aad ( https://gitlab.com/xen-project/xen/-/commit/949a4aad417621638231e4cf9f1b35e8e0328463 ) Commit Message: update Xen version to 4.17.3 Commit Author: Jan Beulich ( https://gitlab.com/jbeulich ) Pipeline #574461 ( https://gitlab.com/xen-project/xen/-/pipelines/574461 ) triggered by Ganis ( https://gitlab.com/ganis ) successfully completed 84 jobs in 2 stages. -- You're receiving this email because of your account on gitlab.com.
[xen-unstable-smoke test] 184168: tolerable all pass - PUSHED
flight 184168 xen-unstable-smoke real [real] http://logs.test-lab.xenproject.org/osstest/logs/184168/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass version targeted for testing: xen 0cc74376d6823e0883f89556be2a267f2240a558 baseline version: xen a60067f39882a4f40547fc0cd8177bac89cb01b7 Last test of basis 184147 2023-12-15 22:03:52 Z2 days Testing same since 184168 2023-12-18 15:03:58 Z0 days1 attempts People who touched revisions under test: Jan Beulich Maria Celeste Cesario Maria Celeste Cesario Nicola Vetrini Oleksii Kurochko Simone Ballarin jobs: build-arm64-xsm pass build-amd64 pass build-armhf pass build-amd64-libvirt pass test-armhf-armhf-xl pass test-arm64-arm64-xl-xsm pass test-amd64-amd64-xl-qemuu-debianhvm-amd64pass test-amd64-amd64-libvirt pass sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Pushing revision : To xenbits.xen.org:/home/xen/git/xen.git a60067f398..0cc74376d6 0cc74376d6823e0883f89556be2a267f2240a558 -> smoke
Re: [PATCH v2 08/15] VMX: convert vmx_basic_msr
On 27/11/2023 12:44 pm, Jan Beulich wrote: > On 24.11.2023 23:41, Andrew Cooper wrote: >> On 24/11/2023 8:41 am, Jan Beulich wrote: >>> ... to a struct field, which is then going to be accompanied by other >>> capability/control data presently living in individual variables. As >>> this structure isn't supposed to be altered post-boot, put it in >>> .data.ro_after_init right away. >>> >>> Suggested-by: Roger Pau Monné >>> Signed-off-by: Jan Beulich >> For (usable) nested virt, we're going to need the VMX MSRs, in their >> architectural form, in struct cpu_policy. And just like CPUID features, >> I want it to end up with nice bitfields to use. >> >> Looking through the rest of this series, vmx_caps ends up almost in >> architectural form. >> >> Could I talk you into having a "struct vmx_msrs" (or similar - 'caps' >> doesn't feel quite right here) in the policy object, and also >> instantiating one instance of it for this purpose here? > I was actually wondering while doing the conversion. The main reason I > didn't go that route right away was that I wasn't really certain whether > what I'd put there would the really be the (largely) final shape it > wants to take there. (One thing you've likely noticed I didn't convert > is _vmx_misc_cap, which right now only exists as a local variable in > vmx_init_vmcs_config().) > >> AFAICT, it would only be a minor deviation to the latter half of this >> series, but it would be an excellent start to fixing nested virt - and >> getting this data in the policy really is the first task in getting the >> ball rolling on nested virt. > How much of a further change it would end up being (or where that change > would occur) depends on another aspect: When put in cpu-policy.h (and I > take it you mean the lib/ instance, not the asm/ one), it would seem > natural and perhaps even necessary to properly introduce bitfields for > each of the MSRs right away. That'll lead to a "raw" field as well. 
In > VMX code (mostly its cpu_has_* #define-s), I'd then either need to use > .raw (perhaps a little ugly here and there) or go with using the > individual bitfields right away (likely eliminating the need for many of > the constant #define-s), which increases the risk of inadvertent mistakes > (and their overlooking during review). > >> I don't mind about serialising/de-serialsing it - that still has a bit >> of userspace complexity to work out, and depends on some of the cleanup >> still needing a repost. >> >> If you don't want to take the added space in cpu_policy yet, how about >> having the declaration there and just forgo instantiating the subobject >> in the short term? > There's quite a bit of effectively dead space in the struct already; I > think I wouldn't mind instantiating the struct there right away. So long > as you're convinced it's going to be used there in not too distant a > future. > > But: If I go as far, why would I introduce a global instance of the new > struct? Wouldn't it then make more sense to use host_cpu_policy right > away? I probably would keep populating it in vmx_init_vmcs_config() to > limit churn for now, but consumers of the flags could then right away > use the host policy. George has stated an intent to pick nested virt up imminently. I'll have to defer to him on when this will actually start. But, sorting out this data in the policies is the next step, whenever that occurs. If you fancy going all the way to use the raw/host policy then great, but I expect that would be a large amount of extra work, hence the suggestion to just use the "inner" struct in the short term. Conversion to bitfields would want to be separate patch anyway, at which point an A/B compile can confirm whether there was no resulting change. 
I'm happy if you want to do all of this, but it's a lot of work, and simply having the data in plain architectural uint64_t in the host policy is something that I thought would be a very minor change to your current series, but with a useful step towards nested virt. One open question, before we get too far into this, is still whether to express half of these as MSR-features like ARCH_CAPS. Linux does, and there is a very complex set of dependencies between certain properties, although I have a sneaking suspicion that the dependency logic will needed at runtime as the L1 hypervisor changes the various controls. ~Andrew
Re: [XEN PATCH 3/5] xen/acpi: Use NULL as a null pointer constant
On 2023-12-18 18:05, Jan Beulich wrote: On 14.12.2023 12:44, Nicola Vetrini wrote: --- a/xen/include/acpi/acmacros.h +++ b/xen/include/acpi/acmacros.h @@ -111,7 +111,7 @@ #define ACPI_TO_POINTER(i) ACPI_ADD_PTR (void,(void *) NULL,(acpi_native_uint) i) #define ACPI_TO_INTEGER(p) ACPI_PTR_DIFF (p,(void *) NULL) -#define ACPI_OFFSET(d,f)(acpi_size) ACPI_PTR_DIFF (&(((d *)0)->f),(void *) NULL) +#define ACPI_OFFSET(d,f)(acpi_size) ACPI_PTR_DIFF (&(((d *)NULL)->f),(void *) NULL) #define ACPI_PHYSADDR_TO_PTR(i) ACPI_TO_POINTER(i) #define ACPI_PTR_TO_PHYSADDR(i) ACPI_TO_INTEGER(i) This again is an ACPI CA header, which I'm hesitant to see being changed. Jan Yes, I sent this before the discussion on R11.8. I'm ok with adding all these files derived from ACPI CA to exclude-list.json, so in that case the patch can be ignored. -- Nicola Vetrini, BSc Software Engineer, BUGSENG srl (https://bugseng.com)
Re: [PATCH v4 2/4] x86/spec-ctrl: defer context-switch IBPB until guest entry
On Mon, Dec 18, 2023 at 02:58:01PM +0100, Jan Beulich wrote: > On 18.12.2023 13:39, Roger Pau Monné wrote: > > On Tue, Feb 14, 2023 at 05:11:05PM +0100, Jan Beulich wrote: > >> In order to avoid clobbering Xen's own predictions, defer the barrier as > >> much as possible. Merely mark the CPU as needing a barrier issued the > >> next time we're exiting to guest context. > > > > While I understand that doing the flush in the middle of the guest > > context might not be ideal, as it's my understanding we also > > s/guest context/context switch/? Indeed, sorry. > > needlessly flush Xen predictions, I'm unsure whether this makes any > > difference in practice, and IMO just makes the exit to guest paths > > more complex. > > I need to redirect this question to Andrew, who suggested that doing so > can be expected to make a difference. When we were discussing this, I > could easily see it might make a difference, but I cannot provide hard > proof. That's fine, but with the added complexity in the return to guests paths I think we need some kind of justification for such a change. If it was the other way around I could easily see it as a benefits if just for code clarity reasons. Thanks, Roger.
Re: [PATCH v4 4/4] x86/PV: issue branch prediction barrier when switching 64-bit guest to kernel mode
On Tue, Feb 14, 2023 at 05:12:08PM +0100, Jan Beulich wrote: > Since both kernel and user mode run in ring 3, they run in the same > "predictor mode". That only true when IBRS is enabled, otherwise all CPU modes share the same predictor mode? > While the kernel could take care of this itself, doing > so would be yet another item distinguishing PV from native. Additionally > we're in a much better position to issue the barrier command, and we can > save a #GP (for privileged instruction emulation) this way. > > To allow to recover performance, introduce a new VM assist allowing the > guest kernel to suppress this barrier. Make availability of the assist > dependent upon the command line control, such that kernels have a way to > know whether their request actually took any effect. > > Note that because of its use in PV64_VM_ASSIST_MASK, the declaration of > opt_ibpb_mode_switch can't live in asm/spec_ctrl.h. > > Signed-off-by: Jan Beulich > --- > Is the placement of the clearing of opt_ibpb_ctxt_switch correct in > parse_spec_ctrl()? Shouldn't it live ahead of the "disable_common" > label, as being about guest protection, not Xen's? > > Adding setting of the variable to the "pv" sub-case in parse_spec_ctrl() > didn't seem quite right to me, considering that we default it to the > opposite of opt_ibpb_entry_pv. > --- > v4: Correct the print_details() change. Re-base in particular over > changes earlier in the series. > v3: Leverage exit-IBPB. Introduce separate command line control. > v2: Leverage entry-IBPB. Add VM assist. Re-base. 
> > --- a/docs/misc/xen-command-line.pandoc > +++ b/docs/misc/xen-command-line.pandoc > @@ -2320,8 +2320,8 @@ By default SSBD will be mitigated at run > ### spec-ctrl (x86) > > `= List of [ , xen=, {pv,hvm}=, > > {msr-sc,rsb,md-clear,ibpb-entry}=|{pv,hvm}=, > -> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, > -> eager-fpu,l1d-flush,branch-harden,srb-lock, > +> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ibpb-mode-switch, > +> ssbd,psfd,eager-fpu,l1d-flush,branch-harden,srb-lock, > > unpriv-mmio}= ]` > > Controls for speculative execution sidechannel mitigations. By default, Xen > @@ -2403,7 +2403,10 @@ default. > > On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=` > option can be used to force (the default) or prevent Xen from issuing branch > -prediction barriers on vcpu context switches. > +prediction barriers on vcpu context switches. On such hardware the > +`ibpb-mode-switch` option can be used to control whether, by default, Xen > +would issue branch prediction barriers when 64-bit PV guests switch from > +user to kernel mode. If enabled, guest kernels can op out of this behavior. > > On all hardware, the `eager-fpu=` option can be used to force or prevent Xen > from using fully eager FPU context switches. This is currently implemented > as > --- a/xen/arch/x86/include/asm/domain.h > +++ b/xen/arch/x86/include/asm/domain.h > @@ -742,6 +742,8 @@ static inline void pv_inject_sw_interrup > pv_inject_event(); > } > > +extern int8_t opt_ibpb_mode_switch; > + > #define PV32_VM_ASSIST_MASK ((1UL << VMASST_TYPE_4gb_segments)| \ > (1UL << VMASST_TYPE_4gb_segments_notify) | \ > (1UL << VMASST_TYPE_writable_pagetables) | \ > @@ -753,7 +755,9 @@ static inline void pv_inject_sw_interrup > * but we can't make such requests fail all of the sudden. 
> */ > #define PV64_VM_ASSIST_MASK (PV32_VM_ASSIST_MASK | \ > - (1UL << VMASST_TYPE_m2p_strict)) > + (1UL << VMASST_TYPE_m2p_strict) | \ > + ((opt_ibpb_mode_switch + 0UL) << \ > + VMASST_TYPE_mode_switch_no_ibpb)) I'm wondering that it's kind of weird to offer the option to PV domUs if opt_ibpb_entry_pv is set, as then the guest mode switch will always (implicitly) do a IBPB as requiring an hypercall and thus take an entry point into Xen. I guess it's worth having it just as a way to signal to Xen that the hypervisor does perform an IBPB, even if the guest cannot disable it. > #define HVM_VM_ASSIST_MASK (1UL << VMASST_TYPE_runstate_update_flag) > > #define arch_vm_assist_valid_mask(d) \ > --- a/xen/arch/x86/pv/domain.c > +++ b/xen/arch/x86/pv/domain.c > @@ -455,6 +455,7 @@ static void _toggle_guest_pt(struct vcpu > void toggle_guest_mode(struct vcpu *v) > { > const struct domain *d = v->domain; > +struct cpu_info *cpu_info = get_cpu_info(); > unsigned long gs_base; > > ASSERT(!is_pv_32bit_vcpu(v)); > @@ -467,15 +468,21 @@ void toggle_guest_mode(struct vcpu *v) > if ( v->arch.flags & TF_kernel_mode ) > v->arch.pv.gs_base_kernel = gs_base; > else > +{ > v->arch.pv.gs_base_user = gs_base; > + > +if (
Re: [XEN PATCH 3/5] xen/acpi: Use NULL as a null pointer constant
On 14.12.2023 12:44, Nicola Vetrini wrote: > --- a/xen/include/acpi/acmacros.h > +++ b/xen/include/acpi/acmacros.h > @@ -111,7 +111,7 @@ > > #define ACPI_TO_POINTER(i) ACPI_ADD_PTR (void,(void *) > NULL,(acpi_native_uint) i) > #define ACPI_TO_INTEGER(p) ACPI_PTR_DIFF (p,(void *) NULL) > -#define ACPI_OFFSET(d,f)(acpi_size) ACPI_PTR_DIFF (&(((d > *)0)->f),(void *) NULL) > +#define ACPI_OFFSET(d,f)(acpi_size) ACPI_PTR_DIFF (&(((d > *)NULL)->f),(void *) NULL) > #define ACPI_PHYSADDR_TO_PTR(i) ACPI_TO_POINTER(i) > #define ACPI_PTR_TO_PHYSADDR(i) ACPI_TO_INTEGER(i) > This again is an ACPI CA header, which I'm hesitant to see being changed. Jan
Re: [BUG]i2c_hid_acpi broken with 4.17.2 on Framework Laptop 13 AMD
Le lun. 18 déc. 2023, 17:44, Jan Beulich a écrit : > On 18.12.2023 17:21, Sébastien Chaumat wrote: > > On 05.12.2023 21:31, Sébastien Chaumat wrote: > >>> This issue seems that IRQ 7 (the GPIO controller) is natively fasteoi > >>> (so level type) while in xen it is mapped to oapic-edge instead of > >>> oapic-level > >>> as the SSDT indicates : > >>> > >>> Device (GPIO) > >>> > >>> { > >>> Name (_HID, "AMDI0030") // _HID: Hardware ID > >>> Name (_CID, "AMDI0030") // _CID: Compatible ID > >>> Name (_UID, Zero) // _UID: Unique ID > >>> Method (_CRS, 0, NotSerialized) // _CRS: Current Resource > Settings > >>> { > >>> Name (RBUF, ResourceTemplate () > >>> { > >>> Interrupt (ResourceConsumer, Level, ActiveLow, > Shared, ,, ) > >>> { > >>> 0x0007, > >>>} > >>> Any idea why ? > >> > >> Information coming from AML is required to be handed down by Dom0 to > Xen. > >> May want checking that (a) Dom0 properly does so and (b) Xen doesn't > screw > >> up in consuming that data. See PHYSDEVOP_setup_gsi. I wonder if this is > >> specific to it being IRQ7 which GPIO uses, as at the (master) PIC IRQ7 > is > >> also the spurious vector. You may want to retry with the tip of the 4.17 > >> branch (soon to become 4.17.3) - while it doesn't look very likely to me > >> that recent backports there were related, it may still be that they make > >> a difference. > >> > > > > testing with 4.17.3: > > > > Adding some printk in PHYSDEVOP_setup_gsi, I see (in xl dmesg) that > > (XEN) PHYSDEVOP_setup_gsi setup_gsi : gsi: 7 triggering: 1 polarity: 1 > > > > but later on in dmesg I see : > > [1.747958] xen: registering gsi 7 triggering 0 polarity 1 > > > > So triggering is flipped from 1 to 0 (cannot find the definition for > > those values). > > Could this be the culprit ? > > Possibly. Since it would be the kernel to invoke PHYSDEVOP_setup_gsi, it > looks as if the kernel was confused about which trigger mode to use. Have > you figured from where the kernel takes the two different values? 
> > Would you mind pointing me to the definition for those values first? I did not find what 0/1 means in this context. Thanks
Re: [XEN PATCH 2/5] x86/ioapic: use NULL as a null pointer constant
On 14.12.2023 22:30, Stefano Stabellini wrote: > On Thu, 14 Dec 2023, Nicola Vetrini wrote: >> Resolves violations of MISRA C Rule 11.9. >> No functional change. >> >> Signed-off-by: Nicola Vetrini > > Reviewed-by: Stefano Stabellini Acked-by: Jan Beulich
Re: [XEN PATCH 1/5] xen/hvm: use NULL as a null pointer constant
On 14.12.2023 22:29, Stefano Stabellini wrote: > On Thu, 14 Dec 2023, Nicola Vetrini wrote: >> No functional change. >> >> Signed-off-by: Nicola Vetrini > > Reviewed-by: Stefano Stabellini Acked-by: Jan Beulich Yet it would have been really nice if style had been tidied of the lines which are touched anyway. Jan
Re: [PATCH v2 37/39] xen/rirscv: add minimal amount of stubs to build full Xen
On 24.11.2023 11:30, Oleksii Kurochko wrote: > --- a/xen/arch/riscv/mm.c > +++ b/xen/arch/riscv/mm.c > @@ -1,19 +1,23 @@ > /* SPDX-License-Identifier: GPL-2.0-only */ > > +#include > #include > #include > #include > #include > #include > +#include > #include > > #include > #include > #include > -#include > #include > #include > > +unsigned long frametable_base_pdx __read_mostly; > +unsigned long frametable_virt_end __read_mostly; Nit (style): unsigned long __read_mostly frametable_base_pdx; unsigned long __read_mostly frametable_virt_end; (i.e. attributes generally between type and identifier). Plus __read_mostly or __ro_after_init? > @@ -294,3 +298,49 @@ unsigned long __init calc_phys_offset(void) > phys_offset = load_start - XEN_VIRT_START; > return phys_offset; > } > + > +void put_page(struct page_info *page) > +{ > +assert_failed(__func__); > +} > + > +unsigned long get_upper_mfn_bound(void) > +{ > +/* No memory hotplug yet, so current memory limit is the final one. */ > +return max_page - 1; > +} > + > +void arch_dump_shared_mem_info(void) > +{ > +WARN(); > +} > + > +int populate_pt_range(unsigned long virt, unsigned long nr_mfns) > +{ > +assert_failed(__func__); > +return -1; > +} Whats the pattern between picking WARN(), assert_failed() (which I don't think you should be using anyway; if an assertion, then ASSERT_UNREACHABLE()) and BUG() (as used earlier in stubs living in header files)? > --- /dev/null > +++ b/xen/arch/riscv/stubs.c > @@ -0,0 +1,426 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +#include > +#include > +#include > +#include > +#include > +#include > +#include I think I can see why you need the former of these last two, but do you really need the latter? > +#include > + > +/* smpboot.c */ > + > +cpumask_t cpu_online_map; > +cpumask_t cpu_present_map; > +cpumask_t cpu_possible_map; > + > +/* ID of the PCPU we're running on */ > +DEFINE_PER_CPU(unsigned int, cpu_id); > +/* XXX these seem awfully x86ish... 
*/ > +/* representing HT siblings of each logical CPU */ > +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_mask); > +/* representing HT and core siblings of each logical CPU */ > +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_mask); > + > +nodemask_t __read_mostly node_online_map = { { [0] = 1UL } }; > + > +/* time.c */ > + > +unsigned long __read_mostly cpu_khz; /* CPU clock frequency in kHz. */ > + > +s_time_t get_s_time(void) > +{ > +BUG(); > +} > + > +int reprogram_timer(s_time_t timeout) > +{ > +BUG(); > +} > + > +void send_timer_event(struct vcpu *v) > +{ > +BUG(); > +} > + > +void domain_set_time_offset(struct domain *d, int64_t time_offset_seconds) > +{ > +BUG(); > +} > + > +/* shutdown.c */ > + > +void machine_restart(unsigned int delay_millisecs) > +{ > +BUG(); > +} > + > +void machine_halt(void) > +{ > +BUG(); > +} > + > +/* vm_event.c */ > + > +void vm_event_fill_regs(vm_event_request_t *req) > +{ > +BUG(); > +} > + > +void vm_event_set_registers(struct vcpu *v, vm_event_response_t *rsp) > +{ > +BUG(); > +} > + > +void vm_event_monitor_next_interrupt(struct vcpu *v) > +{ > +/* Not supported on RISCV. */ > +} > + > +void vm_event_reset_vmtrace(struct vcpu *v) > +{ > +/* Not supported on RISCV. 
*/ > +} > + > +/* domctl.c */ > + > +long arch_do_domctl(struct xen_domctl *domctl, struct domain *d, > +XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) > +{ > +BUG(); > +} > + > +void arch_get_domain_info(const struct domain *d, > + struct xen_domctl_getdomaininfo *info) > +{ > +BUG(); > +} > + > +void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c) > +{ > +BUG(); > +} > + > +/* monitor.c */ > + > +int arch_monitor_domctl_event(struct domain *d, > + struct xen_domctl_monitor_op *mop) > +{ > +BUG(); > +} > + > +/* smp.c */ > + > +void arch_flush_tlb_mask(const cpumask_t *mask) > +{ > +BUG(); > +} > + > +void smp_send_event_check_mask(const cpumask_t *mask) > +{ > +BUG(); > +} > + > +void smp_send_call_function_mask(const cpumask_t *mask) > +{ > +BUG(); > +} > + > +/* irq.c */ > + > +struct pirq *alloc_pirq_struct(struct domain *d) > +{ > +BUG(); > +} > + > +int pirq_guest_bind(struct vcpu *v, struct pirq *pirq, int will_share) > +{ > +BUG(); > +} > + > +void pirq_guest_unbind(struct domain *d, struct pirq *pirq) > +{ > +BUG(); > +} > + > +void pirq_set_affinity(struct domain *d, int pirq, const cpumask_t *mask) > +{ > +BUG(); > +} > + > +static void ack_none(struct irq_desc *irq) > +{ > +BUG(); > +} > + > +static void end_none(struct irq_desc *irq) > +{ > +BUG(); > +} Much like I said for PPC - I don't think you need the two, as ... > +hw_irq_controller no_irq_type = { > +.typename = "none", > +.startup = irq_startup_none, > +.shutdown = irq_shutdown_none, > +.enable = irq_enable_none, > +
Re: [BUG]i2c_hid_acpi broken with 4.17.2 on Framework Laptop 13 AMD
On 18.12.2023 17:21, Sébastien Chaumat wrote: > On 05.12.2023 21:31, Sébastien Chaumat wrote: >>> This issue seems that IRQ 7 (the GPIO controller) is natively fasteoi >>> (so level type) while in xen it is mapped to oapic-edge instead of >>> oapic-level >>> as the SSDT indicates : >>> >>> Device (GPIO) >>> >>> { >>> Name (_HID, "AMDI0030") // _HID: Hardware ID >>> Name (_CID, "AMDI0030") // _CID: Compatible ID >>> Name (_UID, Zero) // _UID: Unique ID >>> Method (_CRS, 0, NotSerialized) // _CRS: Current Resource Settings >>> { >>> Name (RBUF, ResourceTemplate () >>> { >>> Interrupt (ResourceConsumer, Level, ActiveLow, Shared, ,, ) >>> { >>> 0x0007, >>>} >>> Any idea why ? >> >> Information coming from AML is required to be handed down by Dom0 to Xen. >> May want checking that (a) Dom0 properly does so and (b) Xen doesn't screw >> up in consuming that data. See PHYSDEVOP_setup_gsi. I wonder if this is >> specific to it being IRQ7 which GPIO uses, as at the (master) PIC IRQ7 is >> also the spurious vector. You may want to retry with the tip of the 4.17 >> branch (soon to become 4.17.3) - while it doesn't look very likely to me >> that recent backports there were related, it may still be that they make >> a difference. >> > > testing with 4.17.3: > > Adding some printk in PHYSDEVOP_setup_gsi, I see (in xl dmesg) that > (XEN) PHYSDEVOP_setup_gsi setup_gsi : gsi: 7 triggering: 1 polarity: 1 > > but later on in dmesg I see : > [1.747958] xen: registering gsi 7 triggering 0 polarity 1 > > So triggering is flipped from 1 to 0 (cannot find the definition for > those values). > Could this be the culprit ? Possibly. Since it would be the kernel to invoke PHYSDEVOP_setup_gsi, it looks as if the kernel was confused about which trigger mode to use. Have you figured from where the kernel takes the two different values? Jan
xen | Successful pipeline for staging | 0cc74376
Pipeline #343260 has passed! Project: xen ( https://gitlab.com/xen-project/xen ) Branch: staging ( https://gitlab.com/xen-project/xen/-/commits/staging ) Commit: 0cc74376 ( https://gitlab.com/xen-project/xen/-/commit/0cc74376d6823e0883f89556be2a267f2240a558 ) Commit Message: x86/hvm: address a violation of MISRA C:2012 Ru... Commit Author: Maria Celeste Cesario Committed by: Jan Beulich ( https://gitlab.com/jbeulich ) Pipeline #343260 ( https://gitlab.com/xen-project/xen/-/pipelines/343260 ) triggered by Ganis ( https://gitlab.com/ganis ) successfully completed 129 jobs in 3 stages. -- You're receiving this email because of your account on gitlab.com.
Re: [BUG]i2c_hid_acpi broken with 4.17.2 on Framework Laptop 13 AMD
> >>> On 05.12.2023 21:31, Sébastien Chaumat wrote: > > [2.464598] amd_gpio AMDI0030:00: failed to enable wake-up interrupt > > Is it expected that IRQ7 goes from fasteoi (kernel 6.6.4 ) to > ioapic-edge and IRQ9 to ioapic-level ? > > IR-IO-APIC7-fasteoi pinctrl_amd > IR-IO-APIC9-fasteoi acpi > > to (xen 4.18.0) > > xen-pirq -ioapic-edge pinctrl_amd > xen-pirq -ioapic-level acpi > > ? > > > > This look similar to > > https://yhbt.net/lore/all/20201006044941.fdjsp346kc5thyzy@Rk/t/ > > > > This issue seems that IRQ 7 (the GPIO controller) is natively fasteoi > > (so level type) while in xen it is mapped to oapic-edge instead of > > oapic-level > > as the SSDT indicates : > > > > Device (GPIO) > > > > { > > Name (_HID, "AMDI0030") // _HID: Hardware ID > > Name (_CID, "AMDI0030") // _CID: Compatible ID > > Name (_UID, Zero) // _UID: Unique ID > > Method (_CRS, 0, NotSerialized) // _CRS: Current Resource Settings > > { > > Name (RBUF, ResourceTemplate () > > { > > Interrupt (ResourceConsumer, Level, ActiveLow, Shared, ,, ) > > { > > 0x0007, > >} > > Any idea why ? > > Information coming from AML is required to be handed down by Dom0 to Xen. > May want checking that (a) Dom0 properly does so and (b) Xen doesn't screw > up in consuming that data. See PHYSDEVOP_setup_gsi. I wonder if this is > specific to it being IRQ7 which GPIO uses, as at the (master) PIC IRQ7 is > also the spurious vector. You may want to retry with the tip of the 4.17 > branch (soon to become 4.17.3) - while it doesn't look very likely to me > that recent backports there were related, it may still be that they make > a difference. > testing with 4.17.3: Adding some printk in PHYSDEVOP_setup_gsi, I see (in xl dmesg) that (XEN) PHYSDEVOP_setup_gsi setup_gsi : gsi: 7 triggering: 1 polarity: 1 but later on in dmesg I see : [1.747958] xen: registering gsi 7 triggering 0 polarity 1 So triggering is flipped from 1 to 0 (cannot find the definition for those values). Could this be the culprit ?
Re: [RFC PATCH v1 1/1] xen/Makefile: introduce ARCH_FIXED_CONFIG for randconfig
On 18.12.2023 17:07, Andrew Cooper wrote: > On 11/12/2023 3:56 pm, Jan Beulich wrote: >> On 07.12.2023 21:17, Andrew Cooper wrote: >>> On 07/12/2023 5:03 pm, Oleksii Kurochko wrote: ARCH_FIXED_CONFIG is required in the case of randconfig and CI for configs that aren't ready or are not supposed to be implemented for specific architecture. These configs should always be disabled to prevent randconfig related tests from failing. Signed-off-by: Oleksii Kurochko --- xen/Makefile | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xen/Makefile b/xen/Makefile index ca571103c8..8ae8fe1480 100644 --- a/xen/Makefile +++ b/xen/Makefile @@ -336,11 +336,14 @@ ifeq ($(config-build),y) # *config targets only - make sure prerequisites are updated, and descend # in tools/kconfig to make the *config target +ARCH_FORCED_CONFIG := $(srctree)/arch/$(SRCARCH)/configs/randomforced.config + # Create a file for KCONFIG_ALLCONFIG which depends on the environment. # This will be use by kconfig targets allyesconfig/allmodconfig/allnoconfig/randconfig filechk_kconfig_allconfig = \ $(if $(findstring n,$(XEN_HAS_CHECKPOLICY)), echo 'CONFIG_XSM_FLASK_POLICY=n';) \ -$(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG);) \ +$(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG); \ +$(if $(wildcard $(ARCH_FORCED_CONFIG)), cat $(ARCH_FORCED_CONFIG);) ) \ : .allconfig.tmp: FORCE >>> We already have infrastructure for this. What's wrong with >>> EXTRA_FIXED_RANDCONFIG? >> What I don't understand here is why dealing with the issue would want >> limiting to gitlab-CI. Anyone could run randconfig on their own, and >> imo it would be helpful if the same issue(s) could be prevented there, >> too. Hence my earlier suggestion to have a snippet which can be used >> by "interested" parties. And once dealt with in e.g. the makefile >> there should not be a need for any overrides in the CI config anymore. > > This is trying to find a solution to a problem which doesn't exist. 
> > RISC-V and PPC are experimental in Xen. Noone else is going to come and > randconfig them until they're rather more production ready, and a > prerequisite of that is removing this list of exclusions. > > Until you can actually find an interested party to comment, I think this > is just churn for no useful improvement. If nothing else, calling it > randomforced.config isn't appropriate given the explanation of what this > target is used for... "random" in the name can't possibly be right anyway. Such collection of fixed settings would also be relevant to e.g. all{yes,no}config. Yet that's still not the same as any kind of "default" config, which the two architectures presently kind of abuse for the purpose of defining required-fixed settings. Jan
Re: [PATCH v4 3/4] x86: limit issuing of IBPB during context switch
On 18.12.2023 16:19, Roger Pau Monné wrote: > On Tue, Feb 14, 2023 at 05:11:40PM +0100, Jan Beulich wrote: >> --- a/xen/arch/x86/domain.c >> +++ b/xen/arch/x86/domain.c >> @@ -2005,17 +2005,26 @@ void context_switch(struct vcpu *prev, s >> } >> else >> { >> +unsigned int feat_sc_rsb = X86_FEATURE_SC_RSB_HVM; >> + >> __context_switch(); >> >> /* Re-enable interrupts before restoring state which may fault. */ >> local_irq_enable(); >> >> if ( is_pv_domain(nextd) ) >> +{ >> load_segments(next); >> >> +feat_sc_rsb = X86_FEATURE_SC_RSB_PV; >> +} >> + >> ctxt_switch_levelling(next); >> >> -if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) ) >> +if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) && >> + (!(prevd->arch.spec_ctrl_flags & SCF_entry_ibpb) || >> + /* is_idle_domain(prevd) || */ > > I would rather add a comment to note that the idle domain always has > SCF_entry_ibpb clear, rather than leaving this commented check in the > condition. > >> + !boot_cpu_has(feat_sc_rsb)) ) Oh, for completeness: For v5 I have this @@ -2092,17 +2092,26 @@ void context_switch(struct vcpu *prev, s } else { +unsigned int feat_sc_rsb = X86_FEATURE_SC_RSB_HVM; + __context_switch(); /* Re-enable interrupts before restoring state which may fault. */ local_irq_enable(); if ( is_pv_domain(nextd) ) +{ load_segments(next); +feat_sc_rsb = X86_FEATURE_SC_RSB_PV; +} + ctxt_switch_levelling(next); -if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) ) +if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) && + (!(prevd->arch.spec_ctrl_flags & SCF_entry_ibpb) || + /* is_idle_domain(prevd) || */ + (!cpu_has_auto_ibrs && !boot_cpu_has(feat_sc_rsb))) ) { static DEFINE_PER_CPU(unsigned int, last); unsigned int *last_id = _cpu(last); i.e. with the cpu_has_auto_ibrs check added. Jan
Re: [PATCH v4 3/4] x86: limit issuing of IBPB during context switch
On 18.12.2023 16:19, Roger Pau Monné wrote: > On Tue, Feb 14, 2023 at 05:11:40PM +0100, Jan Beulich wrote: >> When the outgoing vCPU had IBPB issued and RSB overwritten upon entering >> Xen, then there's no need for a 2nd barrier during context switch. >> >> Note that SCF_entry_ibpb is always clear for the idle domain, so no >> explicit idle domain check is needed to augment the feature check >> (which is simply inapplicable to "idle"). >> >> Signed-off-by: Jan Beulich > > Acked-by: Roger Pau Monné Thanks. However, aiui the plan still is for Andrew to pick up this series and integrate it with other work he has in progress (or he is planning to do). >> --- a/xen/arch/x86/domain.c >> +++ b/xen/arch/x86/domain.c >> @@ -2005,17 +2005,26 @@ void context_switch(struct vcpu *prev, s >> } >> else >> { >> +unsigned int feat_sc_rsb = X86_FEATURE_SC_RSB_HVM; >> + >> __context_switch(); >> >> /* Re-enable interrupts before restoring state which may fault. */ >> local_irq_enable(); >> >> if ( is_pv_domain(nextd) ) >> +{ >> load_segments(next); >> >> +feat_sc_rsb = X86_FEATURE_SC_RSB_PV; >> +} >> + >> ctxt_switch_levelling(next); >> >> -if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) ) >> +if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) && >> + (!(prevd->arch.spec_ctrl_flags & SCF_entry_ibpb) || >> + /* is_idle_domain(prevd) || */ > > I would rather add a comment to note that the idle domain always has > SCF_entry_ibpb clear, rather than leaving this commented check in the > condition. While I think I can see your point, I like it this way to match the other !is_idle_domain() that's here. >> + !boot_cpu_has(feat_sc_rsb)) ) > > I do wonder if it would be more fail safe (and easier to expand going > forward) if we introduce a new cpu_info field to track the CPU state: > relevant here would be whether RSB has been overwritten and IBPB > executed. Such state would be cleared on each return from guest path. 
To be honest - I'm not sure whether that would help or make things more fragile. More state also means more things which can become incorrect / inconsistent. Jan
Re: [RFC PATCH v1 1/1] xen/Makefile: introduce ARCH_FIXED_CONFIG for randconfig
On 11/12/2023 3:56 pm, Jan Beulich wrote: > On 07.12.2023 21:17, Andrew Cooper wrote: >> On 07/12/2023 5:03 pm, Oleksii Kurochko wrote: >>> ARCH_FIXED_CONFIG is required in the case of randconfig >>> and CI for configs that aren't ready or are not >>> supposed to be implemented for specific architecture. >>> These configs should always be disabled to prevent randconfig >>> related tests from failing. >>> >>> Signed-off-by: Oleksii Kurochko >>> --- >>> xen/Makefile | 5 - >>> 1 file changed, 4 insertions(+), 1 deletion(-) >>> >>> diff --git a/xen/Makefile b/xen/Makefile >>> index ca571103c8..8ae8fe1480 100644 >>> --- a/xen/Makefile >>> +++ b/xen/Makefile >>> @@ -336,11 +336,14 @@ ifeq ($(config-build),y) >>> # *config targets only - make sure prerequisites are updated, and descend >>> # in tools/kconfig to make the *config target >>> >>> +ARCH_FORCED_CONFIG := >>> $(srctree)/arch/$(SRCARCH)/configs/randomforced.config >>> + >>> # Create a file for KCONFIG_ALLCONFIG which depends on the environment. >>> # This will be use by kconfig targets >>> allyesconfig/allmodconfig/allnoconfig/randconfig >>> filechk_kconfig_allconfig = \ >>> $(if $(findstring n,$(XEN_HAS_CHECKPOLICY)), echo >>> 'CONFIG_XSM_FLASK_POLICY=n';) \ >>> -$(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG);) \ >>> +$(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG); \ >>> +$(if $(wildcard $(ARCH_FORCED_CONFIG)), cat $(ARCH_FORCED_CONFIG);) ) \ >>> : >>> >>> .allconfig.tmp: FORCE >> We already have infrastructure for this. What's wrong with >> EXTRA_FIXED_RANDCONFIG? > What I don't understand here is why dealing with the issue would want > limiting to gitlab-CI. Anyone could run randconfig on their own, and > imo it would be helpful if the same issue(s) could be prevented there, > too. Hence my earlier suggestion to have a snippet which can be used > by "interested" parties. And once dealt with in e.g. the makefile > there should not be a need for any overrides in the CI config anymore. 
This is trying to find a solution to a problem which doesn't exist. RISC-V and PPC are experimental in Xen. No one else is going to come and randconfig them until they're rather more production ready, and a prerequisite of that is removing this list of exclusions. Until you can actually find an interested party to comment, I think this is just churn for no useful improvement. If nothing else, calling it randomforced.config isn't appropriate given the explanation of what this target is used for... ~Andrew
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On 18.12.2023 16:43, Roger Pau Monné wrote: > On Mon, Dec 18, 2023 at 02:50:27PM +0100, Jan Beulich wrote: >> On 18.12.2023 14:46, Jan Beulich wrote: >>> On 18.12.2023 13:11, Roger Pau Monné wrote: Hello, I'm not as expert as Andrew in all the speculation stuff, but I will try to provide some feedback. On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: > In order to be able to defer the context switch IBPB to the last > possible point, add logic to the exit-to-guest paths to issue the > barrier there, including the "IBPB doesn't flush the RSB/RAS" > workaround. Since alternatives, for now at least, can't nest, emit JMP > to skip past both constructs where both are needed. This may be more > efficient anyway, as the sequence of NOPs is pretty long. Could you elaborate on the reason why deferring the IBPB to the exit to guest path is helpful? So far it just seem to make the logic more complex without nay justification (at least in the changelog). >>> >>> I've added "(to leave behind as little as possible)" ahead of the 1st >>> comma - is that sufficient, do you think? >> >> Actually, the next patch supplies better context, i.e. is more / also >> about avoiding to clobber Xen's own predictions. > > Right, that I got from next patch, but given context switch is already > a quite heavy operation, does avoiding the cleaning of the branch > predictor make that much of a difference? > > IMO it needs good justification given it's a change that makes the > logic harder to follow, so if it turns out there's no difference I > would rather leave the IBPB at context switch. As per another reply, I guess we want to discuss this with Andrew, so maybe a good thing to try to remember to put up on the next x86 meeting we're going to have. Jan
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On 18.12.2023 16:40, Roger Pau Monné wrote: > On Mon, Dec 18, 2023 at 02:46:37PM +0100, Jan Beulich wrote: >> On 18.12.2023 13:11, Roger Pau Monné wrote: >>> Hello, >>> >>> I'm not as expert as Andrew in all the speculation stuff, but I will >>> try to provide some feedback. >>> >>> On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: In order to be able to defer the context switch IBPB to the last possible point, add logic to the exit-to-guest paths to issue the barrier there, including the "IBPB doesn't flush the RSB/RAS" workaround. Since alternatives, for now at least, can't nest, emit JMP to skip past both constructs where both are needed. This may be more efficient anyway, as the sequence of NOPs is pretty long. >>> >>> Could you elaborate on the reason why deferring the IBPB to the exit >>> to guest path is helpful? So far it just seem to make the logic more >>> complex without nay justification (at least in the changelog). >> >> I've added "(to leave behind as little as possible)" ahead of the 1st >> comma - is that sufficient, do you think? > > Please bear with me, but I'm still uncertain. > > Even if IBRS is not enabled, and such indirect branch predictions are > at the same predictor mode, how would that be of any use to a guest? > My understanding was that the attacker is the one that has to control > the indirect branch predictor contents in order to attack the > hypervisor or other guests. Right; see my later reply. --- I have to admit that I'm not really certain about the placement of the IBPB wrt the MSR_SPEC_CTRL writes. For now I've simply used "opposite of entry". >>> >>> Maybe it would easier to just add the MSR_PRED_CMD PRED_CMD_IBPB write >>> to the vmcs MSR load list? >>> >>> It's a one-time only AFAICT, as you would only want to do this for >>> context-switch AFAICT. >> >> That would be a back and forth of adding and removing the MSR to/from >> that list then, which I'm not convinced is helpful. 
With these special >> MSRs I would further be uncertain as to their effect when used via one >> of these lists. > > Hm, we do seem to already use MSR_PRED_CMD with such lists, so I would > assume they work just fine. Ah, yes, I forgot about that. Still it would be a back and forth if we wanted the MSR on the list only after a context switch, but not anymore for later VM entry. Also iirc these lists are VMX-only? Jan > Anyway, was mostly a recommendation towards clarity, because I think > the return to guest context assembly is getting rather convoluted, and > it's IMO critical for it to be easy to follow. > > Thanks, Roger.
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On Mon, Dec 18, 2023 at 02:50:27PM +0100, Jan Beulich wrote: > On 18.12.2023 14:46, Jan Beulich wrote: > > On 18.12.2023 13:11, Roger Pau Monné wrote: > >> Hello, > >> > >> I'm not as expert as Andrew in all the speculation stuff, but I will > >> try to provide some feedback. > >> > >> On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: > >>> In order to be able to defer the context switch IBPB to the last > >>> possible point, add logic to the exit-to-guest paths to issue the > >>> barrier there, including the "IBPB doesn't flush the RSB/RAS" > >>> workaround. Since alternatives, for now at least, can't nest, emit JMP > >>> to skip past both constructs where both are needed. This may be more > >>> efficient anyway, as the sequence of NOPs is pretty long. > >> > >> Could you elaborate on the reason why deferring the IBPB to the exit > >> to guest path is helpful? So far it just seem to make the logic more > >> complex without nay justification (at least in the changelog). > > > > I've added "(to leave behind as little as possible)" ahead of the 1st > > comma - is that sufficient, do you think? > > Actually, the next patch supplies better context, i.e. is more / also > about avoiding to clobber Xen's own predictions. Right, that I got from next patch, but given context switch is already a quite heavy operation, does avoiding the cleaning of the branch predictor make that much of a difference? IMO it needs good justification given it's a change that makes the logic harder to follow, so if it turns out there's no difference I would rather leave the IBPB at context switch. Thanks, Roger.
Re: Possible bug in Xen
On 18/12/2023 3:34 pm, Joe Tretter wrote: > Hello, > > I discussed the below problem with the QubesOS team on Github > (https://github.com/QubesOS/qubes-issues/issues/4493) and they suggest > that this seems to be a problem with Xen, and suggested that I post it > to this e-mail address. > > I have problems restoring backups in QubesOS release 4.1.2 on one of > my machines. > Other users reported the issue too, but no QubesOS developer seemed to > be able to reproduce it, therefore nothing happened for a while and > the assumption has been that it's some sort of hardware problem. > > I analyzed the problem down to the "scrypt" tool > (https://www.tarsnap.com/scrypt.html) falsely complaining that the > password would be wrong. > I proceeded and re-compiled the "scrypt" tool in its latest version > and found that the self-tests show intermittent erratic behavior. > > This issue only happens with QubesOS/Xen Kernel, if I boot into a > kernel without Xen, the problem can't be observed. > The problem can only be observed on one of my machines which is a Dell > Inspiron 5675 with an AMD Ryzen 7 1700 Eight-Core Processor. > > I hope you can help with this issue. So the issue is specific to your AMD Zen1 system, and only when Xen is in the mix. Can you try booting Xen with spec-ctrl=eager-fpu and see if that changes the behaviour ? ~Andrew
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On Mon, Dec 18, 2023 at 02:46:37PM +0100, Jan Beulich wrote: > On 18.12.2023 13:11, Roger Pau Monné wrote: > > Hello, > > > > I'm not as expert as Andrew in all the speculation stuff, but I will > > try to provide some feedback. > > > > On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: > >> In order to be able to defer the context switch IBPB to the last > >> possible point, add logic to the exit-to-guest paths to issue the > >> barrier there, including the "IBPB doesn't flush the RSB/RAS" > >> workaround. Since alternatives, for now at least, can't nest, emit JMP > >> to skip past both constructs where both are needed. This may be more > >> efficient anyway, as the sequence of NOPs is pretty long. > > > > Could you elaborate on the reason why deferring the IBPB to the exit > > to guest path is helpful? So far it just seem to make the logic more > > complex without nay justification (at least in the changelog). > > I've added "(to leave behind as little as possible)" ahead of the 1st > comma - is that sufficient, do you think? Please bear with me, but I'm still uncertain. Even if IBRS is not enabled, and such indirect branch predictions are at the same predictor mode, how would that be of any use to a guest? My understanding was that the attacker is the one that has to control the indirect branch predictor contents in order to attack the hypervisor or other guests. > >> --- > >> I have to admit that I'm not really certain about the placement of the > >> IBPB wrt the MSR_SPEC_CTRL writes. For now I've simply used "opposite of > >> entry". > > > > Maybe it would easier to just add the MSR_PRED_CMD PRED_CMD_IBPB write > > to the vmcs MSR load list? > > > > It's a one-time only AFAICT, as you would only want to do this for > > context-switch AFAICT. > > That would be a back and forth of adding and removing the MSR to/from > that list then, which I'm not convinced is helpful. 
With these special > MSRs I would further be uncertain as to their effect when used via one > of these lists. Hm, we do seem to already use MSR_PRED_CMD with such lists, so I would assume they work just fine. Anyway, was mostly a recommendation towards clarity, because I think the return to guest context assembly is getting rather convoluted, and it's IMO critical for it to be easy to follow. Thanks, Roger.
Re: [PATCH v2 10/14] aio: remove aio_context_acquire()/aio_context_release() API
Am 05.12.2023 um 19:20 hat Stefan Hajnoczi geschrieben: > Delete these functions because nothing calls these functions anymore. > > I introduced these APIs in commit 98563fc3ec44 ("aio: add > aio_context_acquire() and aio_context_release()") in 2014. It's with a > sigh of relief that I delete these APIs almost 10 years later. > > Thanks to Paolo Bonzini's vision for multi-queue QEMU, we got an > understanding of where the code needed to go in order to remove the > limitations that the original dataplane and the IOThread/AioContext > approach that followed it. > > Emanuele Giuseppe Esposito had the splendid determination to convert > large parts of the codebase so that they no longer needed the AioContext > lock. This was a painstaking process, both in the actual code changes > required and the iterations of code review that Emanuele eked out of > Kevin and me over many months. > > Kevin Wolf tackled multitudes of graph locking conversions to protect > in-flight I/O from run-time changes to the block graph as well as the > clang Thread Safety Analysis annotations that allow the compiler to > check whether the graph lock is being used correctly. > > And me, well, I'm just here to add some pizzazz to the QEMU multi-queue > block layer :). Thank you to everyone who helped with this effort, > including Eric Blake, code reviewer extraordinaire, and others who I've > forgotten to mention. > > Signed-off-by: Stefan Hajnoczi > Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf
Re: [PATCH v2 08/14] scsi: remove AioContext locking
Am 05.12.2023 um 19:20 hat Stefan Hajnoczi geschrieben: > The AioContext lock no longer has any effect. Remove it. > > Signed-off-by: Stefan Hajnoczi > Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf
Re: [PATCH v2 09/14] aio-wait: draw equivalence between AIO_WAIT_WHILE() and AIO_WAIT_WHILE_UNLOCKED()
Am 05.12.2023 um 19:20 hat Stefan Hajnoczi geschrieben: > Now that the AioContext lock no longer exists, AIO_WAIT_WHILE() and > AIO_WAIT_WHILE_UNLOCKED() are equivalent. > > A future patch will get rid of AIO_WAIT_WHILE_UNLOCKED(). > > Signed-off-by: Stefan Hajnoczi > Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf
Re: [PATCH v2 07/14] block: remove bdrv_co_lock()
Am 05.12.2023 um 19:20 hat Stefan Hajnoczi geschrieben: > The bdrv_co_lock() and bdrv_co_unlock() functions are already no-ops. > Remove them. > > Signed-off-by: Stefan Hajnoczi Reviewed-by: Kevin Wolf
Re: [PATCH v2 02/14] scsi: assert that callbacks run in the correct AioContext
Am 05.12.2023 um 19:19 hat Stefan Hajnoczi geschrieben: > Since the removal of AioContext locking, the correctness of the code > relies on running requests from a single AioContext at any given time. > > Add assertions that verify that callbacks are invoked in the correct > AioContext. > > Signed-off-by: Stefan Hajnoczi Reviewed-by: Kevin Wolf
Re: [PATCH v2 38/39] xen/riscv: enable full Xen build
On 24.11.2023 11:30, Oleksii Kurochko wrote: > Signed-off-by: Oleksii Kurochko Reviewed-by: Jan Beulich with ... > --- a/xen/arch/riscv/configs/tiny64_defconfig > +++ b/xen/arch/riscv/configs/tiny64_defconfig > @@ -24,7 +24,6 @@ > # CONFIG_COVERAGE is not set > # CONFIG_UBSAN is not set > # CONFIG_NEEDS_LIBELF is not set > - > CONFIG_RISCV_64=y > CONFIG_DEBUG=y > CONFIG_DEBUG_INFO=y ... this unrelated (and perhaps even unhelpful) change dropped. Jan
Re: [PATCH] x86: don't open-code max_page calculation nor pfn_to_paddr()
On 18/12/2023 3:13 pm, Jan Beulich wrote: > As observed by Roger while reviewing a somewhat related change, there's > no need here either to open-code the (largely, i.e. once setup_max_pdx() > was called) fixed relationship between max_pdx and max_page. Further we > can avoid open-coding pfn_to_paddr() here. > > Signed-off-by: Jan Beulich Acked-by: Andrew Cooper
Re: [PATCH v4 3/4] x86: limit issuing of IBPB during context switch
On Tue, Feb 14, 2023 at 05:11:40PM +0100, Jan Beulich wrote: > When the outgoing vCPU had IBPB issued and RSB overwritten upon entering > Xen, then there's no need for a 2nd barrier during context switch. > > Note that SCF_entry_ibpb is always clear for the idle domain, so no > explicit idle domain check is needed to augment the feature check > (which is simply inapplicable to "idle"). > > Signed-off-by: Jan Beulich Acked-by: Roger Pau Monné > --- > v4: Tighten the condition. > v3: Fold into series. > --- > I think in principle we could limit the impact from finding the idle > domain as "prevd", by having __context_switch() tell us what kind > domain's vCPU was switched out (it could still be "idle", but in fewer > cases). > > --- a/xen/arch/x86/domain.c > +++ b/xen/arch/x86/domain.c > @@ -2005,17 +2005,26 @@ void context_switch(struct vcpu *prev, s > } > else > { > +unsigned int feat_sc_rsb = X86_FEATURE_SC_RSB_HVM; > + > __context_switch(); > > /* Re-enable interrupts before restoring state which may fault. */ > local_irq_enable(); > > if ( is_pv_domain(nextd) ) > +{ > load_segments(next); > > +feat_sc_rsb = X86_FEATURE_SC_RSB_PV; > +} > + > ctxt_switch_levelling(next); > > -if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) ) > +if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) && > + (!(prevd->arch.spec_ctrl_flags & SCF_entry_ibpb) || > + /* is_idle_domain(prevd) || */ I would rather add a comment to note that the idle domain always has SCF_entry_ibpb clear, rather than leaving this commented check in the condition. > + !boot_cpu_has(feat_sc_rsb)) ) I do wonder if it would be more fail safe (and easier to expand going forward) if we introduce a new cpu_info field to track the CPU state: relevant here would be whether RSB has been overwritten and IBPB executed. Such state would be cleared on each return from guest path. Thanks, Roger.
[PATCH] x86: don't open-code max_page calculation nor pfn_to_paddr()
As observed by Roger while reviewing a somewhat related change, there's no need here either to open-code the (largely, i.e. once setup_max_pdx() was called) fixed relationship between max_pdx and max_page. Further we can avoid open-coding pfn_to_paddr() here. Signed-off-by: Jan Beulich --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -1569,7 +1569,7 @@ void asmlinkage __init noreturn __start_ continue; } map_e = e; -e = (pdx_to_pfn(max_pdx - 1) + 1ULL) << PAGE_SHIFT; +e = pfn_to_paddr(max_page); printk(XENLOG_WARNING "Ignoring inaccessible memory range" " %013"PRIx64"-%013"PRIx64"\n", e, map_e);
[XEN PATCH v2] xen: move declaration of first_valid_mfn to xen/numa.h
Such declaration is moved in order to provide it for Arm and PPC, whilst not violating MISRA C:2012 Rule 8.4 in common/page_alloc.c: "A compatible declaration shall be visible when an object or function with external linkage is defined". Signed-off-by: Julien Grall Signed-off-by: Nicola Vetrini --- Changes in v2: This patch is a rework of a previous one appeared in this series [1], of which patches 1 and 2 have been committed already. The updated patch was provided by Julien in this thread [2]. I added the commit message and the rest of the information. [1] https://lore.kernel.org/xen-devel/cover.1702285639.git.nicola.vetr...@bugseng.com/T/#mee6def855787d932fe2f10d5648d437dcb6f046c [2] https://lore.kernel.org/xen-devel/cover.1702285639.git.nicola.vetr...@bugseng.com/T/#m3c5b141b806530b5920bb5e9dd53631195560317 --- xen/arch/arm/include/asm/numa.h | 6 -- xen/arch/ppc/include/asm/numa.h | 6 -- xen/common/page_alloc.c | 6 -- xen/include/xen/numa.h | 2 ++ 4 files changed, 6 insertions(+), 14 deletions(-) diff --git a/xen/arch/arm/include/asm/numa.h b/xen/arch/arm/include/asm/numa.h index e2bee2bd8223..a2c1da4a82f7 100644 --- a/xen/arch/arm/include/asm/numa.h +++ b/xen/arch/arm/include/asm/numa.h @@ -11,12 +11,6 @@ typedef u8 nodeid_t; #define cpu_to_node(cpu) 0 #define node_to_cpumask(node) (cpu_online_map) -/* - * TODO: make first_valid_mfn static when NUMA is supported on Arm, this - * is required because the dummy helpers are using it. 
- */ -extern mfn_t first_valid_mfn; - /* XXX: implement NUMA support */ #define node_spanned_pages(nid) (max_page - mfn_x(first_valid_mfn)) #define node_start_pfn(nid) (mfn_x(first_valid_mfn)) diff --git a/xen/arch/ppc/include/asm/numa.h b/xen/arch/ppc/include/asm/numa.h index 7fdf66c3da74..204180ad5b98 100644 --- a/xen/arch/ppc/include/asm/numa.h +++ b/xen/arch/ppc/include/asm/numa.h @@ -10,12 +10,6 @@ typedef uint8_t nodeid_t; #define cpu_to_node(cpu) 0 #define node_to_cpumask(node) (cpu_online_map) -/* - * TODO: make first_valid_mfn static when NUMA is supported on PPC, this - * is required because the dummy helpers are using it. - */ -extern mfn_t first_valid_mfn; - /* XXX: implement NUMA support */ #define node_spanned_pages(nid) (max_page - mfn_x(first_valid_mfn)) #define node_start_pfn(nid) (mfn_x(first_valid_mfn)) diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 9b5df74fddab..d874525916ea 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -255,8 +255,10 @@ static PAGE_LIST_HEAD(page_broken_list); */ /* - * first_valid_mfn is exported because it is use in ARM specific NUMA - * helpers. See comment in arch/arm/include/asm/numa.h. + * first_valid_mfn is exported because it is used when !CONFIG_NUMA. + * + * TODO: Consider if we can conditionally export first_valid_mfn based + * on whether NUMA is selected. */ mfn_t first_valid_mfn = INVALID_MFN_INITIALIZER; diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h index 287e81ff..a10d4b1778a0 100644 --- a/xen/include/xen/numa.h +++ b/xen/include/xen/numa.h @@ -108,6 +108,8 @@ extern void numa_set_processor_nodes_parsed(nodeid_t node); #else +extern mfn_t first_valid_mfn; + static inline nodeid_t mfn_to_nid(mfn_t mfn) { return 0; -- 2.34.1
Re: [PATCH 4/9] ACPI: address violations of MISRA C:2012 Rule 11.8
On 14/12/23 17:36, Jan Beulich wrote: On 14.12.2023 13:07, Simone Ballarin wrote: --- a/xen/include/acpi/acmacros.h +++ b/xen/include/acpi/acmacros.h @@ -116,7 +116,7 @@ #define ACPI_PTR_TO_PHYSADDR(i) ACPI_TO_INTEGER(i) #ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED -#define ACPI_COMPARE_NAME(a,b) (*ACPI_CAST_PTR (u32,(a)) == *ACPI_CAST_PTR (u32,(b))) +#define ACPI_COMPARE_NAME(a,b) (*ACPI_CAST_PTR (const u32,(a)) == *ACPI_CAST_PTR (const u32,(b))) Hmm, I'm a little hesitant to take changes to this header. We've inherited it from Linux, who in turn inherited / imported it from ACPI CA. #else #define ACPI_COMPARE_NAME(a,b) (!ACPI_STRNCMP (ACPI_CAST_PTR (char,(a)), ACPI_CAST_PTR (char,(b)), ACPI_NAME_SIZE)) #endif What about this alternative code, btw? Jan If the file is not supposed to be changed, I would consider adding it in docs/misra/exclude-list*. For the moment, I think it is better to drop the change. -- Simone Ballarin, M.Sc. Field Application Engineer, BUGSENG (https://bugseng.com)
Re: [PATCH] x86: allow non-BIGMEM configs to boot on >= 16Tb systems
On 18.12.2023 12:13, Roger Pau Monné wrote: > On Mon, Dec 18, 2023 at 09:26:24AM +0100, Jan Beulich wrote: >> On 15.12.2023 15:54, Roger Pau Monné wrote: >>> On Wed, Jun 07, 2023 at 08:17:30AM +0200, Jan Beulich wrote: While frame table setup, directmap init, and boot allocator population respect all intended bounds, the logic passing memory to the heap allocator which wasn't passed to the boot allocator fails to respect max_{pdx,pfn}. This then typically triggers the BUG() in free_heap_pages() after checking page state, because of hitting a struct page_info instance which was set to all ~0. Of course all the memory above the 16Tb boundary is still going to remain unused; using it requires BIGMEM=y. And of course this fix similarly ought to help BIGMEM=y configurations on >= 123Tb systems (where all the memory beyond that boundary continues to be unused). Fixes: bac263ba ("x86-64: reduce range spanned by 1:1 mapping and frame table indexes") Signed-off-by: Jan Beulich >>> >>> Acked-by: Roger Pau Monné >> >> Thanks. >> --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -1722,15 +1722,16 @@ void __init noreturn __start_xen(unsigne if ( max_page - 1 > virt_to_mfn(HYPERVISOR_VIRT_END - 1) ) { -unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1); +unsigned long lo = virt_to_mfn(HYPERVISOR_VIRT_END - 1); +unsigned long hi = pdx_to_pfn(max_pdx - 1) + 1; >>> >>> Maybe use max_page to avoid the pdx_to_pfn() call? (And is also more >>> in context with the condition on the outside if). >> >> You mean >> >> unsigned long hi = min(pdx_to_pfn(max_pdx - 1) + 1, max_page); >> >> ? I could switch to that, yes. I wouldn't feel well switching to using >> just max_page, especially with me having nowhere to (reasonably) test. > > Isn't max_page derived from max_pdx (see setup_max_pdx()), and > hence we could avoid the pdx_to_pfn() conversion by just using it? > > max_page = pdx_to_pfn(max_pdx - 1) + 1; > > So hi == max_page in your proposed code. 
> > Maybe there are further restrictions applied to max_pdx that are not > propagated into max_page, the meaning of all those variables is very > opaque, and hard to follow in the source code. Looking more closely, the two appear to be properly in sync once setup_max_pdx() was called the first time. I guess I was in part mislead by e = (pdx_to_pfn(max_pdx - 1) + 1ULL) << PAGE_SHIFT; just a few lines past an update to both variables. I'll switch to max_page here, and I may also make a patch to tidy the line quoted above. Jan
[PATCH v2 3/3] x86: detect PIT aliasing on ports other than 0x4[0-3]
... in order to also deny Dom0 access through the alias ports. Without this it is only giving the impression of denying access to PIT. Unlike for CMOS/RTC, do detection pretty early, to avoid disturbing normal operation later on (even if typically we won't use much of the PIT). Like for CMOS/RTC a fundamental assumption of the probing is that reads from the probed alias port won't have side effects (beyond such that PIT reads have anyway) in case it does not alias the PIT's. As to the port 0x61 accesses: Unlike other accesses we do, this masks off the top four bits (in addition to the bottom two ones), following Intel chipset documentation saying that these (read-only) bits should only be written with zero. Signed-off-by: Jan Beulich --- If Xen was running on top of another instance of itself (in HVM mode, not PVH, i.e. not as a shim), prior to 14f42af3f52d ('x86/vPIT: account for "counter stopped" time') I'm afraid our vPIT logic would not have allowed the "Try to further make sure ..." check to pass in the Xen running on top: We don't respect the gate bit being clear when handling counter reads. (There are more unhandled [and unmentioned as being so] aspects of PIT behavior though, yet it's unclear in how far addressing at least some of them would be useful.) --- v2: Use new command line option. Re-base over changes to earlier patches. Use ISOLATE_LSB(). --- a/xen/arch/x86/dom0_build.c +++ b/xen/arch/x86/dom0_build.c @@ -495,7 +495,11 @@ int __init dom0_setup_permissions(struct rc |= ioports_deny_access(d, 0x4D0, 0x4D1); /* Interval Timer (PIT). */ -rc |= ioports_deny_access(d, 0x40, 0x43); +for ( offs = 0, i = ISOLATE_LSB(pit_alias_mask) ?: 4; + offs <= pit_alias_mask; offs += i ) +if ( !(offs & ~pit_alias_mask) ) +rc |= ioports_deny_access(d, 0x40 + offs, 0x43 + offs); + /* PIT Channel 2 / PC Speaker Control.
*/ rc |= ioports_deny_access(d, 0x61, 0x61); --- a/xen/arch/x86/include/asm/setup.h +++ b/xen/arch/x86/include/asm/setup.h @@ -47,6 +47,7 @@ extern unsigned long highmem_start; #endif extern unsigned int i8259A_alias_mask; +extern unsigned int pit_alias_mask; extern int8_t opt_smt; extern int8_t opt_probe_port_aliases; --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -425,6 +425,72 @@ static struct platform_timesource __init .resume = resume_pit, }; +unsigned int __initdata pit_alias_mask; + +static void __init probe_pit_alias(void) +{ +unsigned int mask = 0x1c; +uint8_t val = 0; + +if ( !opt_probe_port_aliases ) +return; + +/* + * Use channel 2 in mode 0 for probing. In this mode even a non-initial + * count is loaded independent of counting being / becoming enabled. Thus + * we have a 16-bit value fully under our control, to write and then check + * whether we can also read it back unaltered. + */ + +/* Turn off speaker output and disable channel 2 counting. */ +outb(inb(0x61) & 0x0c, 0x61); + +outb((2 << 6) | (3 << 4) | (0 << 1), PIT_MODE); /* Mode 0, LSB/MSB. */ + +do { +uint8_t val2; +unsigned int offs; + +outb(val, PIT_CH2); +outb(val ^ 0xff, PIT_CH2); + +/* Wait for the Null Count bit to clear. */ +do { +/* Latch status. */ +outb((3 << 6) | (1 << 5) | (1 << 3), PIT_MODE); + +/* Try to make sure we're actually having a PIT here. */ +val2 = inb(PIT_CH2); +if ( (val2 & ~(3 << 6)) != ((3 << 4) | (0 << 1)) ) +return; +} while ( val2 & (1 << 6) ); + +/* + * Try to further make sure we're actually having a PIT here. + * + * NB: Deliberately |, not ||, as we always want both reads. + */ +val2 = inb(PIT_CH2); +if ( (val2 ^ val) | (inb(PIT_CH2) ^ val ^ 0xff) ) +return; + +for ( offs = ISOLATE_LSB(mask); offs <= mask; offs <<= 1 ) +{ +if ( !(mask & offs) ) +continue; +val2 = inb(PIT_CH2 + offs); +if ( (val2 ^ val) | (inb(PIT_CH2 + offs) ^ val ^ 0xff) ) +mask &= ~offs; +} +} while ( mask && (val += 0x0b) ); /* Arbitrary uneven number. 
*/ + +if ( mask ) +{ +dprintk(XENLOG_INFO, "PIT aliasing mask: %02x\n", mask); +pit_alias_mask = mask; +} +} + /* PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET) */ @@ -2414,6 +2480,8 @@ void __init early_time_init(void) } preinit_pit(); +probe_pit_alias(); + tmp = init_platform_timer(); plt_tsc.frequency = tmp;
[PATCH v2 2/3] x86: detect PIC aliasing on ports other than 0x[2A][01]
... in order to also deny Dom0 access through the alias ports. Without this it is only giving the impression of denying access to both PICs. Unlike for CMOS/RTC, do detection very early, to avoid disturbing normal operation later on. Like for CMOS/RTC a fundamental assumption of the probing is that reads from the probed alias port won't have side effects in case it does not alias the respective PIC's one. Signed-off-by: Jan Beulich --- v2: Use new command line option. s/pic/8259A/. Re-base over new earlier patch. Use ISOLATE_LSB(). --- a/xen/arch/x86/dom0_build.c +++ b/xen/arch/x86/dom0_build.c @@ -467,7 +467,7 @@ static void __init process_dom0_ioports_ int __init dom0_setup_permissions(struct domain *d) { unsigned long mfn; -unsigned int i; +unsigned int i, offs; int rc; if ( pv_shim ) @@ -480,10 +480,16 @@ int __init dom0_setup_permissions(struct /* Modify I/O port access permissions. */ -/* Master Interrupt Controller (PIC). */ -rc |= ioports_deny_access(d, 0x20, 0x21); -/* Slave Interrupt Controller (PIC). */ -rc |= ioports_deny_access(d, 0xA0, 0xA1); +for ( offs = 0, i = ISOLATE_LSB(i8259A_alias_mask) ?: 2; + offs <= i8259A_alias_mask; offs += i ) +{ +if ( offs & ~i8259A_alias_mask ) +continue; +/* Master Interrupt Controller (PIC). */ +rc |= ioports_deny_access(d, 0x20 + offs, 0x21 + offs); +/* Slave Interrupt Controller (PIC). */ +rc |= ioports_deny_access(d, 0xA0 + offs, 0xA1 + offs); +} /* ELCR of both PICs. */ rc |= ioports_deny_access(d, 0x4D0, 0x4D1); --- a/xen/arch/x86/i8259.c +++ b/xen/arch/x86/i8259.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -333,6 +334,58 @@ void __init make_8259A_irq(unsigned int irq_to_desc(irq)->handler = _irq_type; } +unsigned int __initdata i8259A_alias_mask; + +static void __init probe_8259A_alias(void) +{ +unsigned int mask = 0x1e; +uint8_t val = 0; + +if ( !opt_probe_port_aliases ) +return; + +/* + * The only properly r/w register is OCW1.
While keeping the master + * fully masked (thus also masking anything coming through the slave), + * write all possible 256 values to the slave's base port, and check + * whether the same value can then be read back through any of the + * possible alias ports. Probing just the slave of course builds on the + * assumption that aliasing is identical for master and slave. + */ + +outb(0xff, 0x21); /* Fully mask master. */ + +do { +unsigned int offs; + +outb(val, 0xa1); + +/* Try to make sure we're actually having a PIC here. */ +if ( inb(0xa1) != val ) +{ +mask = 0; +break; +} + +for ( offs = ISOLATE_LSB(mask); offs <= mask; offs <<= 1 ) +{ +if ( !(mask & offs) ) +continue; +if ( inb(0xa1 + offs) != val ) +mask &= ~offs; +} +} while ( mask && (val += 0x0d) ); /* Arbitrary uneven number. */ + +outb(cached_A1, 0xa1); /* Restore slave IRQ mask. */ +outb(cached_21, 0x21); /* Restore master IRQ mask. */ + +if ( mask ) +{ +dprintk(XENLOG_INFO, "PIC aliasing mask: %02x\n", mask); +i8259A_alias_mask = mask; +} +} + static struct irqaction __read_mostly cascade = { no_action, "cascade", NULL}; void __init init_IRQ(void) @@ -343,6 +396,8 @@ void __init init_IRQ(void) init_8259A(0); +probe_8259A_alias(); + for (irq = 0; platform_legacy_irq(irq); irq++) { struct irq_desc *desc = irq_to_desc(irq); --- a/xen/arch/x86/include/asm/setup.h +++ b/xen/arch/x86/include/asm/setup.h @@ -46,6 +46,8 @@ extern uint8_t kbd_shift_flags; extern unsigned long highmem_start; #endif +extern unsigned int i8259A_alias_mask; + extern int8_t opt_smt; extern int8_t opt_probe_port_aliases;
[PATCH v2 1/3] x86: allow to suppress port-alias probing
By default there's already no use for this when we run in shim mode. Plus there may also be a need to suppress the probing in case of issues with it. Before introducing further port alias probing, introduce a command line option allowing to bypass it, default it to on when in shim mode, and gate RTC/CMOS port alias probing on it. Requested-by: Roger Pau Monné Signed-off-by: Jan Beulich --- While "probe-port-aliases" is longish, shorter forms (e.g. "port-probe") partially lose the intended meaning. --- v2: New. --- a/docs/misc/xen-command-line.pandoc +++ b/docs/misc/xen-command-line.pandoc @@ -2000,6 +2000,17 @@ INVPCID is supported and not disabled vi This is a mask of C-states which are to be used preferably. This option is applicable only on hardware were certain C-states are exclusive of one another. +### probe-port-aliases (x86) +> `= ` + +> Default: `true` outside of shim mode, `false` in shim mode + +Certain devices accessible by I/O ports may be accessible also through "alias" +ports (originally a result of incomplete address decoding). When such devices +are solely under Xen's control, Xen disallows even Dom0 access to the "primary" +ports. When alias probing is active and aliases are detected, "alias" ports +would then be treated similar to the "primary" ones. 
+ ### psr (Intel) > `= List of ( cmt: | rmid_max: | cat: | > cos_max: | cdp: )` --- a/xen/arch/x86/include/asm/setup.h +++ b/xen/arch/x86/include/asm/setup.h @@ -47,6 +47,7 @@ extern unsigned long highmem_start; #endif extern int8_t opt_smt; +extern int8_t opt_probe_port_aliases; #ifdef CONFIG_SHADOW_PAGING extern bool opt_dom0_shadow; --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -75,6 +75,9 @@ static bool __initdata opt_invpcid = tru boolean_param("invpcid", opt_invpcid); bool __read_mostly use_invpcid; +int8_t __initdata opt_probe_port_aliases = -1; +boolean_param("probe-port-aliases", opt_probe_port_aliases); + /* Only used in asm code and within this source file */ unsigned long asmlinkage __read_mostly cr4_pv32_mask; @@ -1844,6 +1847,9 @@ void asmlinkage __init noreturn __start_ /* Low mappings were only needed for some BIOS table parsing. */ zap_low_mappings(); +if ( opt_probe_port_aliases < 0 ) +opt_probe_port_aliases = !pv_shim; + init_apic_mappings(); normalise_cpu_order(); --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -1253,7 +1253,8 @@ static int __init cf_check probe_cmos_al { unsigned int offs; -if ( acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC ) +if ( (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) || + !opt_probe_port_aliases ) return 0; for ( offs = 2; offs < 8; offs <<= 1 )
[PATCH v2 0/3] x86: Dom0 I/O port access permissions
Following on from the CMOS/RTC port aliasing change, there are some more missing restrictions; in particular there's more port aliasing to be aware of. But first of all introduce a command line option to allow suppressing this probing of aliases, as was requested. Of course an alternative to all of this would be to do away with all policy-only ioports_deny_access() in dom0_setup_permissions(), leaving in place only ones which are truly required for functionality reasons. 1: allow to suppress port-alias probing 2: detect PIC aliasing on ports other than 0x[2A][01] 3: detect PIT aliasing on ports other than 0x4[0-3] Jan
Re: [PATCH V8 00/12] fix migration of suspended runstate
On 12/18/2023 12:14 AM, Peter Xu wrote: > On Wed, Dec 13, 2023 at 10:35:33AM -0500, Steven Sistare wrote: >> Hi Peter, all have RB's, with all i's dotted and t's crossed - steve > > Yes this seems to be more migration related so maybe good candidate for a > pull from migration submodule. > > But since this is still solving a generic issue, I'm copying a few more > people from get_maintainers.pl that this series touches, just in case > they'll have something to say before dev cycle starts. The key aspects are summarized by the cover letter and the commit messages pasted below for the first 6 patches: https://lore.kernel.org/qemu-devel/1702481421-375368-1-git-send-email-steven.sist...@oracle.com --- [PATCH V8 00/12] fix migration of suspended runstate Migration of a guest in the suspended runstate is broken. The incoming migration code automatically tries to wake the guest, which is wrong; the guest should end migration in the same runstate it started. Further, after saving a snapshot in the suspended state and loading it, the vm_start fails. The runstate is RUNNING, but the guest is not. --- [PATCH V8 01/12] cpus: vm_was_suspended Add a state variable to remember if a vm previously transitioned into a suspended state. --- [PATCH V8 02/12] cpus: stop vm in suspended runstate Currently, a vm in the suspended state is not completely stopped. The VCPUs have been paused, but the cpu clock still runs, and runstate notifiers for the transition to stopped have not been called. This causes problems for live migration. Stale cpu timers_state is saved to the migration stream, causing time errors in the guest when it wakes from suspend, and state that would have been modified by runstate notifiers is wrong. Modify vm_stop to completely stop the vm if the current state is suspended, transition to RUN_STATE_PAUSED, and remember that the machine was suspended. Modify vm_start to restore the suspended state. 
This affects all callers of vm_stop and vm_start, notably, the qapi stop and cont commands. For example: (qemu) info status VM status: paused (suspended) (qemu) stop (qemu) info status VM status: paused (qemu) system_wakeup Error: Unable to wake up: guest is not in suspended state (qemu) cont (qemu) info status VM status: paused (suspended) (qemu) system_wakeup (qemu) info status VM status: running --- [PATCH V8 03/12] cpus: check running not RUN_STATE_RUNNING When a vm transitions from running to suspended, runstate notifiers are not called, so the notifiers still think the vm is running. Hence, when we call vm_start to restore the suspended state, we call vm_state_notify with running=1. However, some notifiers check for RUN_STATE_RUNNING. They must check the running boolean instead. No functional change. --- [PATCH V8 04/12] cpus: vm_resume Define the vm_resume helper, for use in subsequent patches. --- [PATCH V8 05/12] migration: propagate suspended runstate If the outgoing machine was previously suspended, propagate that to the incoming side via global_state, so a subsequent vm_start restores the suspended state. To maintain backward and forward compatibility, reclaim some space from the runstate member. --- [PATCH V8 06/12] migration: preserve suspended runstate A guest that is migrated in the suspended state automatically wakes and continues execution. This is wrong; the guest should end migration in the same state it started. The root cause is that the outgoing migration code automatically wakes the guest, then saves the RUNNING runstate in global_state_store(), hence the incoming migration code thinks the guest is running and continues the guest if autostart is true. On the outgoing side, delete the call to qemu_system_wakeup_request(). Now that vm_stop completely stops a vm in the suspended state (from the preceding patches), the existing call to vm_stop_force_state is sufficient to correctly migrate all vmstate.
On the incoming side, call vm_start if the pre-migration state was running or suspended. For the latter, vm_start correctly restores the suspended state, and a future system_wakeup monitor request will cause the vm to resume running. ---
[PATCH v4 5/5] x86/vIRQ: split PCI link load state checking from actual loading
Move the checking into a check hook, and add checking of the padding fields as well. Signed-off-by: Jan Beulich --- v4: New. --- a/xen/arch/x86/hvm/irq.c +++ b/xen/arch/x86/hvm/irq.c @@ -749,6 +749,30 @@ static int cf_check irq_load_isa(struct return 0; } +static int cf_check irq_check_link(const struct domain *d, + hvm_domain_context_t *h) +{ +const struct hvm_hw_pci_link *pci_link = hvm_get_entry(PCI_LINK, h); +unsigned int link; + +if ( !pci_link ) +return -ENODATA; + +for ( link = 0; link < ARRAY_SIZE(pci_link->pad0); link++ ) +if ( pci_link->pad0[link] ) +return -EINVAL; + +for ( link = 0; link < ARRAY_SIZE(pci_link->route); link++ ) +if ( pci_link->route[link] > 15 ) +{ +printk(XENLOG_G_ERR + "HVM restore: PCI-ISA link %u out of range (%u)\n", + link, pci_link->route[link]); +return -EINVAL; +} + +return 0; +} static int cf_check irq_load_link(struct domain *d, hvm_domain_context_t *h) { @@ -759,16 +783,6 @@ static int cf_check irq_load_link(struct if ( hvm_load_entry(PCI_LINK, h, _irq->pci_link) != 0 ) return -EINVAL; -/* Sanity check */ -for ( link = 0; link < 4; link++ ) -if ( hvm_irq->pci_link.route[link] > 15 ) -{ -printk(XENLOG_G_ERR - "HVM restore: PCI-ISA link %u out of range (%u)\n", - link, hvm_irq->pci_link.route[link]); -return -EINVAL; -} - /* Adjust the GSI assert counts for the link outputs. * This relies on the PCI and ISA IRQ state being loaded first */ for ( link = 0; link < 4; link++ ) @@ -788,5 +802,5 @@ HVM_REGISTER_SAVE_RESTORE(PCI_IRQ, irq_s 1, HVMSR_PER_DOM); HVM_REGISTER_SAVE_RESTORE(ISA_IRQ, irq_save_isa, NULL, irq_load_isa, 1, HVMSR_PER_DOM); -HVM_REGISTER_SAVE_RESTORE(PCI_LINK, irq_save_link, NULL, irq_load_link, - 1, HVMSR_PER_DOM); +HVM_REGISTER_SAVE_RESTORE(PCI_LINK, irq_save_link, irq_check_link, + irq_load_link, 1, HVMSR_PER_DOM);
[PATCH v4 4/5] x86/vPIC: check values loaded from state save record
Loading is_master from the state save record can lead to out-of-bounds accesses via at least the two container_of() uses by vpic_domain() and __vpic_lock(). Make sure the value is consistent with the instance being loaded. For ->int_output (which for whatever reason isn't a 1-bit bitfield), besides bounds checking also take ->init_state into account. For ELCR follow what vpic_intercept_elcr_io()'s write path and vpic_reset() do, i.e. don't insist on the internal view of the value to be saved. Move the instance range check as well, leaving just an assertion in the load handler. Signed-off-by: Jan Beulich Reviewed-by: Roger Pau Monné --- v3: vpic_domain() fix and vpic_elcr_mask() adjustment split out. Re-base over rename in earlier patch. v2: Introduce separate checking function; switch to refusing to load bogus values. Re-base. --- a/xen/arch/x86/hvm/vpic.c +++ b/xen/arch/x86/hvm/vpic.c @@ -429,6 +429,38 @@ static int cf_check vpic_save(struct vcp return 0; } +static int cf_check vpic_check(const struct domain *d, hvm_domain_context_t *h) +{ +unsigned int inst = hvm_load_instance(h); +const struct hvm_hw_vpic *s; + +if ( !has_vpic(d) ) +return -ENODEV; + +/* Which PIC is this? */ +if ( inst >= ARRAY_SIZE(d->arch.hvm.vpic) ) +return -ENOENT; + +s = hvm_get_entry(PIC, h); +if ( !s ) +return -ENODATA; + +/* + * Check to-be-loaded values are within valid range, for them to represent + * actually reachable state. Uses of some of the values elsewhere assume + * this is the case. + */ +if ( s->int_output > 1 ) +return -EDOM; + +if ( s->is_master != !inst || + (s->int_output && s->init_state) || + (s->elcr & ~vpic_elcr_mask(s, 1)) ) +return -EINVAL; + +return 0; +} + static int cf_check vpic_load(struct domain *d, hvm_domain_context_t *h) { struct hvm_hw_vpic *s; @@ -438,18 +470,21 @@ static int cf_check vpic_load(struct dom return -ENODEV; /* Which PIC is this? 
*/ -if ( inst > 1 ) -return -ENOENT; +ASSERT(inst < ARRAY_SIZE(d->arch.hvm.vpic)); s = >arch.hvm.vpic[inst]; /* Load the state */ if ( hvm_load_entry(PIC, h, s) != 0 ) return -EINVAL; +if ( s->is_master ) +s->elcr |= 1 << 2; + return 0; } -HVM_REGISTER_SAVE_RESTORE(PIC, vpic_save, NULL, vpic_load, 2, HVMSR_PER_DOM); +HVM_REGISTER_SAVE_RESTORE(PIC, vpic_save, vpic_check, vpic_load, 2, + HVMSR_PER_DOM); void vpic_reset(struct domain *d) {
[PATCH v4 3/5] x86/vPIT: check values loaded from state save record
In particular pit_latch_status() and speaker_ioport_read() perform calculations which assume in-bounds values. Several of the state save record fields can hold wider ranges, though. Refuse to load values which cannot result from normal operation, except mode, the init state of which (see also below) cannot otherwise be reached. Note that ->gate should only be possible to be zero for channel 2; enforce that as well. Adjust pit_reset()'s writing of ->mode as well, to not unduly affect the value pit_latch_status() may calculate. The chosen mode of 7 is still one which cannot be established by writing the control word. Note that with or without this adjustment effectively all switch() statements using mode as the control expression aren't quite right when the PIT is still in that init state; there is an apparent assumption that before these can sensibly be invoked, the guest would init the PIT (i.e. in particular set the mode). Signed-off-by: Jan Beulich Reviewed-by: Roger Pau Monné --- For mode we could refuse to load values in the [0x08,0xfe] range; I'm not certain that's going to be overly helpful. For count I was considering to clip the saved value to 16 bits (i.e. to convert the internally used 0x10000 back to the architectural 0x0000), but pit_save() doesn't easily lend itself to such a "fixup". If desired perhaps better a separate change anyway. --- v3: Slightly adjust two comments. Re-base over rename in earlier patch. v2: Introduce separate checking function; switch to refusing to load bogus values. Re-base. --- a/xen/arch/x86/emul-i8254.c +++ b/xen/arch/x86/emul-i8254.c @@ -47,6 +47,7 @@ #define RW_STATE_MSB 2 #define RW_STATE_WORD0 3 #define RW_STATE_WORD1 4 +#define RW_STATE_NUM 5 #define get_guest_time(v) \ (is_hvm_vcpu(v) ?
hvm_get_guest_time(v) : (u64)get_s_time()) @@ -427,6 +428,47 @@ static int cf_check pit_save(struct vcpu return rc; } +static int cf_check pit_check(const struct domain *d, hvm_domain_context_t *h) +{ +const struct hvm_hw_pit *hw; +unsigned int i; + +if ( !has_vpit(d) ) +return -ENODEV; + +hw = hvm_get_entry(PIT, h); +if ( !hw ) +return -ENODATA; + +/* + * Check to-be-loaded values are within valid range, for them to represent + * actually reachable state. Uses of some of the values elsewhere assume + * this is the case. Note that the channels' mode fields aren't checked; + * Xen prior to 4.19 might save them as 0xff. + */ +if ( hw->speaker_data_on > 1 || hw->pad0 ) +return -EDOM; + +for ( i = 0; i < ARRAY_SIZE(hw->channels); ++i ) +{ +const struct hvm_hw_pit_channel *ch = >channels[i]; + +if ( ch->count > 0x1 || + ch->count_latched >= RW_STATE_NUM || + ch->read_state >= RW_STATE_NUM || + ch->write_state >= RW_STATE_NUM || + ch->rw_mode > RW_STATE_WORD0 || + ch->gate > 1 || + ch->bcd > 1 ) +return -EDOM; + +if ( i != 2 && !ch->gate ) +return -EINVAL; +} + +return 0; +} + static int cf_check pit_load(struct domain *d, hvm_domain_context_t *h) { PITState *pit = domain_vpit(d); @@ -443,6 +485,14 @@ static int cf_check pit_load(struct doma goto out; } +for ( i = 0; i < ARRAY_SIZE(pit->hw.channels); ++i ) +{ +struct hvm_hw_pit_channel *ch = >hw.channels[i]; + +if ( (ch->mode &= 7) > 5 ) +ch->mode -= 4; +} + /* * Recreate platform timers from hardware state. There will be some * time jitter here, but the wall-clock will have jumped massively, so @@ -458,7 +508,7 @@ static int cf_check pit_load(struct doma return rc; } -HVM_REGISTER_SAVE_RESTORE(PIT, pit_save, NULL, pit_load, 1, HVMSR_PER_DOM); +HVM_REGISTER_SAVE_RESTORE(PIT, pit_save, pit_check, pit_load, 1, HVMSR_PER_DOM); #endif /* The intercept action for PIT DM retval: 0--not handled; 1--handled. 
*/ @@ -575,7 +625,7 @@ void pit_reset(struct domain *d) for ( i = 0; i < 3; i++ ) { s = >hw.channels[i]; -s->mode = 0xff; /* the init mode */ +s->mode = 7; /* unreachable sentinel */ s->gate = (i != 2); pit_load_count(pit, i, 0); }
[PATCH v4 2/5] x86/HVM: adjust save/restore hook registration for optional check handler
Register NULL uniformly as a first step. Signed-off-by: Jan Beulich Acked-by: Andrew Cooper --- v2: New. --- a/xen/arch/x86/cpu/mcheck/vmce.c +++ b/xen/arch/x86/cpu/mcheck/vmce.c @@ -374,7 +374,7 @@ static int cf_check vmce_load_vcpu_ctxt( return err ?: vmce_restore_vcpu(v, ); } -HVM_REGISTER_SAVE_RESTORE(VMCE_VCPU, vmce_save_vcpu_ctxt, +HVM_REGISTER_SAVE_RESTORE(VMCE_VCPU, vmce_save_vcpu_ctxt, NULL, vmce_load_vcpu_ctxt, 1, HVMSR_PER_VCPU); #endif --- a/xen/arch/x86/emul-i8254.c +++ b/xen/arch/x86/emul-i8254.c @@ -458,7 +458,7 @@ static int cf_check pit_load(struct doma return rc; } -HVM_REGISTER_SAVE_RESTORE(PIT, pit_save, pit_load, 1, HVMSR_PER_DOM); +HVM_REGISTER_SAVE_RESTORE(PIT, pit_save, NULL, pit_load, 1, HVMSR_PER_DOM); #endif /* The intercept action for PIT DM retval: 0--not handled; 1--handled. */ --- a/xen/arch/x86/hvm/hpet.c +++ b/xen/arch/x86/hvm/hpet.c @@ -692,7 +692,7 @@ static int cf_check hpet_load(struct dom return 0; } -HVM_REGISTER_SAVE_RESTORE(HPET, hpet_save, hpet_load, 1, HVMSR_PER_DOM); +HVM_REGISTER_SAVE_RESTORE(HPET, hpet_save, NULL, hpet_load, 1, HVMSR_PER_DOM); static void hpet_set(HPETState *h) { --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -793,7 +793,7 @@ static int cf_check hvm_load_tsc_adjust( return 0; } -HVM_REGISTER_SAVE_RESTORE(TSC_ADJUST, hvm_save_tsc_adjust, +HVM_REGISTER_SAVE_RESTORE(TSC_ADJUST, hvm_save_tsc_adjust, NULL, hvm_load_tsc_adjust, 1, HVMSR_PER_VCPU); static int cf_check hvm_save_cpu_ctxt(struct vcpu *v, hvm_domain_context_t *h) @@ -1189,7 +1189,7 @@ static int cf_check hvm_load_cpu_ctxt(st return 0; } -HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt, 1, +HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, NULL, hvm_load_cpu_ctxt, 1, HVMSR_PER_VCPU); #define HVM_CPU_XSAVE_SIZE(xcr0) (offsetof(struct hvm_hw_cpu_xsave, \ @@ -1538,6 +1538,7 @@ static int __init cf_check hvm_register_ hvm_register_savevm(CPU_XSAVE_CODE, "CPU_XSAVE", hvm_save_cpu_xsave_states, +NULL, 
hvm_load_cpu_xsave_states, HVM_CPU_XSAVE_SIZE(xfeature_mask) + sizeof(struct hvm_save_descriptor), @@ -1546,6 +1547,7 @@ static int __init cf_check hvm_register_ hvm_register_savevm(CPU_MSR_CODE, "CPU_MSR", hvm_save_cpu_msrs, +NULL, hvm_load_cpu_msrs, HVM_CPU_MSR_SIZE(ARRAY_SIZE(msrs_to_send)) + sizeof(struct hvm_save_descriptor), --- a/xen/arch/x86/hvm/irq.c +++ b/xen/arch/x86/hvm/irq.c @@ -784,9 +784,9 @@ static int cf_check irq_load_link(struct return 0; } -HVM_REGISTER_SAVE_RESTORE(PCI_IRQ, irq_save_pci, irq_load_pci, +HVM_REGISTER_SAVE_RESTORE(PCI_IRQ, irq_save_pci, NULL, irq_load_pci, 1, HVMSR_PER_DOM); -HVM_REGISTER_SAVE_RESTORE(ISA_IRQ, irq_save_isa, irq_load_isa, +HVM_REGISTER_SAVE_RESTORE(ISA_IRQ, irq_save_isa, NULL, irq_load_isa, 1, HVMSR_PER_DOM); -HVM_REGISTER_SAVE_RESTORE(PCI_LINK, irq_save_link, irq_load_link, +HVM_REGISTER_SAVE_RESTORE(PCI_LINK, irq_save_link, NULL, irq_load_link, 1, HVMSR_PER_DOM); --- a/xen/arch/x86/hvm/mtrr.c +++ b/xen/arch/x86/hvm/mtrr.c @@ -773,7 +773,7 @@ static int cf_check hvm_load_mtrr_msr(st return 0; } -HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, hvm_load_mtrr_msr, 1, +HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, NULL, hvm_load_mtrr_msr, 1, HVMSR_PER_VCPU); void memory_type_changed(struct domain *d) --- a/xen/arch/x86/hvm/pmtimer.c +++ b/xen/arch/x86/hvm/pmtimer.c @@ -300,7 +300,7 @@ static int cf_check acpi_load(struct dom return 0; } -HVM_REGISTER_SAVE_RESTORE(PMTIMER, acpi_save, acpi_load, +HVM_REGISTER_SAVE_RESTORE(PMTIMER, acpi_save, NULL, acpi_load, 1, HVMSR_PER_DOM); int pmtimer_change_ioport(struct domain *d, uint64_t version) --- a/xen/arch/x86/hvm/rtc.c +++ b/xen/arch/x86/hvm/rtc.c @@ -797,7 +797,7 @@ static int cf_check rtc_load(struct doma return 0; } -HVM_REGISTER_SAVE_RESTORE(RTC, rtc_save, rtc_load, 1, HVMSR_PER_DOM); +HVM_REGISTER_SAVE_RESTORE(RTC, rtc_save, NULL, rtc_load, 1, HVMSR_PER_DOM); void rtc_reset(struct domain *d) { --- a/xen/arch/x86/hvm/save.c +++ b/xen/arch/x86/hvm/save.c @@ 
-88,6 +88,7 @@ static struct { void __init hvm_register_savevm(uint16_t typecode, const char *name, hvm_save_handler save_state, +hvm_check_handler check_state, hvm_load_handler load_state, size_t size, int kind) { @@
[PATCH v4 1/5] x86/HVM: split restore state checking from state loading
..., at least as reasonably feasible without making a check hook mandatory (in particular strict vs relaxed/zero-extend length checking can't be done early this way). Note that only one of the two uses of "real" hvm_load() is accompanied with a "checking" one. The other directly consumes hvm_save() output, which ought to be well-formed. This means that while input data related checks don't need repeating in the "load" function when already done by the "check" one (albeit assertions to this effect may be desirable), domain state related checks (e.g. has_xyz(d)) will be required in both places. Suggested-by: Roger Pau Monné Signed-off-by: Jan Beulich --- Now that this re-arranges hvm_load() anyway, wouldn't it be better to down the vCPU-s ahead of calling arch_hvm_load() (which is now easy to arrange for)? Do we really need all the copying involved in use of _hvm_read_entry() (backing hvm_load_entry())? Zero-extending loads are likely easier to handle that way, but for strict loads all we gain is a reduced risk of unaligned accesses (compared to simply pointing into h->data[]). --- v4: Fold hvm_check() into hvm_load(). v2: New. 
--- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -379,8 +379,12 @@ long arch_do_domctl( if ( copy_from_guest(c.data, domctl->u.hvmcontext.buffer, c.size) != 0 ) goto sethvmcontext_out; +ret = hvm_load(d, false, ); +if ( ret ) +goto sethvmcontext_out; + domain_pause(d); -ret = hvm_load(d, ); +ret = hvm_load(d, true, ); domain_unpause(d); sethvmcontext_out: --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -5397,7 +5397,7 @@ int hvm_copy_context_and_params(struct d } c.cur = 0; -rc = hvm_load(dst, ); +rc = hvm_load(dst, true, ); out: vfree(c.data); --- a/xen/arch/x86/hvm/save.c +++ b/xen/arch/x86/hvm/save.c @@ -30,7 +30,8 @@ static void arch_hvm_save(struct domain d->arch.hvm.sync_tsc = rdtsc(); } -static int arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr) +static int arch_hvm_check(const struct domain *d, + const struct hvm_save_header *hdr) { uint32_t eax, ebx, ecx, edx; @@ -55,6 +56,11 @@ static int arch_hvm_load(struct domain * "(%#"PRIx32") and restored on another (%#"PRIx32").\n", d->domain_id, hdr->cpuid, eax); +return 0; +} + +static void arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr) +{ /* Restore guest's preferred TSC frequency. */ if ( hdr->gtsc_khz ) d->arch.tsc_khz = hdr->gtsc_khz; @@ -66,13 +72,12 @@ static int arch_hvm_load(struct domain * /* VGA state is not saved/restored, so we nobble the cache. 
*/ d->arch.hvm.stdvga.cache = STDVGA_CACHE_DISABLED; - -return 0; } /* List of handlers for various HVM save and restore types */ static struct { hvm_save_handler save; +hvm_check_handler check; hvm_load_handler load; const char *name; size_t size; @@ -88,6 +93,7 @@ void __init hvm_register_savevm(uint16_t { ASSERT(typecode <= HVM_SAVE_CODE_MAX); ASSERT(hvm_sr_handlers[typecode].save == NULL); +ASSERT(hvm_sr_handlers[typecode].check == NULL); ASSERT(hvm_sr_handlers[typecode].load == NULL); hvm_sr_handlers[typecode].save = save_state; hvm_sr_handlers[typecode].load = load_state; @@ -275,12 +281,10 @@ int hvm_save(struct domain *d, hvm_domai return 0; } -int hvm_load(struct domain *d, hvm_domain_context_t *h) +int hvm_load(struct domain *d, bool real, hvm_domain_context_t *h) { const struct hvm_save_header *hdr; struct hvm_save_descriptor *desc; -hvm_load_handler handler; -struct vcpu *v; int rc; if ( d->is_dying ) @@ -291,50 +295,91 @@ int hvm_load(struct domain *d, hvm_domai if ( !hdr ) return -ENODATA; -rc = arch_hvm_load(d, hdr); -if ( rc ) -return rc; +rc = arch_hvm_check(d, hdr); +if ( real ) +{ +struct vcpu *v; + +ASSERT(!rc); +arch_hvm_load(d, hdr); -/* Down all the vcpus: we only re-enable the ones that had state saved. */ -for_each_vcpu(d, v) -if ( !test_and_set_bit(_VPF_down, >pause_flags) ) -vcpu_sleep_nosync(v); +/* + * Down all the vcpus: we only re-enable the ones that had state + * saved. + */ +for_each_vcpu(d, v) +if ( !test_and_set_bit(_VPF_down, >pause_flags) ) +vcpu_sleep_nosync(v); +} +else if ( rc ) +return rc; for ( ; ; ) { +const char *name; +hvm_load_handler load; + if ( h->size - h->cur < sizeof(struct hvm_save_descriptor) ) { /* Run out of data */ printk(XENLOG_G_ERR "HVM%d restore: save did not end with a null entry\n", d->domain_id); +ASSERT(!real);
[PATCH v4 0/5] x86/HVM: load state checking
With the request to convert bounding to actual refusal, and then doing so in new hooks, the two previously separate patches now need to be in a series, with infrastructure work done first. Clearly the checking in other load handlers could (and likely wants to be) moved to separate check handlers as well - one example of doing so is added anew in v4, the rest will want doing down the road. 1: HVM: split restore state checking from state loading 2: HVM: adjust save/restore hook registration for optional check handler 3: vPIT: check values loaded from state save record 4: vPIC: check values loaded from state save record 5: vIRQ: split PCI link load state checking from actual loading Jan
Re: [XEN PATCH] xen/arm: ffa: return fpi size from FFA_PARTITION_INFO_GET
Hi Jens, > On 13 Dec 2023, at 11:31, Jens Wiklander wrote: > > Until now has FFA_PARTITION_INFO_GET always returned zero in w3, but > FF-A v1.1 requires FFA_PARTITION_INFO_GET to return the size of each > partition information descriptor returned if > FFA_PARTITION_INFO_GET_COUNT_FLAG isn't set. > Good finding. > The SPMC queried with FFA_PARTITION_INFO_GET must also return the size of each > partition information descriptor returned so fix this by passing along > the same value. > > Fixes: caf6491e95a9 ("xen/arm: ffa: support guest FFA_PARTITION_INFO_GET") > Signed-off-by: Jens Wiklander Reviewed-by: Bertrand Marquis Cheers Bertrand > --- > xen/arch/arm/tee/ffa.c | 21 + > 1 file changed, 13 insertions(+), 8 deletions(-) > > diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c > index 183528d13388..1d4e0a083006 100644 > --- a/xen/arch/arm/tee/ffa.c > +++ b/xen/arch/arm/tee/ffa.c > @@ -514,7 +514,7 @@ static int32_t ffa_rxtx_map(paddr_t tx_addr, paddr_t > rx_addr, > > static int32_t ffa_partition_info_get(uint32_t w1, uint32_t w2, uint32_t w3, > uint32_t w4, uint32_t w5, > - uint32_t *count) > + uint32_t *count, uint32_t *fpi_size) > { > const struct arm_smccc_1_2_regs arg = { > .a0 = FFA_PARTITION_INFO_GET, > @@ -531,7 +531,10 @@ static int32_t ffa_partition_info_get(uint32_t w1, > uint32_t w2, uint32_t w3, > > ret = get_ffa_ret_code(&resp); > if ( !ret ) > +{ > *count = resp.a2; > +*fpi_size = resp.a3; > +} > > return ret; > } > @@ -784,7 +787,7 @@ static uint32_t handle_rxtx_unmap(void) > > static int32_t handle_partition_info_get(uint32_t w1, uint32_t w2, uint32_t > w3, > uint32_t w4, uint32_t w5, > - uint32_t *count) > + uint32_t *count, uint32_t *fpi_size) > { > int32_t ret = FFA_RET_DENIED; > struct domain *d = current->domain; > @@ -799,7 +802,7 @@ static int32_t handle_partition_info_get(uint32_t w1, > uint32_t w2, uint32_t w3, > */ > if ( w5 == FFA_PARTITION_INFO_GET_COUNT_FLAG && > ctx->guest_vers == FFA_VERSION_1_1 ) > -return ffa_partition_info_get(w1, w2, 
w3, w4, w5, count); > +return ffa_partition_info_get(w1, w2, w3, w4, w5, count, fpi_size); > if ( w5 ) > return FFA_RET_INVALID_PARAMETERS; > > @@ -812,7 +815,7 @@ static int32_t handle_partition_info_get(uint32_t w1, > uint32_t w2, uint32_t w3, > if ( !ctx->page_count || !ctx->rx_is_free ) > goto out; > spin_lock(_rx_buffer_lock); > -ret = ffa_partition_info_get(w1, w2, w3, w4, w5, count); > +ret = ffa_partition_info_get(w1, w2, w3, w4, w5, count, fpi_size); > if ( ret ) > goto out_rx_buf_unlock; > /* > @@ -842,7 +845,7 @@ static int32_t handle_partition_info_get(uint32_t w1, > uint32_t w2, uint32_t w3, > } > else > { > -size_t sz = *count * sizeof(struct ffa_partition_info_1_1); > +size_t sz = *count * *fpi_size; > > if ( ctx->page_count * FFA_PAGE_SIZE < sz ) > { > @@ -1409,6 +1412,7 @@ static bool ffa_handle_call(struct cpu_user_regs *regs) > uint32_t fid = get_user_reg(regs, 0); > struct domain *d = current->domain; > struct ffa_ctx *ctx = d->arch.tee; > +uint32_t fpi_size; > uint32_t count; > int e; > > @@ -1444,11 +1448,11 @@ static bool ffa_handle_call(struct cpu_user_regs > *regs) > get_user_reg(regs, 2), > get_user_reg(regs, 3), > get_user_reg(regs, 4), > - get_user_reg(regs, 5), ); > + get_user_reg(regs, 5), , > _size); > if ( e ) > set_regs_error(regs, e); > else > -set_regs_success(regs, count, 0); > +set_regs_success(regs, count, fpi_size); > return true; > case FFA_RX_RELEASE: > e = handle_rx_release(); > @@ -1629,10 +1633,11 @@ static bool init_subscribers(struct > ffa_partition_info_1_1 *fpi, uint16_t count) > static bool init_sps(void) > { > bool ret = false; > +uint32_t fpi_size; > uint32_t count; > int e; > > -e = ffa_partition_info_get(0, 0, 0, 0, 0, ); > +e = ffa_partition_info_get(0, 0, 0, 0, 0, , _size); > if ( e ) > { > printk(XENLOG_ERR "ffa: Failed to get list of SPs: %d\n", e); > -- > 2.34.1 >
Re: [PATCH 3/9] xen/efi: address violations of MISRA C:2012 Rule 11.8
On 14/12/23 17:32, Jan Beulich wrote: On 14.12.2023 13:07, Simone Ballarin wrote: --- a/xen/common/version.c +++ b/xen/common/version.c @@ -178,7 +178,7 @@ void __init xen_build_init(void) if ( [1] >= __note_gnu_build_id_end ) return; -sz = (void *)__note_gnu_build_id_end - (void *)n; +sz = (const void *)__note_gnu_build_id_end - (const void *)n; rc = xen_build_id_check(n, sz, _id_p, _id_len); How does this change fit the subject? I'm also inclined to ask that these casts be changed to unsigned long or uintptr_t. For the actual EFI change: Reviewed-by: Jan Beulich Jan Ok, v2 will use uintptr_t. -- Simone Ballarin, M.Sc. Field Application Engineer, BUGSENG (https://bugseng.com)
Re: [PATCH 0/9] xen: address violations of MISRA C:2012 Rule 11.8
On 14/12/23 13:36, Jan Beulich wrote: On 14.12.2023 13:07, Simone Ballarin wrote: From: Maria Celeste Cesario The xen sources contain violations of MISRA C:2012 Rule 11.8 whose headline states: "A conversion shall not remove any const, volatile or _Atomic qualification from the type pointed to by a pointer". This patch amends or removes casts that unnecessarily drop const and volatile qualifiers. Example: static always_inline bool int_##name(volatile void *p) { volatile uint32_t *ptr = (uint32_t *)p; /* Non-compliant */ volatile uint32_t *ptr = (volatile uint32_t *)p;/* Compliant, proposed change */ } Why would you further complicate things when here the cast can simply be dropped? Jan Of course, the example will be improved in v2. -- Simone Ballarin, M.Sc. Field Application Engineer, BUGSENG (https://bugseng.com)
xen | Successful pipeline for staging-4.17 | 949a4aad
Pipeline #177680 has passed! Project: xen ( https://gitlab.com/xen-project/xen ) Branch: staging-4.17 ( https://gitlab.com/xen-project/xen/-/commits/staging-4.17 ) Commit: 949a4aad ( https://gitlab.com/xen-project/xen/-/commit/949a4aad417621638231e4cf9f1b35e8e0328463 ) Commit Message: update Xen version to 4.17.3 Commit Author: Jan Beulich ( https://gitlab.com/jbeulich ) Pipeline #177680 ( https://gitlab.com/xen-project/xen/-/pipelines/177680 ) triggered by Ganis ( https://gitlab.com/ganis ) successfully completed 84 jobs in 2 stages. -- You're receiving this email because of your account on gitlab.com.
Re: [RFC PATCH v1 1/1] xen/Makefile: introduce ARCH_FIXED_CONFIG for randconfig
On Mon, 2023-12-11 at 16:56 +0100, Jan Beulich wrote: > On 07.12.2023 21:17, Andrew Cooper wrote: > > On 07/12/2023 5:03 pm, Oleksii Kurochko wrote: > > > ARCH_FIXED_CONFIG is required in the case of randconfig > > > and CI for configs that aren't ready or are not > > > supposed to be implemented for specific architecture. > > > These configs should always be disabled to prevent randconfig > > > related tests from failing. > > > > > > Signed-off-by: Oleksii Kurochko > > > --- > > > xen/Makefile | 5 - > > > 1 file changed, 4 insertions(+), 1 deletion(-) > > > > > > diff --git a/xen/Makefile b/xen/Makefile > > > index ca571103c8..8ae8fe1480 100644 > > > --- a/xen/Makefile > > > +++ b/xen/Makefile > > > @@ -336,11 +336,14 @@ ifeq ($(config-build),y) > > > # *config targets only - make sure prerequisites are updated, > > > and descend > > > # in tools/kconfig to make the *config target > > > > > > +ARCH_FORCED_CONFIG := > > > $(srctree)/arch/$(SRCARCH)/configs/randomforced.config > > > + > > > # Create a file for KCONFIG_ALLCONFIG which depends on the > > > environment. > > > # This will be use by kconfig targets > > > allyesconfig/allmodconfig/allnoconfig/randconfig > > > filechk_kconfig_allconfig = \ > > > $(if $(findstring n,$(XEN_HAS_CHECKPOLICY)), echo > > > 'CONFIG_XSM_FLASK_POLICY=n';) \ > > > - $(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG);) \ > > > + $(if $(KCONFIG_ALLCONFIG), cat $(KCONFIG_ALLCONFIG); \ > > > + $(if $(wildcard $(ARCH_FORCED_CONFIG)), cat > > > $(ARCH_FORCED_CONFIG);) ) \ > > > : > > > > > > .allconfig.tmp: FORCE > > > > We already have infrastructure for this. What's wrong with > > EXTRA_FIXED_RANDCONFIG? > > What I don't understand here is why dealing with the issue would want > limiting to gitlab-CI. Anyone could run randconfig on their own, and > imo it would be helpful if the same issue(s) could be prevented > there, > too. Hence my earlier suggestion to have a snippet which can be used > by "interested" parties. 
And once dealt with in e.g. the makefile > there should not be a need for any overrides in the CI config > anymore. I agree with Jan's point of view that having a more universal solution for cases where it is necessary to disable a config for randconfig would be beneficial. Sometimes, I test randconfig locally, especially during the RISC-V Xen full build patch series where numerous configs are disabled. It would be convenient to have only one place to disable configs instead of duplicating them in two different places. Does anyone have any objections? ~ Oleksii > > > ---8<--- > > > > CI: Revert "automation: Drop ppc64le-*randconfig jobs", fix > > Randconfig > > with existing infrastructure > > > > This reverts commit cbb71b95dd708b1e26899bbe1e7bf9a85081fd60. > > > > Signed-off-by: Andrew Cooper > > > > diff --git a/automation/gitlab-ci/build.yaml > > b/automation/gitlab-ci/build.yaml > > index 32af30ccedc9..346d0400ed09 100644 > > --- a/automation/gitlab-ci/build.yaml > > +++ b/automation/gitlab-ci/build.yaml > > @@ -538,6 +538,7 @@ archlinux-current-gcc-riscv64-randconfig: > > RANDCONFIG: y > > EXTRA_FIXED_RANDCONFIG: > > CONFIG_COVERAGE=n > > + CONFIG_GRANT_TABLE=n > > > > archlinux-current-gcc-riscv64-debug-randconfig: > > extends: .gcc-riscv64-cross-build-debug > > @@ -547,6 +548,7 @@ archlinux-current-gcc-riscv64-debug-randconfig: > > RANDCONFIG: y > > EXTRA_FIXED_RANDCONFIG: > > CONFIG_COVERAGE=n > > + CONFIG_GRANT_TABLE=n > > > > # Power cross-build > > debian-bullseye-gcc-ppc64le: > > @@ -563,6 +565,26 @@ debian-bullseye-gcc-ppc64le-debug: > > KBUILD_DEFCONFIG: ppc64_defconfig > > HYPERVISOR_ONLY: y > > > > +debian-bullseye-gcc-ppc64le-randconfig: > > + extends: .gcc-ppc64le-cross-build > > + variables: > > + CONTAINER: debian:bullseye-ppc64le > > + KBUILD_DEFCONFIG: ppc64_defconfig > > + RANDCONFIG: y > > + EXTRA_FIXED_RANDCONFIG: > > + CONFIG_COVERAGE=n > > + CONFIG_GRANT_TABLE=n > > + > > +debian-bullseye-gcc-ppc64le-debug-randconfig: > > + extends: 
.gcc-ppc64le-cross-build-debug > > + variables: > > + CONTAINER: debian:bullseye-ppc64le > > + KBUILD_DEFCONFIG: ppc64_defconfig > > + RANDCONFIG: y > > + EXTRA_FIXED_RANDCONFIG: > > + CONFIG_COVERAGE=n > > + CONFIG_GRANT_TABLE=n > > + > > # Yocto test jobs > > yocto-qemuarm64: > > extends: .yocto-test-arm64 > > >
Re: [XEN PATCH v3 1/3] xen/x86: add missing instances of asmlinkage attributes
On 12.12.2023 02:48, Stefano Stabellini wrote: > On Mon, 11 Dec 2023, Nicola Vetrini wrote: >> No functional change. >> >> Signed-off-by: Nicola Vetrini > > Reviewed-by: Stefano Stabellini Acked-by: Jan Beulich
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On 18.12.2023 14:46, Jan Beulich wrote: > On 18.12.2023 13:11, Roger Pau Monné wrote: >> On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: >>> --- >>> I have to admit that I'm not really certain about the placement of the >>> IBPB wrt the MSR_SPEC_CTRL writes. For now I've simply used "opposite of >>> entry". >> >> Maybe it would easier to just add the MSR_PRED_CMD PRED_CMD_IBPB write >> to the vmcs MSR load list? >> >> It's a one-time only AFAICT, as you would only want to do this for >> context-switch AFAICT. > > That would be a back and forth of adding and removing the MSR to/from > that list then, which I'm not convinced is helpful. With these special > MSRs I would further be uncertain as to their effect when used via one > of these lists. Plus (as only a secondary argument, but still) it would make PV and HVM mechanisms entirely different. Jan
Re: [PATCH v4 2/4] x86/spec-ctrl: defer context-switch IBPB until guest entry
On 18.12.2023 13:39, Roger Pau Monné wrote: > On Tue, Feb 14, 2023 at 05:11:05PM +0100, Jan Beulich wrote: >> In order to avoid clobbering Xen's own predictions, defer the barrier as >> much as possible. Merely mark the CPU as needing a barrier issued the >> next time we're exiting to guest context. > > While I understand that doing the flush in the middle of the guest > context might not be ideal, as it's my understanding we also s/guest context/context switch/? > needlessly flush Xen predictions, I'm unsure whether this makes any > difference in practice, and IMO just makes the exit to guest paths > more complex. I need to redirect this question to Andrew, who suggested that doing so can be expected to make a difference. When we were discussing this, I could easily see it might make a difference, but I cannot provide hard proof. Jan
[linux-linus test] 184162: tolerable FAIL - PUSHED
flight 184162 linux-linus real [real] flight 184166 linux-linus real-retest [real] http://logs.test-lab.xenproject.org/osstest/logs/184162/ http://logs.test-lab.xenproject.org/osstest/logs/184166/ Failures :-/ but no regressions. Tests which are failing intermittently (not blocking): test-armhf-armhf-examine 8 reboot fail pass in 184166-retest test-armhf-armhf-libvirt 8 xen-bootfail pass in 184166-retest Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt-raw 15 saverestore-support-check fail blocked in 184159 test-armhf-armhf-libvirt 16 saverestore-support-check fail in 184166 like 184159 test-armhf-armhf-libvirt15 migrate-support-check fail in 184166 never pass test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 184159 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 184159 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 184159 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check fail like 184159 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 184159 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 184159 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never 
pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-qcow2 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-armhf-armhf-xl-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-raw 14 migrate-support-checkfail never pass version targeted for testing: linuxceb6a6f023fd3e8b07761ed900352ef574010bcb baseline version: linux0e389834672c723435a44818ed2cabc4dad24429 Last test of basis 184159 2023-12-17 17:42:14 Z0 days Testing same since 184162 2023-12-18 02:29:46 Z0 days1 attempts People who touched revisions under test: Linus Torvalds Mark 
Rutland Peter Zijlstra (Intel) jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm pass build-amd64 pass build-arm64
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On 18.12.2023 14:46, Jan Beulich wrote: > On 18.12.2023 13:11, Roger Pau Monné wrote: >> Hello, >> >> I'm not as expert as Andrew in all the speculation stuff, but I will >> try to provide some feedback. >> >> On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: >>> In order to be able to defer the context switch IBPB to the last >>> possible point, add logic to the exit-to-guest paths to issue the >>> barrier there, including the "IBPB doesn't flush the RSB/RAS" >>> workaround. Since alternatives, for now at least, can't nest, emit JMP >>> to skip past both constructs where both are needed. This may be more >>> efficient anyway, as the sequence of NOPs is pretty long. >> >> Could you elaborate on the reason why deferring the IBPB to the exit >> to guest path is helpful? So far it just seem to make the logic more >> complex without nay justification (at least in the changelog). > > I've added "(to leave behind as little as possible)" ahead of the 1st > comma - is that sufficient, do you think? Actually, the next patch supplies better context, i.e. is more / also about avoiding to clobber Xen's own predictions. Jan
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
On 18.12.2023 13:11, Roger Pau Monné wrote: > Hello, > > I'm not as expert as Andrew in all the speculation stuff, but I will > try to provide some feedback. > > On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: >> In order to be able to defer the context switch IBPB to the last >> possible point, add logic to the exit-to-guest paths to issue the >> barrier there, including the "IBPB doesn't flush the RSB/RAS" >> workaround. Since alternatives, for now at least, can't nest, emit JMP >> to skip past both constructs where both are needed. This may be more >> efficient anyway, as the sequence of NOPs is pretty long. > > Could you elaborate on the reason why deferring the IBPB to the exit > to guest path is helpful? So far it just seem to make the logic more > complex without nay justification (at least in the changelog). I've added "(to leave behind as little as possible)" ahead of the 1st comma - is that sufficient, do you think? >> --- >> I have to admit that I'm not really certain about the placement of the >> IBPB wrt the MSR_SPEC_CTRL writes. For now I've simply used "opposite of >> entry". > > Maybe it would easier to just add the MSR_PRED_CMD PRED_CMD_IBPB write > to the vmcs MSR load list? > > It's a one-time only AFAICT, as you would only want to do this for > context-switch AFAICT. That would be a back and forth of adding and removing the MSR to/from that list then, which I'm not convinced is helpful. With these special MSRs I would further be uncertain as to their effect when used via one of these lists. Jan
Re: [PATCH v4 2/4] x86/spec-ctrl: defer context-switch IBPB until guest entry
On Tue, Feb 14, 2023 at 05:11:05PM +0100, Jan Beulich wrote: > In order to avoid clobbering Xen's own predictions, defer the barrier as > much as possible. Merely mark the CPU as needing a barrier issued the > next time we're exiting to guest context. While I understand that doing the flush in the middle of the guest context might not be ideal, as it's my understanding we also needlessly flush Xen predictions, I'm unsure whether this makes any difference in practice, and IMO just makes the exit to guest paths more complex. Thanks, Roger.
Re: [PATCH v4 1/4] x86/spec-ctrl: add logic to issue IBPB on exit to guest
Hello, I'm not as expert as Andrew in all the speculation stuff, but I will try to provide some feedback. On Tue, Feb 14, 2023 at 05:10:42PM +0100, Jan Beulich wrote: > In order to be able to defer the context switch IBPB to the last > possible point, add logic to the exit-to-guest paths to issue the > barrier there, including the "IBPB doesn't flush the RSB/RAS" > workaround. Since alternatives, for now at least, can't nest, emit JMP > to skip past both constructs where both are needed. This may be more > efficient anyway, as the sequence of NOPs is pretty long. Could you elaborate on the reason why deferring the IBPB to the exit to guest path is helpful? So far it just seem to make the logic more complex without nay justification (at least in the changelog). > > As with all other conditional blocks on exit-to-guest paths, no > Spectre-v1 protections are necessary as execution will imminently be > hitting a serialising event. > > Signed-off-by: Jan Beulich > --- > I have to admit that I'm not really certain about the placement of the > IBPB wrt the MSR_SPEC_CTRL writes. For now I've simply used "opposite of > entry". Maybe it would easier to just add the MSR_PRED_CMD PRED_CMD_IBPB write to the vmcs MSR load list? It's a one-time only AFAICT, as you would only want to do this for context-switch AFAICT. Thanks, Roger.
Re: [PATCH v2 32/39] xen/riscv: add minimal stuff to asm/page.h to build full Xen
On 18.12.2023 12:57, Oleksii wrote: > On Mon, 2023-12-18 at 12:36 +0100, Jan Beulich wrote: >> On 18.12.2023 11:45, Oleksii wrote: >>> On Thu, 2023-12-14 at 16:57 +0100, Jan Beulich wrote: On 24.11.2023 11:30, Oleksii Kurochko wrote: > Signed-off-by: Oleksii Kurochko Acked-by: Jan Beulich I wonder though ... > --- a/xen/arch/riscv/include/asm/page.h > +++ b/xen/arch/riscv/include/asm/page.h > @@ -6,6 +6,7 @@ > #ifndef __ASSEMBLY__ > > #include > +#include > #include > > #include > @@ -32,6 +33,9 @@ > #define PTE_LEAF_DEFAULT (PTE_VALID | PTE_READABLE > | > PTE_WRITABLE) > #define PTE_TABLE (PTE_VALID) > > +/* TODO */ > +#define PAGE_HYPERVISOR 0 ... whether this couldn't be defined properly right away. >>> It can be introduced now but it requires some additional defines to >>> be >>> introduced in the same time: >>> >>> #define _PAGE_W_BIT 2 >>> #define _PAGE_XN_BIT 3 >>> #define _PAGE_RO_BIT 1 >>> #define _PAGE_XN (1U << _PAGE_XN_BIT) >>> #define _PAGE_RO (1U << _PAGE_RO_BIT) >>> #define _PAGE_W (1U << _PAGE_W_BIT) >> >> Why would you need these, when you already have >> PTE_{READABLE,WRITABLE,EXECUTABLE} just out of context from above? > I thought that the hypervisor page table is fully software-related, and > that's why a separate PAGE*BIT was introduced. These bits can be > different from PTE* bits, which are hardware-related > https://gitlab.com/xen-project/xen/-/blob/staging/xen/arch/arm/include/asm/page.h?ref_type=heads#L66 > > It looks like I misunderstood that, and PTE* can be used everywhere > instead of _PAGE*. Alternatively, we could consider renaming everything > to PAGE* to maintain consistency across architectures. > > Does it make sense? Sure. Whether renaming makes sense is harder to tell though. It would be only the name prefix that's uniform, as how exactly e.g. permissions are controlled is arch-specific anyway. 
The one place I'm aware where PAGE_* (or, sadly, still _PAGE_*) would matter for common code is _PAGE_NONE (not sure though whether that's something that can / wants to be expressed for RISC-V). Jan
Community Manager Update
Hi all, Merry Christmas! Just a quick update from myself. Whilst I can’t speak for everyone in the community, I know we have valuable members who are committed to making the Xen Project successful. We are a strong and passionate community made up of individuals who consistently seek improvement in ways of working to ensure our open source project is welcoming and friendly. Our project is not only advancing the industry in many ways, but there are policies and governance practices that the community has helped shape to make it a better environment for all. As such, I strongly condemn activities or communications that diminish our efforts on how great the community is. We may not be perfect, but collectively we are all working towards a common goal on how to be better. This is a reminder to everyone to keep up the great work you are already doing, and for other individuals to refrain from making comments that could be seen as harmful to the community. Our communications can easily be misperceived and interpreted by others negatively, so if your messaging is not beneficial, please do not send these. Our next steps will be to look at creating a community working or technical group of some sort, to help address concerns and drive direction within the community. Given this will be something that affects everyone, I’d like to ask for your patience as we scope out the specifics before asking for your feedback. I want to make sure we do this properly to ensure the process is something that benefits the community in the long run. As soon as I have the details, I will be sharing them with you all. I wish you all a great holiday and a Happy New Year! Many thanks, Kelly Choi Community Manager Xen Project
Re: [PATCH v2 32/39] xen/riscv: add minimal stuff to asm/page.h to build full Xen
On Mon, 2023-12-18 at 12:36 +0100, Jan Beulich wrote: > On 18.12.2023 11:45, Oleksii wrote: > > On Thu, 2023-12-14 at 16:57 +0100, Jan Beulich wrote: > > > On 24.11.2023 11:30, Oleksii Kurochko wrote: > > > > Signed-off-by: Oleksii Kurochko > > > > > > Acked-by: Jan Beulich > > > > > > I wonder though ... > > > > > > > --- a/xen/arch/riscv/include/asm/page.h > > > > +++ b/xen/arch/riscv/include/asm/page.h > > > > @@ -6,6 +6,7 @@ > > > > #ifndef __ASSEMBLY__ > > > > > > > > #include > > > > +#include > > > > #include > > > > > > > > #include > > > > @@ -32,6 +33,9 @@ > > > > #define PTE_LEAF_DEFAULT (PTE_VALID | PTE_READABLE > > > > | > > > > PTE_WRITABLE) > > > > #define PTE_TABLE (PTE_VALID) > > > > > > > > +/* TODO */ > > > > +#define PAGE_HYPERVISOR 0 > > > > > > ... whether this couldn't be defined properly right away. > > It can be introduced now but it requires some additional defines to > > be > > introduced in the same time: > > > > #define _PAGE_W_BIT 2 > > #define _PAGE_XN_BIT 3 > > #define _PAGE_RO_BIT 1 > > #define _PAGE_XN (1U << _PAGE_XN_BIT) > > #define _PAGE_RO (1U << _PAGE_RO_BIT) > > #define _PAGE_W (1U << _PAGE_W_BIT) > > Why would you need these, when you already have > PTE_{READABLE,WRITABLE,EXECUTABLE} just out of context from above? I thought that the hypervisor page table is fully software-related, and that's why a separate PAGE*BIT was introduced. These bits can be different from PTE* bits, which are hardware-related https://gitlab.com/xen-project/xen/-/blob/staging/xen/arch/arm/include/asm/page.h?ref_type=heads#L66 It looks like I misunderstood that, and PTE* can be used everywhere instead of _PAGE*. Alternatively, we could consider renaming everything to PAGE* to maintain consistency across architectures. Does it make sense? > (And > that's aside from me (a) not following the naming (vs their purpose) > and > (b) not seeing what _PAGE_*_BIT are needed for, not even thinking > about > the leading underscores in these identifiers again.) 
> > > ... > > /* > > * _PAGE_DEVICE and _PAGE_NORMAL are convenience defines. They are > > not > > * meant to be used outside of this header. > > */ > > // #define _PAGE_DEVICE _PAGE_XN > > #define _PAGE_NORMAL _PAGE_PRESENT > > > > #define PAGE_HYPERVISOR_RW (_PAGE_NORMAL | _PAGE_RO | _PAGE_XN > > | > > _PAGE_W) > > > > #define PAGE_HYPERVISOR PAGE_HYPERVISOR_RW > > > > And _PAGE_PRESENT in pgtbl-bits.h: > > > > #define _PAGE_PRESENT (1 << 0) > > > > I prefer to introduce all these things when they will be really used. > > I understand that, yet for easy things it may help doing them right > away, rather than leaving them to be touched (in a straightforward > way) > again very soon. > ~ Oleksii
[XEN PATCH] docs/misra: add entries to exclude-list
Exclude efibind.h for all the architectures: it is used to build the efi stub, which is a separate entry point for Xen when booted from EFI firmware. Remove redundant entries from out_of_scope.ecl. Exclude common/coverage: it is code to support gcov, hence it is part of the testing machinery. Exclude decompress.h: file ported from Linux that defines a unique and documented interface towards all the (adopted) decompress functions. Signed-off-by: Federico Serafini --- .../eclair_analysis/ECLAIR/out_of_scope.ecl | 5 - docs/misra/exclude-list.json | 16 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/automation/eclair_analysis/ECLAIR/out_of_scope.ecl b/automation/eclair_analysis/ECLAIR/out_of_scope.ecl index fd870716cf..9bcec4c69d 100644 --- a/automation/eclair_analysis/ECLAIR/out_of_scope.ecl +++ b/automation/eclair_analysis/ECLAIR/out_of_scope.ecl @@ -17,11 +17,6 @@ -file_tag+={out_of_scope,"^xen/arch/x86/include/asm/intel-family\\.h$"} -doc_end --doc_begin="Files imported from the gnu-efi package" --file_tag+={adopted,"^xen/include/efi/.*$"} --file_tag+={adopted,"^xen/arch/x86/include/asm/x86_64/efibind\\.h$"} --doc_end - -doc_begin="Build tools are out of scope." 
-file_tag+={out_of_scope_tools,"^xen/tools/.*$"} -file_tag+={out_of_scope_tools,"^xen/arch/x86/efi/mkreloc\\.c$"} diff --git a/docs/misra/exclude-list.json b/docs/misra/exclude-list.json index 48f671c548..83fae0b4a4 100644 --- a/docs/misra/exclude-list.json +++ b/docs/misra/exclude-list.json @@ -1,6 +1,10 @@ { "version": "1.0", "content": [ +{ +"rel_path": "arch/*/include/asm/*/efibind.h", +"comment": "Imported from gnu-efi package, ignore for now" +}, { "rel_path": "arch/arm/arm64/cpufeature.c", "comment": "Imported from Linux, ignore for now" @@ -97,6 +101,10 @@ "rel_path": "arch/x86/efi/check.c", "comment": "The resulting code is not included in the final Xen binary, ignore for now" }, +{ +"rel_path": "common/coverage/*", +"comment": "Files to support gcov, ignore for now" +}, { "rel_path": "common/bitmap.c", "comment": "Imported from Linux, ignore for now" @@ -213,6 +221,14 @@ "rel_path": "include/xen/acpi.h", "comment": "Imported from Linux, ignore for now" }, +{ +"rel_path": "include/efi/*", +"comment": "Imported from gnu-efi package, ignore for now" +}, +{ +"rel_path": "include/xen/decompress.h", +"comment": "Imported from Linux, ignore for now" +}, { "rel_path": "lib/list-sort.c", "comment": "Imported from Linux, ignore for now" -- 2.34.1
Re: [PATCH v2 31/39] xen/riscv: add required things to asm/current.h
On Mon, 2023-12-18 at 12:28 +0100, Jan Beulich wrote: > On 18.12.2023 11:39, Oleksii wrote: > > On Thu, 2023-12-14 at 16:55 +0100, Jan Beulich wrote: > > > On 24.11.2023 11:30, Oleksii Kurochko wrote: > > > > --- a/xen/arch/riscv/include/asm/current.h > > > > +++ b/xen/arch/riscv/include/asm/current.h > > > > @@ -3,6 +3,22 @@ > > > > #ifndef __ASM_CURRENT_H > > > > #define __ASM_CURRENT_H > > > > > > > > +#include > > > > +#include > > > > + > > > > +#ifndef __ASSEMBLY__ > > > > + > > > > +struct vcpu; > > > > > > I don't think you need this here? > > Shouldn't forward declaration be for the case of curr_vcpu > > declaration > > in the next line ? > > I don't think so. In C forward decls are needed only when an > otherwise > undeclared type is used as type of a function parameter. (C++ is > different > in this regard.) Thanks for clarifying; in this case, it can be dropped. ~ Oleksii > > > > +/* Which VCPU is "current" on this PCPU. */ > > > > +DECLARE_PER_CPU(struct vcpu *, curr_vcpu); > > > > + > > > > +#define current (this_cpu(curr_vcpu)) >
Re: [PATCH v2 24/39] xen/riscv: introduce asm/irq.h
On Mon, 2023-12-18 at 11:12 +0100, Jan Beulich wrote: > On 18.12.2023 11:04, Oleksii wrote: > > On Thu, 2023-12-14 at 15:09 +0100, Jan Beulich wrote: > > > On 24.11.2023 11:30, Oleksii Kurochko wrote: > > > > Signed-off-by: Oleksii Kurochko > > > > --- > > > > Changes in V2: > > > > - add ifdef CONFIG_HAS_DEVICE_TREE for things that > > > > shouldn't be > > > > in case !CONFIG_HAS_DEVICE_TREE > > > > > > Is there going to be a RISC-V build without this enabled > > > (selected)? > > > If > > > not, I'd recommend against such pointless #ifdef-ary. > > For this stage (Xen RISC-V full build), CONFIG_HAS_DEVICE_TREE will > > not > > be selected, but it will be in the near future. > > And from then on it'll always be selected, or only conditionally? In > the > former case it would still feel odd if #ifdef-s were introduced. It will always be selected until ACPI support is provided. I've seen patches that add ACPI support for the Linux kernel, but I'm not sure if it is really needed at this point. So, I've planned to go with Device Tree for quite a while ~ Oleksii
Re: [PATCH v2 33/39] xen/riscv: add minimal stuff to asm/processor.h to build full Xen
On 18.12.2023 11:49, Oleksii wrote: > On Thu, 2023-12-14 at 17:04 +0100, Jan Beulich wrote: >> On 24.11.2023 11:30, Oleksii Kurochko wrote: >>> +static inline void cpu_relax(void) >>> +{ >>> + int dummy; >>> + /* In lieu of a halt instruction, induce a long-latency >>> stall. */ >>> + __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy)); >> >> Any reason for this, when Arm's is just barrier(), and apparently >> they got >> away with this quite fine? Also isn't this causing a division by >> zero, >> which I'd expect to cause some kind of exception? (Terminology-wise >> I'm of >> course biased by x86, where "halt instruction" wouldn't be suitable >> to use >> here. But if that terminology is fine on RISC-V, then obviously no >> objection.) > It was based on Linux kernel code: > https://elixir.bootlin.com/linux/latest/source/arch/riscv/include/asm/vdso/processor.h#L9 > > But looks I missed barrier()... > Probably it will be better update cpu_relax() to: > > /* Encoding of the pause instruction */ > __asm__ __volatile__ (".4byte 0x10F"); > > barrier(); But definitely without .4byte, which defines a piece of data. If for whatever reason you don't want to use "pause" directly, please use .insn. Jan
Re: [PATCH v2 32/39] xen/riscv: add minimal stuff to asm/page.h to build full Xen
On 18.12.2023 11:45, Oleksii wrote: > On Thu, 2023-12-14 at 16:57 +0100, Jan Beulich wrote: >> On 24.11.2023 11:30, Oleksii Kurochko wrote: >>> Signed-off-by: Oleksii Kurochko >> >> Acked-by: Jan Beulich >> >> I wonder though ... >> >>> --- a/xen/arch/riscv/include/asm/page.h >>> +++ b/xen/arch/riscv/include/asm/page.h >>> @@ -6,6 +6,7 @@ >>> #ifndef __ASSEMBLY__ >>> >>> #include >>> +#include >>> #include >>> >>> #include >>> @@ -32,6 +33,9 @@ >>> #define PTE_LEAF_DEFAULT (PTE_VALID | PTE_READABLE | >>> PTE_WRITABLE) >>> #define PTE_TABLE (PTE_VALID) >>> >>> +/* TODO */ >>> +#define PAGE_HYPERVISOR 0 >> >> ... whether this couldn't be defined properly right away. > It can be introduced now but it requires some additional defines to be > introduced in the same time: > > #define _PAGE_W_BIT 2 > #define _PAGE_XN_BIT3 > #define _PAGE_RO_BIT1 > #define _PAGE_XN(1U << _PAGE_XN_BIT) > #define _PAGE_RO(1U << _PAGE_RO_BIT) > #define _PAGE_W (1U << _PAGE_W_BIT) Why would you need these, when you already have PTE_{READABLE,WRITABLE,EXECUTABLE} just out of context from above? (And that's aside from me (a) not following the naming (vs their purpose) and (b) not seeing what _PAGE_*_BIT are needed for, not even thinking about the leading underscores in these identifiers again.) > ... > /* > * _PAGE_DEVICE and _PAGE_NORMAL are convenience defines. They are not > * meant to be used outside of this header. > */ > // #define _PAGE_DEVICE_PAGE_XN > #define _PAGE_NORMAL_PAGE_PRESENT > > #define PAGE_HYPERVISOR_RW (_PAGE_NORMAL | _PAGE_RO | _PAGE_XN | > _PAGE_W) > > #define PAGE_HYPERVISOR PAGE_HYPERVISOR_RW > > And _PAGE_PRESENT in pgtbl-bits.h: > > #define _PAGE_PRESENT (1 << 0) > > I prefer to introduce all this things when it will be really used. I understand that, yet for easy things it may help doing them right away, rather than leaving them to be touched (in a straightforward way) again very soon. Jan