Re: [PATCH 4/5] x86/svm: Switch SVM features over normal cpu_has_*
On 4/29/24 5:16 PM, Andrew Cooper wrote: Delete the boot time rendering of advanced features. It's entirely ad-hoc and not even everything printed here is used by Xen. It is available in `xen-cpuid` now. With (only) svm_load_segs_{,prefetch}() declared now in svm.h, only svm.c and domain.c which need the header. Clean up all others. No functional change. Signed-off-by: Andrew Cooper Reviewed-by: Vaishali Thakkar --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Xenia Ragiadakou CC: Sergiy Kibrik CC: George Dunlap CC: Andrei Semenov CC: Vaishali Thakkar --- xen/arch/x86/hvm/svm/asid.c| 5 ++- xen/arch/x86/hvm/svm/emulate.c | 3 +- xen/arch/x86/hvm/svm/intr.c| 1 - xen/arch/x86/hvm/svm/nestedsvm.c | 14 xen/arch/x86/hvm/svm/svm.c | 50 +++--- xen/arch/x86/hvm/svm/vmcb.c| 1 - xen/arch/x86/include/asm/cpufeature.h | 10 ++ xen/arch/x86/include/asm/hvm/svm/svm.h | 36 --- 8 files changed, 31 insertions(+), 89 deletions(-) diff --git a/xen/arch/x86/hvm/svm/asid.c b/xen/arch/x86/hvm/svm/asid.c index 7977a8e86b53..6117a362d310 100644 --- a/xen/arch/x86/hvm/svm/asid.c +++ b/xen/arch/x86/hvm/svm/asid.c @@ -6,7 +6,6 @@ #include #include -#include #include "svm.h" @@ -39,7 +38,7 @@ void svm_asid_handle_vmrun(void) { vmcb_set_asid(vmcb, true); vmcb->tlb_control = -cpu_has_svm_flushbyasid ? TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; +cpu_has_flush_by_asid ? TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; return; } @@ -48,7 +47,7 @@ void svm_asid_handle_vmrun(void) vmcb->tlb_control = !need_flush ? TLB_CTRL_NO_FLUSH : -cpu_has_svm_flushbyasid ? TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; +cpu_has_flush_by_asid ? 
TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; } /* diff --git a/xen/arch/x86/hvm/svm/emulate.c b/xen/arch/x86/hvm/svm/emulate.c index 93ac1d3435f9..da6e21b2e270 100644 --- a/xen/arch/x86/hvm/svm/emulate.c +++ b/xen/arch/x86/hvm/svm/emulate.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include "svm.h" @@ -20,7 +19,7 @@ static unsigned long svm_nextrip_insn_length(struct vcpu *v) { struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb; -if ( !cpu_has_svm_nrips ) +if ( !cpu_has_nrips ) return 0; #ifndef NDEBUG diff --git a/xen/arch/x86/hvm/svm/intr.c b/xen/arch/x86/hvm/svm/intr.c index 4805c5567213..facd2894a2c6 100644 --- a/xen/arch/x86/hvm/svm/intr.c +++ b/xen/arch/x86/hvm/svm/intr.c @@ -17,7 +17,6 @@ #include #include #include -#include #include /* for nestedhvm_vcpu_in_guestmode */ #include #include diff --git a/xen/arch/x86/hvm/svm/nestedsvm.c b/xen/arch/x86/hvm/svm/nestedsvm.c index 35a2cbfd7d13..255af112661f 100644 --- a/xen/arch/x86/hvm/svm/nestedsvm.c +++ b/xen/arch/x86/hvm/svm/nestedsvm.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -1620,7 +1619,7 @@ void svm_nested_features_on_efer_update(struct vcpu *v) { if ( !vmcb->virt_ext.fields.vloadsave_enable && paging_mode_hap(v->domain) && - cpu_has_svm_vloadsave ) + cpu_has_v_loadsave ) { vmcb->virt_ext.fields.vloadsave_enable = 1; general2_intercepts = vmcb_get_general2_intercepts(vmcb); @@ -1629,8 +1628,7 @@ void svm_nested_features_on_efer_update(struct vcpu *v) vmcb_set_general2_intercepts(vmcb, general2_intercepts); } -if ( !vmcb->_vintr.fields.vgif_enable && - cpu_has_svm_vgif ) +if ( !vmcb->_vintr.fields.vgif_enable && cpu_has_v_gif ) { vintr = vmcb_get_vintr(vmcb); vintr.fields.vgif = svm->ns_gif; @@ -1675,8 +1673,8 @@ void __init start_nested_svm(struct hvm_function_table *hvm_function_table) */ hvm_function_table->caps.nested_virt = hvm_function_table->caps.hap && -cpu_has_svm_lbrv && -cpu_has_svm_nrips && -cpu_has_svm_flushbyasid && -cpu_has_svm_decode; +cpu_has_v_lbr 
&& +cpu_has_nrips && +cpu_has_flush_by_asid && +cpu_has_decode_assist; } diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c index 4719fffae589..16eb875aab94 100644 --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -1287,7 +1287,7 @@ static void cf_check svm_inject_event(const struct x86_event *event) * that hardware doesn't perform DPL checking on injection. */ if ( event->type == X86_EVENTTYPE_PRI_SW_EXCEPTION || - (!cpu_has_svm_nrips && (event->type >= X86_EVENTTYPE_SW_INTERRUPT)) ) + (!cpu_has_nrips && (event->type >= X86_EVENTTYPE_SW_INTERRUPT)) ) svm_emul_swint_injection(&_event); switch ( _event.vector | -(_event.type ==
[xen-4.17-testing test] 185864: tolerable FAIL - PUSHED
flight 185864 xen-4.17-testing real [real] flight 185874 xen-4.17-testing real-retest [real] http://logs.test-lab.xenproject.org/osstest/logs/185864/ http://logs.test-lab.xenproject.org/osstest/logs/185874/ Failures :-/ but no regressions. Tests which are failing intermittently (not blocking): test-armhf-armhf-xl-multivcpu 10 host-ping-check-xen fail pass in 185874-retest Tests which did not succeed, but are not blocking: test-armhf-armhf-xl-multivcpu 15 migrate-support-check fail in 185874 never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-check fail in 185874 never pass test-armhf-armhf-libvirt 16 saverestore-support-checkfail like 185711 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 185711 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 185711 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 185711 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 185711 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 185711 test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass 
test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-qcow214 migrate-support-checkfail never pass test-armhf-armhf-xl-qcow215 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl-raw 15 saverestore-support-checkfail never pass version targeted for testing: xen effcf70f020ff12d34c80e2abde0ecb00ce92bda baseline version: xen 5d9a931fe2c1310dbfd946bbc1e22a177add4f5c Last test of basis 185711 2024-04-17 07:07:14 Z 12 days Testing same since 185864 2024-04-29 08:08:55 Z0 days1 attempts People who touched revisions under 
test: Jan Beulich Roger Pau Monné jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm pass build-amd64-xtf pass build-amd64
[ovmf test] 185873: all pass - PUSHED
flight 185873 ovmf real [real] http://logs.test-lab.xenproject.org/osstest/logs/185873/ Perfect :-) All tests in this flight passed as required version targeted for testing: ovmf 88781ccd744c73acbbbe9767627860a538b9f3a2 baseline version: ovmf 094727264f887e275444bd11d9d99c651a85c2e4 Last test of basis 185868 2024-04-29 10:45:51 Z0 days Testing same since 185873 2024-04-30 02:41:15 Z0 days1 attempts People who touched revisions under test: Wenxing Hou jobs: build-amd64-xsm pass build-i386-xsm pass build-amd64 pass build-i386 pass build-amd64-libvirt pass build-i386-libvirt pass build-amd64-pvopspass build-i386-pvops pass test-amd64-amd64-xl-qemuu-ovmf-amd64 pass sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Pushing revision : To xenbits.xen.org:/home/xen/git/osstest/ovmf.git 094727264f..88781ccd74 88781ccd744c73acbbbe9767627860a538b9f3a2 -> xen-tested-master
Re: [PATCH 07/15] xen/overlay: Enable device tree overlay assignment to running domains
Hi Julien, On 4/30/2024 1:34 AM, Julien Grall wrote: On 29/04/2024 04:36, Henry Wang wrote: Hi Jan, Julien, Stefano, Hi Henry, On 4/24/2024 2:05 PM, Jan Beulich wrote: On 24.04.2024 05:34, Henry Wang wrote: --- a/xen/include/public/sysctl.h +++ b/xen/include/public/sysctl.h @@ -1197,7 +1197,9 @@ struct xen_sysctl_dt_overlay { #define XEN_SYSCTL_DT_OVERLAY_ADD 1 #define XEN_SYSCTL_DT_OVERLAY_REMOVE 2 uint8_t overlay_op; /* IN: Add or remove. */ - uint8_t pad[3]; /* IN: Must be zero. */ + bool domain_mapping; /* IN: True of False. */ + uint8_t pad[2]; /* IN: Must be zero. */ + uint32_t domain_id; }; If you merely re-purposed padding fields, all would be fine without bumping the interface version. Yet you don't, albeit for an unclear reason: Why uint32_t rather than domid_t? And on top of that - why a separate boolean when you could use e.g. DOMID_INVALID to indicate "no domain mapping"? I think both of your suggestion make great sense. I will follow the suggestion in v2. That said - anything taking a domain ID is certainly suspicious in a sysctl. Judging from the description you really mean this to be a domctl. Anything else will require extra justification. I also think a domctl is better. I had a look at the history of the already merged series, it looks like in the first version of merged part 1 [1], the hypercall was implemented as the domctl in the beginning but later in v2 changed to sysctl. I think this makes sense as the scope of that time is just to make Xen aware of the device tree node via Xen device tree. However this is now a problem for the current part where the scope (and the end goal) is extended to assign the added device to Linux Dom0/DomU via device tree overlays. I am not sure which way is better, should we repurposing the sysctl to domctl or maybe add another domctl (I am worrying about the duplication because basically we need the same sysctl functionality but now with a domid in it)? What do you think? 
I am not entirely sure this is a good idea to try to add the device in Xen and attach it to the guests at the same time. Imagine the following situation: 1) Add and attach devices 2) The domain is rebooted 3) Detach and remove devices After step 2, you technically have a new domain. You could have also a case where this is a completely different guest. So the flow would look a little bit weird (you create the DT overlay with domain A but remove with domain B). So, at the moment, it feels like the add/attach (resp detach/remove) operations should happen separately. Can you clarify why you want to add devices to Xen and attach to a guest within a single hypercall? Sorry I don't know if there is any specific thoughts on the design of using a single hypercall to do both add devices to Xen device tree and assign the device to the guest. In fact seeing your above comments, I think separating these two functionality to two xl commands using separated hypercalls would indeed be a better idea. Thank you for the suggestion! To make sure I understand correctly, would you mind confirming if below actions for v2 make sense to you? Thanks! - Only use the XEN_SYSCTL_DT_OVERLAY_{ADD, REMOVE} sysctls to add/remove overlay to Xen device tree - Introduce the xl dt-overlay attach command and respective domctls to do the device assignment for the overlay to domain. Kind regards, Henry Cheers,
Re: [PATCH 02/15] xen/arm/gic: Enable interrupt assignment to running VM
Hi Julien, Sorry for the late reply, On 4/25/2024 10:28 PM, Julien Grall wrote: Hi, On 25/04/2024 08:06, Henry Wang wrote: Hi Julien, On 4/24/2024 8:58 PM, Julien Grall wrote: Hi Henry, On 24/04/2024 04:34, Henry Wang wrote: From: Vikram Garhwal Enable interrupt assign/remove for running VMs in CONFIG_OVERLAY_DTB. Currently, irq_route and mapping is only allowed at the domain creation. Adding exception for CONFIG_OVERLAY_DTB. AFAICT, this is mostly reverting b8577547236f ("xen/arm: Restrict when a physical IRQ can be routed/removed from/to a domain"). Signed-off-by: Vikram Garhwal Signed-off-by: Stefano Stabellini Signed-off-by: Henry Wang --- xen/arch/arm/gic.c | 4 1 file changed, 4 insertions(+) diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c index 44c40e86de..a775f886ed 100644 --- a/xen/arch/arm/gic.c +++ b/xen/arch/arm/gic.c @@ -140,8 +140,10 @@ int gic_route_irq_to_guest(struct domain *d, unsigned int virq, * back to the physical IRQ. To prevent get unsync, restrict the * routing to when the Domain is been created. */ The above comment explains why the check was added. But the commit message doesn't explain why this can be disregarded for your use-case. Looking at the history, I don't think you can simply remove the checks. Regardless that... +#ifndef CONFIG_OVERLAY_DTB ... I am against such #ifdef. A distros may want to have OVERLAY_DTB enabled, yet the user will not use it. Instead, you want to remove the check once the code can properly handle routing an IRQ the domain is created or ... if ( d->creation_finished ) return -EBUSY; +#endif ret = vgic_connect_hw_irq(d, NULL, virq, desc, true); if ( ret ) @@ -171,8 +173,10 @@ int gic_remove_irq_from_guest(struct domain *d, unsigned int virq, * Removing an interrupt while the domain is running may have * undesirable effect on the vGIC emulation. */ +#ifndef CONFIG_OVERLAY_DTB if ( !d->is_dying ) return -EBUSY; +#endif ... removed before they domain is destroyed. Thanks for your feedback. 
After checking the b8577547236f commit message I think I now understand your point. Do you have any suggestion about how can I properly add the support to route/remove the IRQ to running domains? Thanks. I spent some time going through the GIC/vGIC code and had some discussions with Stefano and Stewart during the last couple of days, let me see if I can describe the use case properly now to continue the discussion: We have some use cases that requires assigning devices to domains after domain boot time. For example, suppose there is an FPGA on the board which can simulate a device, and the bitstream for the FPGA is provided and programmed after domain boot. So we need a way to assign the device to the running domain. This series tries to implement this use case by using device tree overlay - users can firstly add the overlay to Xen dtb, assign the device in the overlay to a domain by the xl command, then apply the overlay to Linux. I haven't really look at that code in quite a while. I think we need to make sure that the virtual and physical IRQ state matches at the time we do the routing. I am undecided on whether we want to simply prevent the action to happen or try to reset the state. There is also the question of what to do if the guest is enabling the vIRQ before it is routed. Sorry for bothering, would you mind elaborating a bit more about the two cases that you mentioned above? Commit b8577547236f ("xen/arm: Restrict when a physical IRQ can be routed/removed from/to a domain") only said there will be undesirable effects, so I am not sure if I understand the concerns raised above and the consequences of these two use cases. I am probably wrong, I think when we add the overlay, we are probably fine as the interrupt is not being used before. Also since we only load the device driver after the IRQ is routed to the guest, I am not sure the guest can enable the vIRQ before it is routed. 
Kind regards, Henry Overall, someone needs to spend some time reading the code and then make a proposal (this could be just documentation if we believe it is safe to do). Both the current vGIC and the new one may need an update. Cheers,
Re: [VirtIO] Support for various devices in Xen
On 30-04-24, 03:11, Andrei Cherechesu wrote: > Are there any other virtio device types you managed to test so far > besides these ones (over virtio-mmio/virtio-grant)? Has anyone > tested the rust-vmm vhost-device backends from Viresh with Xen? I have tested them earlier with Xen emulated with the help of Qemu. Steps are mentioned here: https://github.com/vireshk/xen-vhost-frontend -- viresh
Re: [PATCH 1/3] xen/arm/dom0less-build: Alloc magic pages for Dom0less DomUs from hypervisor
Hi Daniel, On 4/30/2024 8:27 AM, Daniel P. Smith wrote: On 4/25/24 23:14, Henry Wang wrote: There are use cases (for example using the PV driver) in Dom0less setup that require Dom0less DomUs start immediately with Dom0, but initialize XenStore later after Dom0's successful boot and call to the init-dom0less application. An error message can be seen from the init-dom0less application on 1:1 direct-mapped domains: ``` Allocating magic pages memory.c:238:d0v0 mfn 0x39000 doesn't belong to d1 Error on alloc magic pages ``` This is because currently the magic pages for Dom0less DomUs are populated by the init-dom0less app through populate_physmap(), and populate_physmap() automatically assumes gfn == mfn for 1:1 direct mapped domains. This cannot be true for the magic pages that are allocated later from the init-dom0less application executed in Dom0. For domain using statically allocated memory but not 1:1 direct-mapped, similar error "failed to retrieve a reserved page" can be seen as the reserved memory list is empty at that time. To solve above issue, this commit allocates the magic pages for Dom0less DomUs at the domain construction time. The base address/PFN of the magic page region will be noted and communicated to the init-dom0less application in Dom0. Might I suggest we not refer to these as magic pages? I would consider them as hypervisor reserved pages for the VM to have access to virtual platform capabilities. We may see this expand in the future for some unforeseen, new capability. I think magic page is a specific terminology to refer to these pages, see alloc_magic_pages() for both x86 and Arm. I will reword the last paragraph of the commit message to refer them as "hypervisor reserved pages (currently used as magic pages on Arm)" if this sounds good to you. Kind regards, Henry
Re: [PATCH 2/3] xen/arm, tools: Add a new HVM_PARAM_MAGIC_BASE_PFN key in HVMOP
Hi Daniel, On 4/30/2024 8:31 AM, Daniel P. Smith wrote: On 4/26/24 02:21, Jan Beulich wrote: On 26.04.2024 05:14, Henry Wang wrote: --- a/xen/include/public/hvm/params.h +++ b/xen/include/public/hvm/params.h @@ -76,6 +76,7 @@ */ #define HVM_PARAM_STORE_PFN 1 #define HVM_PARAM_STORE_EVTCHN 2 +#define HVM_PARAM_MAGIC_BASE_PFN 3 #define HVM_PARAM_IOREQ_PFN 5 Considering all adjacent values are used, it is overwhelmingly likely that 3 was once used, too. Such re-use needs to be done carefully. Since you need this for Arm only, that's likely okay, but doesn't go without (a) saying and (b) considering the possible future case of dom0less becoming arch-agnostic, or hyperlaunch wanting to extend the scope. Plus (c) imo this also needs at least a comment, maybe even an #ifdef, seeing how x86- focused most of the rest of this header is. I would recommend having two new params, Sounds good. I can do the suggestion in v2. #define HVM_PARAM_HV_RSRV_BASE_PVH 3 #define HVM_PARAM_HV_RSRV_SIZE 4 I think 4 is currently in use, so I think I will find another couple of numbers in the end for both of them. Instead of reusing 3 and 4. Kind regards, Henry This will communicate how many pages have been reserved and where those pages are located. v/r, dps
Re: [PATCH 2/3] xen/arm, tools: Add a new HVM_PARAM_MAGIC_BASE_PFN key in HVMOP
On 4/29/24 20:35, Daniel P. Smith wrote: On 4/25/24 23:14, Henry Wang wrote: For use cases such as Dom0less PV drivers, a mechanism to communicate Dom0less DomU's static data with the runtime control plane (Dom0) is needed. Since on Arm HVMOP is already the existing approach to address such use cases (for example the allocation of HVM_PARAM_CALLBACK_IRQ), add a new HVMOP key HVM_PARAM_MAGIC_BASE_PFN for storing the magic page region base PFN. The value will be set at Dom0less DomU construction time after Dom0less DomU's magic page region has been allocated. To keep consistent, also set the value for HVM_PARAM_MAGIC_BASE_PFN for libxl guests in alloc_magic_pages(). Reported-by: Alec Kwapis Signed-off-by: Henry Wang --- tools/libs/guest/xg_dom_arm.c | 2 ++ xen/arch/arm/dom0less-build.c | 2 ++ xen/arch/arm/hvm.c | 1 + xen/include/public/hvm/params.h | 1 + 4 files changed, 6 insertions(+) diff --git a/tools/libs/guest/xg_dom_arm.c b/tools/libs/guest/xg_dom_arm.c index 8cc7f27dbb..3c08782d1d 100644 --- a/tools/libs/guest/xg_dom_arm.c +++ b/tools/libs/guest/xg_dom_arm.c @@ -74,6 +74,8 @@ static int alloc_magic_pages(struct xc_dom_image *dom) xc_clear_domain_page(dom->xch, dom->guest_domid, base + MEMACCESS_PFN_OFFSET); xc_clear_domain_page(dom->xch, dom->guest_domid, dom->vuart_gfn); + xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_MAGIC_BASE_PFN, + base); xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_CONSOLE_PFN, dom->console_pfn); xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_STORE_PFN, diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c index 40dc85c759..72187c167d 100644 --- a/xen/arch/arm/dom0less-build.c +++ b/xen/arch/arm/dom0less-build.c @@ -861,6 +861,8 @@ static int __init construct_domU(struct domain *d, free_domheap_pages(magic_pg, get_order_from_pages(NR_MAGIC_PAGES)); return rc; } + + d->arch.hvm.params[HVM_PARAM_MAGIC_BASE_PFN] = gfn_x(gfn); } return rc; diff --git a/xen/arch/arm/hvm.c b/xen/arch/arm/hvm.c 
index 0989309fea..fa6141e30c 100644 --- a/xen/arch/arm/hvm.c +++ b/xen/arch/arm/hvm.c @@ -55,6 +55,7 @@ static int hvm_allow_get_param(const struct domain *d, unsigned int param) case HVM_PARAM_STORE_EVTCHN: case HVM_PARAM_CONSOLE_PFN: case HVM_PARAM_CONSOLE_EVTCHN: + case HVM_PARAM_MAGIC_BASE_PFN: return 0; I know you are just adding, so more of a question for the Arm maintainers: Why does Arm have a pair of private access control functions subverting XSM? On closer look, I see x86 has done the same. While the way this is set up bothers me, reviewing the history it was clearly decided that only controlling use of the op by a src domain against a target domain was sufficient. Ultimately, it is seeing a mini access control policy being codified in the access code is what is bothering me here. Fixing this would require a complete rework of xsm_hvm_param, and while it would correct the access control from a purist perspective, the security benefit would be very low. v/r, dps
Re: [PATCH] xen/xsm: Wire up get_dom0_console
On 4/26/24 00:04, Jason Andryuk wrote: An XSM hook for get_dom0_console is currently missing. Using XSM with a PVH dom0 shows: (XEN) FLASK: Denying unknown platform_op: 64. Wire up the hook, and allow it for dom0. Fixes: 4dd160583c ("x86/platform: introduce hypercall to get initial video console settings") Signed-off-by: Jason Andryuk --- Acked-by: Daniel P. Smith
Re: [PATCH 2/3] xen/arm, tools: Add a new HVM_PARAM_MAGIC_BASE_PFN key in HVMOP
On 4/25/24 23:14, Henry Wang wrote: For use cases such as Dom0less PV drivers, a mechanism to communicate Dom0less DomU's static data with the runtime control plane (Dom0) is needed. Since on Arm HVMOP is already the existing approach to address such use cases (for example the allocation of HVM_PARAM_CALLBACK_IRQ), add a new HVMOP key HVM_PARAM_MAGIC_BASE_PFN for storing the magic page region base PFN. The value will be set at Dom0less DomU construction time after Dom0less DomU's magic page region has been allocated. To keep consistent, also set the value for HVM_PARAM_MAGIC_BASE_PFN for libxl guests in alloc_magic_pages(). Reported-by: Alec Kwapis Signed-off-by: Henry Wang --- tools/libs/guest/xg_dom_arm.c | 2 ++ xen/arch/arm/dom0less-build.c | 2 ++ xen/arch/arm/hvm.c | 1 + xen/include/public/hvm/params.h | 1 + 4 files changed, 6 insertions(+) diff --git a/tools/libs/guest/xg_dom_arm.c b/tools/libs/guest/xg_dom_arm.c index 8cc7f27dbb..3c08782d1d 100644 --- a/tools/libs/guest/xg_dom_arm.c +++ b/tools/libs/guest/xg_dom_arm.c @@ -74,6 +74,8 @@ static int alloc_magic_pages(struct xc_dom_image *dom) xc_clear_domain_page(dom->xch, dom->guest_domid, base + MEMACCESS_PFN_OFFSET); xc_clear_domain_page(dom->xch, dom->guest_domid, dom->vuart_gfn); +xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_MAGIC_BASE_PFN, +base); xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_CONSOLE_PFN, dom->console_pfn); xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_STORE_PFN, diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c index 40dc85c759..72187c167d 100644 --- a/xen/arch/arm/dom0less-build.c +++ b/xen/arch/arm/dom0less-build.c @@ -861,6 +861,8 @@ static int __init construct_domU(struct domain *d, free_domheap_pages(magic_pg, get_order_from_pages(NR_MAGIC_PAGES)); return rc; } + +d->arch.hvm.params[HVM_PARAM_MAGIC_BASE_PFN] = gfn_x(gfn); } return rc; diff --git a/xen/arch/arm/hvm.c b/xen/arch/arm/hvm.c index 0989309fea..fa6141e30c 100644 --- 
a/xen/arch/arm/hvm.c +++ b/xen/arch/arm/hvm.c @@ -55,6 +55,7 @@ static int hvm_allow_get_param(const struct domain *d, unsigned int param) case HVM_PARAM_STORE_EVTCHN: case HVM_PARAM_CONSOLE_PFN: case HVM_PARAM_CONSOLE_EVTCHN: +case HVM_PARAM_MAGIC_BASE_PFN: return 0; I know you are just adding, so more of a question for the Arm maintainers: Why does Arm have a pair of private access control functions subverting XSM? v/r, dps
Re: [PATCH 2/3] xen/arm, tools: Add a new HVM_PARAM_MAGIC_BASE_PFN key in HVMOP
On 4/26/24 02:21, Jan Beulich wrote: On 26.04.2024 05:14, Henry Wang wrote: --- a/xen/include/public/hvm/params.h +++ b/xen/include/public/hvm/params.h @@ -76,6 +76,7 @@ */ #define HVM_PARAM_STORE_PFN1 #define HVM_PARAM_STORE_EVTCHN 2 +#define HVM_PARAM_MAGIC_BASE_PFN3 #define HVM_PARAM_IOREQ_PFN5 Considering all adjacent values are used, it is overwhelmingly likely that 3 was once used, too. Such re-use needs to be done carefully. Since you need this for Arm only, that's likely okay, but doesn't go without (a) saying and (b) considering the possible future case of dom0less becoming arch-agnostic, or hyperlaunch wanting to extend the scope. Plus (c) imo this also needs at least a comment, maybe even an #ifdef, seeing how x86- focused most of the rest of this header is. I would recommend having two new params, #define HVM_PARAM_HV_RSRV_BASE_PVH 3 #define HVM_PARAM_HV_RSRV_SIZE 4 This will communicate how many pages have been reserved and where those pages are located. v/r, dps
Re: [PATCH 1/3] xen/arm/dom0less-build: Alloc magic pages for Dom0less DomUs from hypervisor
On 4/25/24 23:14, Henry Wang wrote: There are use cases (for example using the PV driver) in Dom0less setup that require Dom0less DomUs start immediately with Dom0, but initialize XenStore later after Dom0's successful boot and call to the init-dom0less application. An error message can be seen from the init-dom0less application on 1:1 direct-mapped domains: ``` Allocating magic pages memory.c:238:d0v0 mfn 0x39000 doesn't belong to d1 Error on alloc magic pages ``` This is because currently the magic pages for Dom0less DomUs are populated by the init-dom0less app through populate_physmap(), and populate_physmap() automatically assumes gfn == mfn for 1:1 direct mapped domains. This cannot be true for the magic pages that are allocated later from the init-dom0less application executed in Dom0. For domain using statically allocated memory but not 1:1 direct-mapped, similar error "failed to retrieve a reserved page" can be seen as the reserved memory list is empty at that time. To solve above issue, this commit allocates the magic pages for Dom0less DomUs at the domain construction time. The base address/PFN of the magic page region will be noted and communicated to the init-dom0less application in Dom0. Might I suggest we not refer to these as magic pages? I would consider them as hypervisor reserved pages for the VM to have access to virtual platform capabilities. We may see this expand in the future for some unforeseen, new capability.
[xen-4.18-testing test] 185865: tolerable FAIL - PUSHED
flight 185865 xen-4.18-testing real [real] http://logs.test-lab.xenproject.org/osstest/logs/185865/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt 16 saverestore-support-checkfail like 185724 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 185724 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 185724 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 185724 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 185724 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 185724 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass 
test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-qcow214 migrate-support-checkfail never pass test-armhf-armhf-xl-qcow215 saverestore-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl-raw 15 saverestore-support-checkfail never pass version targeted for testing: xen f0ff1d9cb96041a84a24857a6464628240deed4f baseline version: xen 2d38302c33b117aa9a417056db241aefc840c2f0 Last test of basis 185724 2024-04-17 15:40:35 Z 12 days Testing same since 185865 2024-04-29 08:08:54 Z0 days1 attempts People who touched revisions under test: Jan Beulich Roger Pau Monné Ross Lagerwall jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm pass build-amd64-xtf pass build-amd64 pass build-arm64 pass build-armhf pass build-i386 pass
Re: [VirtIO] Support for various devices in Xen
On 12/04/2024 11:35, Edgar E. Iglesias wrote: > On Fri, Apr 12, 2024 at 1:23 AM Stefano Stabellini > wrote: > > -Vikram +Edgar > > On Thu, 11 Apr 2024, Andrei Cherechesu wrote: >>> I've managed to successfully get a DomU up and running with the rootfs >>> based on virtio-blk. I'm running QEMU 8.2.1, Xen 4.18 + Vikram's downstream >>> patches, Linux 6.6.12-rt, built through yocto with some changes to >>> xen-tools and QEMU recipes. >>> >>> However, when also enabling PV networking through virtio-net, it seems that >>> DomU cannot successfully boot. The device model args passed by xen-tools >>> when invoking QEMU look exactly like what Vikram said they should. >>> >>> While executing `xl -v create ..` I can see some error regarding the device >>> model crashing: >>> >>> libxl: debug: libxl_exec.c:127:libxl_report_child_exitstatus: >>> domain 1 device model (dying as expected) [300] died due to fatal signal >>> Killed >>> >>> But the error is not fatal and the DomU spawn goes on, it boots but never >>> reaches prompt. It seems that kernel crashes silently at some point. >>> Though, the networking interface is present since udev tries to rename it >>> right before boot hangs: >>> >>> [4.376715] vif vif-0 enX0: renamed from eth1 >>> >>> Why would the QEMU DM process be killed, though? Invalid memory access? >>> >>> Here are the full logs for the "xl create" command [0] and for DomU's dmesg >>> [1]. >>> Any ideas as to why that might happen, some debugging insights, or maybe >>> some configuration details I could have overlooked? >>> >>> Thank you very much for your help once again. > Hi Andrei, > > I'll share some info about my setup: > I'm using: > > Xen upstream/master + virtio patches that Vikram shared > Commit 63f66058b5 on this repo/branch: > https://github.com/edgarigl/xen/tree/edgar/virtio-base > > QEMU 02e16ab9f4 upstream/master > Linux 09e5c48fea17 upstream/master (from March) > Yocto rootfs. 
> I had a look at your logs but I can't tell why it's failing on your side. > I've not tried using a virtio-blk as a rootfs on my side, perhaps related. > It would be useful to see a diff of your Xen tree compared to plain > 4.18 or whatever base you've got. > You probably don't have > https://github.com/edgarigl/xen/commit/63f66058b508180107963ea37217bc88d813df8f > but if that was the problem, I'd thought virtio wouldn't work at all > with your kernel it could also be related. > > My guest config looks like this: > name = "g0" > memory = 1024 > vcpus = 1 > kernel = "Image" > ramdisk = "core-image-minimal-qemuarm64.rootfs.cpio.gz" > extra = "root=/dev/ram0 console=ttyAMA0" > vif = [ 'model=virtio-net,type=ioemu,bridge=xenbr0' ] > disk = [ '/etc/xen/file.img,,xvda,backendtype=qdisk,specification=virtio' ] Hi Edgar, Stefano, Thank you for your support. Just wanted to let you know that I've managed to run everything just fine. After some more debugging, I figured the fix I needed was actually in QEMU, one which had been already merged upstream (available in v9.0.0) by Peng Fan [0], following a discussion a few months ago on this list. And since I'm running v8.2.1, my QEMU did not have it. So I can confirm granting DomU access to rootfs from an SDCard partition over virtio-blk also works. However, if I use the usual Xen PV drivers for block + virtio-net, it does not work (device model fails to spawn). But if I'm using virtio-blk + Xen PV Net, it works :). Also both VirtIOs at the same time work. This is the configuration: vif = [ 'model=virtio-net,type=ioemu' ] disk = [ 'phy:/dev/mmcblk0p3,xvda,backendtype=qdisk,specification=virtio' ] extra = "root=/dev/vda ..." Also tested them with foreign mappings and with grant based transports. Are there any other virtio device types you managed to test so far besides these ones (over virtio-mmio/virtio-grant)? Has anyone tested the rust-vmm vhost-device backends from Viresh with Xen? 
Regards, Andrei C [0] https://github.com/qemu/qemu/commit/9253d83062268209533df4b29859e5b51a2dc324 > > xl launches QEMU with the following args: > /usr/bin/qemu-system-aarch64 -xen-domid 1 -no-shutdown -chardev > socket,id=libxl-cmd,path=/var/run/xen/qmp-libxl-1,server=on,wait=off > -mon chardev=libxl-cmd,mode=control -chardev > socket,id=libxenstat-cmd,path=/var/run/xen/qmp-libxenstat-1,server=on,wait=off > -mon chardev=libxenstat-cmd,mode=control -nodefaults -no-user-config > -xen-attach -name g0 -vnc none -display none -nographic -global > virtio-mmio.force-legacy=false -device > virtio-net-device,id=nic0,netdev=net0,mac=00:16:3e:13:86:9c,iommu_platform=on > -netdev type=tap,id=net0,ifname=vif1.0-emu,br=xenbr0,script=no,downscript=no > -machine xenpvh -m 1024 -device > virtio-blk-device,drive=image,iommu_platform=on -drive > if=none,id=image,format=raw,file=/etc/xen/file.img -global > virtio-mmio.force-legacy=false > > Cheers, > Edgar > > >> Edgar (CCed) has recently setup a working system with QEMU and the >> xenpvh machine for
Re: [XEN PATCH v3 5/5] xen/arm: ffa: support notification
Hi Bertrand, On 29/04/2024 08:20, Bertrand Marquis wrote: From the comment in sched.h: /* * rcu_lock_domain_by_id() is more efficient than get_domain_by_id(). * This is the preferred function if the returned domain reference * is short lived, but it cannot be used if the domain reference needs * to be kept beyond the current scope (e.g., across a softirq). * The returned domain reference must be discarded using rcu_unlock_domain(). */ Now the question of short lived should be challenged but I do not think we can consider the current code as "long lived". Actually, I am not entirely sure you can use put_domain() in interrupt context. If you look at the implementation of domain_destroy() it takes a spin lock without disabling the interrupts. The same spinlock is taken in domain_create(). So there is a potential deadlock. Which means the decision between the two is not only about liveness. Cheers, -- Julien Grall
[linux-6.1 test] 185863: regressions - FAIL
flight 185863 linux-6.1 real [real] http://logs.test-lab.xenproject.org/osstest/logs/185863/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: build-armhf-pvops 6 kernel-build fail REGR. vs. 185746 Tests which did not succeed, but are not blocking: test-armhf-armhf-examine 1 build-check(1) blocked n/a test-armhf-armhf-libvirt 1 build-check(1) blocked n/a test-armhf-armhf-libvirt-vhd 1 build-check(1) blocked n/a test-armhf-armhf-xl 1 build-check(1) blocked n/a test-armhf-armhf-xl-arndale 1 build-check(1) blocked n/a test-armhf-armhf-xl-credit1 1 build-check(1) blocked n/a test-armhf-armhf-xl-credit2 1 build-check(1) blocked n/a test-armhf-armhf-xl-multivcpu 1 build-check(1) blocked n/a test-armhf-armhf-xl-qcow2 1 build-check(1) blocked n/a test-armhf-armhf-xl-raw 1 build-check(1) blocked n/a test-armhf-armhf-xl-rtds 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 185746 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 185746 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 185746 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 185746 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 185746 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never 
pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass version targeted for testing: linuxf2295faba5e8249ae4082791bfc1664c88fff83a baseline version: linux6741e066ec7633450d3186946035c1f80c4226b8 Last test of basis 185746 2024-04-20 18:14:28 Z9 days Testing same since 185832 2024-04-27 15:44:00 Z2 days5 attempts People who touched revisions under test: Ai Chao Alan Stern Alex Deucher Alexander Gordeev Alexander Usyskin Alexey Izbyshev Alvaro Karsz Andi Shyti Andrew Morton AngeloGioacchino Del Regno Ard Biesheuvel Arnd Bergmann Arınç ÜNAL Bjorn Helgaas bolan wang Borislav Petkov (AMD) Borislav Petkov Brenton Simpson Carlos Llamas Carolina Jubran Catalin Marinas Chinmoy Ghosh Christophe JAILLET Chuanhong Guo Chuck Lever Coia Prant Daniel Golle Daniele Palmas Danilo Krummrich Dave Airlie Dave Hansen David S. Miller David Yang Dillon Varone Dmitry Baryshkov Dmitry Torokhov Emil Kronborg Eric Biggers Finn Thain Florian Fainelli Florian Westphal Geoffrey D. Bennett Gil Fine Greg Kroah-Hartman H. Peter Anvin (Intel) Hamza Mahfooz Hans de Goede Hardik Gajjar Hawking Zhang Hou Wenlong Ilpo Järvinen Ingo Molnar Jakub Kicinski Janusz Krzysztofik Jarkko Nikula Jason A. 
Donenfeld Jason Wang Jens Axboe Jeongjun Park Jerry Meng Jiri Kosina Johan Hovold Johan Hovold Jose Ignacio Tornos Martinez Josh Poimboeuf Kai-Heng Feng Kelvin Cao kernelci.org bot Konrad Dybcio Kuniyuki Iwashima Lei Chen
Re: [PATCH v2 2/3] x86: detect PIC aliasing on ports other than 0x[2A][01]
On 2023-12-18 15:48, Jan Beulich wrote: ... in order to also deny Dom0 access through the alias ports. Without this it is only giving the impression of denying access to both PICs. Unlike for CMOS/RTC, do detection very early, to avoid disturbing normal operation later on. Like for CMOS/RTC a fundamental assumption of the probing is that reads from the probed alias port won't have side effects in case it does not alias the respective PIC's one. Signed-off-by: Jan Beulich --- v2: Use new command line option. s/pic/8259A/. Re-base over new earlier patch. Use ISOLATE_LSB(). Hi, coming back to this patch, which I believe didn't receive much feedback and thus wasn't committed, for a reason: --- a/xen/arch/x86/i8259.c +++ b/xen/arch/x86/i8259.c @@ -19,6 +19,7 @@ #include #include #include +#include Here asm/setup is included, which provides the declaration for init_IRQ, defined below in the file void __init init_IRQ(void) @@ -343,6 +396,8 @@ void __init init_IRQ(void) which is defined here. This patch would, among other things, address a MISRA C Rule 8.4 violation ("A compatible declaration shall be visible when an object or function with external linkage is defined"). I did send a patch concerned only with the MISRA violation, but correctly it was pointed out that this one was doing that and more. Perhaps someone can have a look at this? init_8259A(0); +probe_8259A_alias(); + for (irq = 0; platform_legacy_irq(irq); irq++) { struct irq_desc *desc = irq_to_desc(irq); --- a/xen/arch/x86/include/asm/setup.h +++ b/xen/arch/x86/include/asm/setup.h @@ -46,6 +46,8 @@ extern uint8_t kbd_shift_flags; extern unsigned long highmem_start; #endif +extern unsigned int i8259A_alias_mask; + extern int8_t opt_smt; extern int8_t opt_probe_port_aliases; -- Nicola Vetrini, BSc Software Engineer, BUGSENG srl (https://bugseng.com)
Re: [PATCH 1/3] x86/boot: Explain how moving mod[0] works
On 2024-04-26 10:01, Andrew Cooper wrote: modules_headroom is a misleading name as it applies strictly to mod[0] only, and the movement loop is deeply unintuitive and completely undocumented. Provide help to whomever needs to look at this code next. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Daniel Smith CC: Christopher Clark --- xen/arch/x86/setup.c | 5 + 1 file changed, 5 insertions(+) diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index caf235c6286d..59907fae095f 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -1432,6 +1432,11 @@ void asmlinkage __init noreturn __start_xen(unsigned long mbi_p) /* Is the region suitable for relocating the multiboot modules? */ for ( j = mbi->mods_count - 1; j >= 0; j-- ) { +/* + * 'headroom' is a guess for the decompressed size and + * decompressor overheads of mod[0] (the dom0 kernel). When we + * move mod[0], we incorperate this as extra space at the start. incorporate With that: Reviewed-by: Jason Andryuk Thanks, Jason + */ unsigned long headroom = j ? 0 : modules_headroom; unsigned long size = PAGE_ALIGN(headroom + mod[j].mod_end);
[PATCH v2 3/4] x86/cpu-policy: Simplify recalculate_xstate()
Make use of the new xstate_uncompressed_size() helper rather than maintaining the running calculation while accumulating feature components. The rest of the CPUID data can come direct from the raw cpu policy. All per-component data form an ABI through the behaviour of the X{SAVE,RSTOR}* instructions. Use for_each_set_bit() rather than opencoding a slightly awkward version of it. Mask the attributes in ecx down based on the visible features. This isn't actually necessary for any components or attributes defined at the time of writing (up to AMX), but is added out of an abundance of caution. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné v2: * Tie ALIGN64 to xsavec rather than xsaves. --- xen/arch/x86/cpu-policy.c | 55 +++ xen/arch/x86/include/asm/xstate.h | 1 + 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c index 4b6d96276399..fc7933be8577 100644 --- a/xen/arch/x86/cpu-policy.c +++ b/xen/arch/x86/cpu-policy.c @@ -193,8 +193,7 @@ static void sanitise_featureset(uint32_t *fs) static void recalculate_xstate(struct cpu_policy *p) { uint64_t xstates = XSTATE_FP_SSE; -uint32_t xstate_size = XSTATE_AREA_MIN_SIZE; -unsigned int i, Da1 = p->xstate.Da1; +unsigned int i, ecx_mask = 0, Da1 = p->xstate.Da1; /* * The Da1 leaf is the only piece of information preserved in the common @@ -206,61 +205,47 @@ static void recalculate_xstate(struct cpu_policy *p) return; if ( p->basic.avx ) -{ xstates |= X86_XCR0_YMM; -xstate_size = max(xstate_size, - xstate_offsets[X86_XCR0_YMM_POS] + - xstate_sizes[X86_XCR0_YMM_POS]); -} if ( p->feat.mpx ) -{ xstates |= X86_XCR0_BNDREGS | X86_XCR0_BNDCSR; -xstate_size = max(xstate_size, - xstate_offsets[X86_XCR0_BNDCSR_POS] + - xstate_sizes[X86_XCR0_BNDCSR_POS]); -} if ( p->feat.avx512f ) -{ xstates |= X86_XCR0_OPMASK | X86_XCR0_ZMM | X86_XCR0_HI_ZMM; -xstate_size = max(xstate_size, - xstate_offsets[X86_XCR0_HI_ZMM_POS] + - xstate_sizes[X86_XCR0_HI_ZMM_POS]); 
-} if ( p->feat.pku ) -{ xstates |= X86_XCR0_PKRU; -xstate_size = max(xstate_size, - xstate_offsets[X86_XCR0_PKRU_POS] + - xstate_sizes[X86_XCR0_PKRU_POS]); -} -p->xstate.max_size = xstate_size; +/* Subleaf 0 */ +p->xstate.max_size = +xstate_uncompressed_size(xstates & ~XSTATE_XSAVES_ONLY); p->xstate.xcr0_low = xstates & ~XSTATE_XSAVES_ONLY; p->xstate.xcr0_high = (xstates & ~XSTATE_XSAVES_ONLY) >> 32; +/* Subleaf 1 */ p->xstate.Da1 = Da1; +if ( p->xstate.xsavec ) +ecx_mask |= XSTATE_ALIGN64; + if ( p->xstate.xsaves ) { +ecx_mask |= XSTATE_XSS; p->xstate.xss_low = xstates & XSTATE_XSAVES_ONLY; p->xstate.xss_high = (xstates & XSTATE_XSAVES_ONLY) >> 32; } -else -xstates &= ~XSTATE_XSAVES_ONLY; -for ( i = 2; i < min(63UL, ARRAY_SIZE(p->xstate.comp)); ++i ) +/* Subleafs 2+ */ +xstates &= ~XSTATE_FP_SSE; +BUILD_BUG_ON(ARRAY_SIZE(p->xstate.comp) < 63); +for_each_set_bit ( i, , 63 ) { -uint64_t curr_xstate = 1UL << i; - -if ( !(xstates & curr_xstate) ) -continue; - -p->xstate.comp[i].size = xstate_sizes[i]; -p->xstate.comp[i].offset = xstate_offsets[i]; -p->xstate.comp[i].xss= curr_xstate & XSTATE_XSAVES_ONLY; -p->xstate.comp[i].align = curr_xstate & xstate_align; +/* + * Pass through size (eax) and offset (ebx) directly. Visibility of + * attributes in ecx limited by visible features in Da1. + */ +p->xstate.raw[i].a = raw_cpu_policy.xstate.raw[i].a; +p->xstate.raw[i].b = raw_cpu_policy.xstate.raw[i].b; +p->xstate.raw[i].c = raw_cpu_policy.xstate.raw[i].c & ecx_mask; } } diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h index f5115199d4f9..bfb66dd766b6 100644 --- a/xen/arch/x86/include/asm/xstate.h +++ b/xen/arch/x86/include/asm/xstate.h @@ -40,6 +40,7 @@ extern uint32_t mxcsr_mask; #define XSTATE_XSAVES_ONLY 0 #define XSTATE_COMPACTION_ENABLED (1ULL << 63) +#define XSTATE_XSS (1U << 0) #define XSTATE_ALIGN64 (1U << 1) extern u64 xfeature_mask; -- 2.30.2
[PATCH v2 4/4] x86/cpuid: Fix handling of xsave dynamic leaves
If max leaf is greater than 0xd but xsave not available to the guest, then the current XSAVE size should not be filled in. This is a latent bug for now as the guest max leaf is 0xd, but will become problematic in the future. The comment concerning XSS state is wrong. VT-x doesn't manage host/guest state automatically, but there is provision for "host only" bits to be set, so the implications are still accurate. Introduce {xstate,hw}_compressed_size() helpers to mirror the uncompressed ones. This in turn highlights a bug in xstate_init(). Defaulting this_cpu(xss) to ~0 requires a forced write to clear it back out. This in turn highlights that it's only a safe default on systems with XSAVES. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné The more I think about it, the more I think that cross-checking with hardware is a bad move. It's horribly expensive, and for supervisor states in particular, liable to interfere with functionality. v2: * Cope with blind reads of CPUID.0xD[1] prior to %xcr0 having been set up. --- xen/arch/x86/cpuid.c | 24 xen/arch/x86/include/asm/xstate.h | 1 + xen/arch/x86/xstate.c | 64 ++- 3 files changed, 72 insertions(+), 17 deletions(-) diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c index 7a38e032146a..a822e80c7ea7 100644 --- a/xen/arch/x86/cpuid.c +++ b/xen/arch/x86/cpuid.c @@ -330,23 +330,15 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf, case XSTATE_CPUID: switch ( subleaf ) { -case 1: -if ( !p->xstate.xsavec && !p->xstate.xsaves ) -break; - -/* - * TODO: Figure out what to do for XSS state. VT-x manages host - * vs guest MSR_XSS automatically, so as soon as we start - * supporting any XSS states, the wrong XSS will be in context. - */ -BUILD_BUG_ON(XSTATE_XSAVES_ONLY != 0); -fallthrough; case 0: -/* - * Read CPUID[0xD,0/1].EBX from hardware. They vary with enabled - * XSTATE, and appropriate XCR0|XSS are in context. 
- */ -res->b = cpuid_count_ebx(leaf, subleaf); +if ( p->basic.xsave ) +res->b = xstate_uncompressed_size(v->arch.xcr0); +break; + +case 1: +if ( p->xstate.xsavec ) +res->b = xstate_compressed_size(v->arch.xcr0 | +v->arch.msrs->xss.raw); break; } break; diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h index bfb66dd766b6..da1d89d2f416 100644 --- a/xen/arch/x86/include/asm/xstate.h +++ b/xen/arch/x86/include/asm/xstate.h @@ -109,6 +109,7 @@ void xstate_free_save_area(struct vcpu *v); int xstate_alloc_save_area(struct vcpu *v); void xstate_init(struct cpuinfo_x86 *c); unsigned int xstate_uncompressed_size(uint64_t xcr0); +unsigned int xstate_compressed_size(uint64_t xstates); static inline uint64_t xgetbv(unsigned int index) { diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c index a94f4025fce5..b2cf3ae6acee 100644 --- a/xen/arch/x86/xstate.c +++ b/xen/arch/x86/xstate.c @@ -614,6 +614,65 @@ unsigned int xstate_uncompressed_size(uint64_t xcr0) return size; } +static unsigned int hw_compressed_size(uint64_t xstates) +{ +uint64_t curr_xcr0 = get_xcr0(), curr_xss = get_msr_xss(); +unsigned int size; +bool ok; + +ok = set_xcr0(xstates & ~XSTATE_XSAVES_ONLY); +ASSERT(ok); +set_msr_xss(xstates & XSTATE_XSAVES_ONLY); + +size = cpuid_count_ebx(XSTATE_CPUID, 1); + +ok = set_xcr0(curr_xcr0); +ASSERT(ok); +set_msr_xss(curr_xss); + +return size; +} + +unsigned int xstate_compressed_size(uint64_t xstates) +{ +unsigned int i, size = XSTATE_AREA_MIN_SIZE; + +if ( xstates == 0 ) /* TODO: clean up paths passing 0 in here. */ +return 0; + +if ( xstates <= (X86_XCR0_SSE | X86_XCR0_FP) ) +return size; + +/* + * For the compressed size, every component matters. Some are + * automatically rounded up to 64 first. + */ +xstates &= ~XSTATE_FP_SSE; +for_each_set_bit ( i, , 63 ) +{ +if ( test_bit(i, _align) ) +size = ROUNDUP(size, 64); + +size += xstate_sizes[i]; +} + +/* In debug builds, cross-check our calculation with hardware. 
*/ +if ( IS_ENABLED(CONFIG_DEBUG) ) +{ +unsigned int hwsize; + +xstates |= XSTATE_FP_SSE; +hwsize = hw_compressed_size(xstates); + +if ( size != hwsize ) +printk_once(XENLOG_ERR "%s(%#"PRIx64") size %#x != hwsize %#x\n", +__func__, xstates, size, hwsize); +size = hwsize; +} + +return size; +} + static bool valid_xcr0(uint64_t xcr0) { /* FP must be unconditionally set. */ @@ -681,7 +740,8 @@ void
[PATCH v2 0/4] x86/xstate: Fixes to size calculations
Various fixes and improvements to xsave image size calculations. Since v1: * Rebase over exposing XSAVEC. This has highlighted several latent bugs. * Rework the uncompressed size handling. LWP / APX_F make for much sadness. Be aware that Intel and AMD have different ABIs for the xstate layout. Andrew Cooper (4): x86/hvm: Defer the size calculation in hvm_save_cpu_xsave_states() x86/xstate: Rework xstate_ctxt_size() as xstate_uncompressed_size() x86/cpu-policy: Simplify recalculate_xstate() x86/cpuid: Fix handling of xsave dynamic leaves xen/arch/x86/cpu-policy.c | 55 ++- xen/arch/x86/cpuid.c | 24 +++ xen/arch/x86/domctl.c | 2 +- xen/arch/x86/hvm/hvm.c| 12 +++- xen/arch/x86/include/asm/xstate.h | 4 +- xen/arch/x86/xstate.c | 111 -- 6 files changed, 145 insertions(+), 63 deletions(-) -- 2.30.2
[PATCH v2 2/4] x86/xstate: Rework xstate_ctxt_size() as xstate_uncompressed_size()
We're soon going to need a compressed helper of the same form. The size of the uncompressed image depends on the single element with the largest offset+size. Sadly this isn't always the element with the largest index. Retain the cross-check with hardware in debug builds, but forgo it normal builds. In particular, this means that the migration paths don't need to mess with XCR0 just to sanity check the buffer size. No practical change. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné v2: * Scan all features. LWP/APX_F are out-of-order. --- xen/arch/x86/domctl.c | 2 +- xen/arch/x86/hvm/hvm.c| 2 +- xen/arch/x86/include/asm/xstate.h | 2 +- xen/arch/x86/xstate.c | 45 +++ 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c index 9a72d57333e9..c2f2016ed45a 100644 --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -833,7 +833,7 @@ long arch_do_domctl( uint32_t offset = 0; #define PV_XSAVE_HDR_SIZE (2 * sizeof(uint64_t)) -#define PV_XSAVE_SIZE(xcr0) (PV_XSAVE_HDR_SIZE + xstate_ctxt_size(xcr0)) +#define PV_XSAVE_SIZE(xcr0) (PV_XSAVE_HDR_SIZE + xstate_uncompressed_size(xcr0)) ret = -ESRCH; if ( (evc->vcpu >= d->max_vcpus) || diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 9594e0a5c530..9e677d891d6c 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1191,7 +1191,7 @@ HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, NULL, hvm_load_cpu_ctxt, 1, #define HVM_CPU_XSAVE_SIZE(xcr0) (offsetof(struct hvm_hw_cpu_xsave, \ save_area) + \ - xstate_ctxt_size(xcr0)) + xstate_uncompressed_size(xcr0)) static int cf_check hvm_save_cpu_xsave_states( struct vcpu *v, hvm_domain_context_t *h) diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h index c08c267884f0..f5115199d4f9 100644 --- a/xen/arch/x86/include/asm/xstate.h +++ b/xen/arch/x86/include/asm/xstate.h @@ -107,7 +107,7 @@ void compress_xsave_states(struct vcpu *v, const void *src, 
unsigned int size); void xstate_free_save_area(struct vcpu *v); int xstate_alloc_save_area(struct vcpu *v); void xstate_init(struct cpuinfo_x86 *c); -unsigned int xstate_ctxt_size(u64 xcr0); +unsigned int xstate_uncompressed_size(uint64_t xcr0); static inline uint64_t xgetbv(unsigned int index) { diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c index 99cedb4f5e24..a94f4025fce5 100644 --- a/xen/arch/x86/xstate.c +++ b/xen/arch/x86/xstate.c @@ -183,7 +183,7 @@ void expand_xsave_states(const struct vcpu *v, void *dest, unsigned int size) /* Check there is state to serialise (i.e. at least an XSAVE_HDR) */ BUG_ON(!v->arch.xcr0_accum); /* Check there is the correct room to decompress into. */ -BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum)); +BUG_ON(size != xstate_uncompressed_size(v->arch.xcr0_accum)); if ( !(xstate->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED) ) { @@ -245,7 +245,7 @@ void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size) u64 xstate_bv, valid; BUG_ON(!v->arch.xcr0_accum); -BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum)); +BUG_ON(size != xstate_uncompressed_size(v->arch.xcr0_accum)); ASSERT(!xsave_area_compressed(src)); xstate_bv = ((const struct xsave_struct *)src)->xsave_hdr.xstate_bv; @@ -567,16 +567,51 @@ static unsigned int hw_uncompressed_size(uint64_t xcr0) return size; } -/* Fastpath for common xstate size requests, avoiding reloads of xcr0. */ -unsigned int xstate_ctxt_size(u64 xcr0) +unsigned int xstate_uncompressed_size(uint64_t xcr0) { +unsigned int size = XSTATE_AREA_MIN_SIZE, i; + if ( xcr0 == xfeature_mask ) return xsave_cntxt_size; if ( xcr0 == 0 ) /* TODO: clean up paths passing 0 in here. */ return 0; -return hw_uncompressed_size(xcr0); +if ( xcr0 <= (X86_XCR0_SSE | X86_XCR0_FP) ) +return size; + +/* + * For the non-legacy states, search all active states and find the + * maximum offset+size. Some states (e.g. LWP, APX_F) are out-of-order + * with respect to their index. 
+ */ +xcr0 &= ~XSTATE_FP_SSE; +for_each_set_bit ( i, , 63 ) +{ +unsigned int s; + +ASSERT(xstate_offsets[i] && xstate_sizes[i]); + +s = xstate_offsets[i] && xstate_sizes[i]; + +size = max(size, s); +} + +/* In debug builds, cross-check our calculation with hardware. */ +if ( IS_ENABLED(CONFIG_DEBUG) ) +{ +unsigned int hwsize; + +xcr0 |= XSTATE_FP_SSE; +hwsize = hw_uncompressed_size(xcr0); + +if ( size != hwsize ) +printk_once(XENLOG_ERR "%s(%#"PRIx64") size %#x != hwsize %#x\n", +__func__,
[PATCH v2 1/4] x86/hvm: Defer the size calculation in hvm_save_cpu_xsave_states()
HVM_CPU_XSAVE_SIZE() may rewrite %xcr0 twice. Defer the calculation until after we've decided to write out an XSAVE record. Note in hvm_load_cpu_xsave_states() that there were versions of Xen which wrote out a useless XSAVE record. This sadly limits our ability to tidy up the existing infrastructure. Also leave a note in xstate_ctxt_size() that 0 still needs tolerating for now. No functional change. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné v2: * New --- xen/arch/x86/hvm/hvm.c | 10 -- xen/arch/x86/xstate.c | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 0ce45b177cf4..9594e0a5c530 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1197,12 +1197,13 @@ static int cf_check hvm_save_cpu_xsave_states( struct vcpu *v, hvm_domain_context_t *h) { struct hvm_hw_cpu_xsave *ctxt; -unsigned int size = HVM_CPU_XSAVE_SIZE(v->arch.xcr0_accum); +unsigned int size; int err; -if ( !cpu_has_xsave || !xsave_enabled(v) ) +if ( !xsave_enabled(v) ) return 0; /* do nothing */ +size = HVM_CPU_XSAVE_SIZE(v->arch.xcr0_accum); err = _hvm_init_entry(h, CPU_XSAVE_CODE, v->vcpu_id, size); if ( err ) return err; @@ -1255,6 +1256,11 @@ static int cf_check hvm_load_cpu_xsave_states( if ( !cpu_has_xsave ) return -EOPNOTSUPP; +/* + * Note: Xen prior to 4.12 would write out empty XSAVE records for VMs + * running on XSAVE-capable hardware but without XSAVE active. + */ + /* Customized checking for entry since our entry is of variable length */ desc = (struct hvm_save_descriptor *)>data[h->cur]; if ( sizeof (*desc) > h->size - h->cur) diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c index cf94761d0542..99cedb4f5e24 100644 --- a/xen/arch/x86/xstate.c +++ b/xen/arch/x86/xstate.c @@ -573,7 +573,7 @@ unsigned int xstate_ctxt_size(u64 xcr0) if ( xcr0 == xfeature_mask ) return xsave_cntxt_size; -if ( xcr0 == 0 ) +if ( xcr0 == 0 ) /* TODO: clean up paths passing 0 in here. 
*/ return 0; return hw_uncompressed_size(xcr0); -- 2.30.2
Re: [PATCH 07/15] xen/overlay: Enable device tree overlay assignment to running domains
On 29/04/2024 04:36, Henry Wang wrote: Hi Jan, Julien, Stefano, Hi Henry, On 4/24/2024 2:05 PM, Jan Beulich wrote: On 24.04.2024 05:34, Henry Wang wrote: --- a/xen/include/public/sysctl.h +++ b/xen/include/public/sysctl.h @@ -1197,7 +1197,9 @@ struct xen_sysctl_dt_overlay { #define XEN_SYSCTL_DT_OVERLAY_ADD 1 #define XEN_SYSCTL_DT_OVERLAY_REMOVE 2 uint8_t overlay_op; /* IN: Add or remove. */ - uint8_t pad[3]; /* IN: Must be zero. */ + bool domain_mapping; /* IN: True of False. */ + uint8_t pad[2]; /* IN: Must be zero. */ + uint32_t domain_id; }; If you merely re-purposed padding fields, all would be fine without bumping the interface version. Yet you don't, albeit for an unclear reason: Why uint32_t rather than domid_t? And on top of that - why a separate boolean when you could use e.g. DOMID_INVALID to indicate "no domain mapping"? I think both of your suggestion make great sense. I will follow the suggestion in v2. That said - anything taking a domain ID is certainly suspicious in a sysctl. Judging from the description you really mean this to be a domctl. Anything else will require extra justification. I also think a domctl is better. I had a look at the history of the already merged series, it looks like in the first version of merged part 1 [1], the hypercall was implemented as the domctl in the beginning but later in v2 changed to sysctl. I think this makes sense as the scope of that time is just to make Xen aware of the device tree node via Xen device tree. However this is now a problem for the current part where the scope (and the end goal) is extended to assign the added device to Linux Dom0/DomU via device tree overlays. I am not sure which way is better, should we repurposing the sysctl to domctl or maybe add another domctl (I am worrying about the duplication because basically we need the same sysctl functionality but now with a domid in it)? What do you think? 
I am not entirely sure this is a good idea to try to add the device in Xen and attach it to the guests at the same time. Imagine the following situation: 1) Add and attach devices 2) The domain is rebooted 3) Detach and remove devices After step 2, you technically have a new domain. You could have also a case where this is a completely different guest. So the flow would look a little bit weird (you create the DT overlay with domain A but remove with domain B). So, at the moment, it feels like the add/attach (resp detach/remove) operations should happen separately. Can you clarify why you want to add devices to Xen and attach to a guest within a single hypercall? Cheers, -- Julien Grall
[linux-linus test] 185862: tolerable FAIL - PUSHED
flight 185862 linux-linus real [real] flight 185869 linux-linus real-retest [real] http://logs.test-lab.xenproject.org/osstest/logs/185862/ http://logs.test-lab.xenproject.org/osstest/logs/185869/ Failures :-/ but no regressions. Tests which are failing intermittently (not blocking): test-amd64-amd64-xl-qemut-debianhvm-i386-xsm 12 debian-hvm-install fail pass in 185869-retest test-armhf-armhf-libvirt 8 xen-bootfail pass in 185869-retest test-armhf-armhf-xl-arndale 8 xen-bootfail pass in 185869-retest Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt 16 saverestore-support-check fail in 185869 like 185858 test-armhf-armhf-xl-arndale 15 migrate-support-check fail in 185869 never pass test-armhf-armhf-xl-arndale 16 saverestore-support-check fail in 185869 never pass test-armhf-armhf-libvirt15 migrate-support-check fail in 185869 never pass test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 185858 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 185858 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 185858 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 185858 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 185858 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never 
pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-qcow214 migrate-support-checkfail never pass test-armhf-armhf-xl-qcow215 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl-raw 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass version targeted for testing: linuxe67572cd2204894179d89bd7b984072f19313b03 baseline version: linux245c8e81741b51fe1281964e4a6525311be6858f Last test of 
basis 185858 2024-04-28 19:42:05 Z0 days Testing same since 185862 2024-04-29 05:18:20 Z0 days1 attempts People who touched revisions under test: Linus Torvalds jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm
Re: [PATCH v6 8/8] xen: allow up to 16383 cpus
Hi Juergen, On 29/04/2024 12:28, Jürgen Groß wrote: On 29.04.24 13:04, Julien Grall wrote: Hi Juergen, Sorry for the late reply. On 29/04/2024 11:33, Juergen Gross wrote: On 08.04.24 09:10, Jan Beulich wrote: On 27.03.2024 16:22, Juergen Gross wrote: With lock handling now allowing up to 16384 cpus (spinlocks can handle 65535 cpus, rwlocks can handle 16384 cpus), raise the allowed limit for the number of cpus to be configured to 16383. The new limit is imposed by IOMMU_CMD_BUFFER_MAX_ENTRIES and QINVAL_MAX_ENTRY_NR required to be larger than 2 * CONFIG_NR_CPUS. Signed-off-by: Juergen Gross Acked-by: Jan Beulich I'd prefer this to also gain an Arm ack, though. Any comment from Arm side? Can you clarify what the new limits mean in term of (security) support? Are we now claiming that Xen will work perfectly fine on platforms with up to 16383? If so, I can't comment for x86, but for Arm, I am doubtful that it would work without any (at least performance) issues. AFAIK, this is also an untested configuration. In fact I would be surprised if Xen on Arm was tested with more than a couple of hundreds cores (AFAICT the Ampere CPUs has 192 CPUs). I think we should add a security support limit for the number of physical cpus similar to the memory support limit we already have in place. For x86 I'd suggest 4096 cpus for security support (basically the limit we have with this patch), but I'm open for other suggestions, too. I have no idea about any sensible limits for Arm32/Arm64. I am not entirely. Bertrand, Michal, Stefano, should we use 192 (the number of CPUs from Ampere)? Cheers, -- Julien Grall
[ovmf test] 185868: all pass - PUSHED
flight 185868 ovmf real [real] http://logs.test-lab.xenproject.org/osstest/logs/185868/ Perfect :-) All tests in this flight passed as required version targeted for testing: ovmf 094727264f887e275444bd11d9d99c651a85c2e4 baseline version: ovmf c0dfe3ec1f364dbdaf6b241e01343e560b21dd03 Last test of basis 185803 2024-04-26 03:11:18 Z3 days Testing same since 185868 2024-04-29 10:45:51 Z0 days1 attempts People who touched revisions under test: Foster Nong Liming Gao jobs: build-amd64-xsm pass build-i386-xsm pass build-amd64 pass build-i386 pass build-amd64-libvirt pass build-i386-libvirt pass build-amd64-pvopspass build-i386-pvops pass test-amd64-amd64-xl-qemuu-ovmf-amd64 pass sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Pushing revision : To xenbits.xen.org:/home/xen/git/osstest/ovmf.git c0dfe3ec1f..094727264f 094727264f887e275444bd11d9d99c651a85c2e4 -> xen-tested-master
Re: [XEN PATCH] xen/mem_access: address violations of MISRA C: 2012 Rule 8.4
On 29.04.2024 17:45, Alessandro Zucchelli wrote: > Change #ifdef CONFIG_MEM_ACCESS by OR-ing defined(CONFIG_ARM), > allowing asm/mem_access.h to be included in all ARM build configurations. > This is to address the violation of MISRA C: 2012 Rule 8.4 which states: > "A compatible declaration shall be visible when an object or function > with external linkage is defined". Functions p2m_mem_access_check > and p2m_mem_access_check_and_get_page when CONFIG_MEM_ACCESS is not > defined in ARM builds don't have visible declarations in the file > containing their definitions. > > Signed-off-by: Alessandro Zucchelli > --- > xen/include/xen/mem_access.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/xen/include/xen/mem_access.h b/xen/include/xen/mem_access.h > index 87d93b31f6..ec0630677d 100644 > --- a/xen/include/xen/mem_access.h > +++ b/xen/include/xen/mem_access.h > @@ -33,7 +33,7 @@ > */ > struct vm_event_st; > > -#ifdef CONFIG_MEM_ACCESS > +#if defined(CONFIG_MEM_ACCESS) || defined(CONFIG_ARM) > #include > #endif This doesn't look quite right. If Arm supports mem-access, why would it not set MEM_ACCESS=y? Whereas if it's only stubs that Arm supplies, then those would better move here, thus eliminating the need for a per-arch stub header (see what was e.g. done for numa.h). This way RISC-V and PPC (and whatever is to come) would then be taken care of as well. Jan
Re: [XEN PATCH v1 7/7] x86/MCE: optional build of AMD/Intel MCE code
On 27.04.2024 01:16, Stefano Stabellini wrote: > On Tue, 23 Apr 2024, Sergiy Kibrik wrote: >> --- a/xen/arch/x86/cpu/mcheck/Makefile >> +++ b/xen/arch/x86/cpu/mcheck/Makefile >> @@ -1,12 +1,10 @@ >> -obj-y += amd_nonfatal.o >> -obj-y += mce_amd.o >> obj-y += mcaction.o >> obj-y += barrier.o >> -obj-y += intel-nonfatal.o >> obj-y += mctelem.o >> obj-y += mce.o >> obj-y += mce-apei.o >> -obj-y += mce_intel.o >> +obj-$(CONFIG_AMD) += mce_amd.o amd_nonfatal.o >> +obj-$(CONFIG_INTEL) += mce_intel.o intel-nonfatal.o >> obj-y += non-fatal.o >> obj-y += util.o >> obj-y += vmce.o > > Awesome! Almost. I'd appreciate if the ordering of files would be retained. It's not quite alphabetic, but still. Moving mce_amd.o and mcaction.o to their designated slots may or may not be done right here. >> --- a/xen/arch/x86/cpu/mcheck/non-fatal.c >> +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c >> @@ -24,14 +24,20 @@ static int __init cf_check >> init_nonfatal_mce_checker(void) >> * Check for non-fatal errors every MCE_RATE s >> */ >> switch (c->x86_vendor) { >> +#ifdef CONFIG_AMD >> case X86_VENDOR_AMD: >> case X86_VENDOR_HYGON: >> /* Assume we are on K8 or newer AMD or Hygon CPU here */ >> amd_nonfatal_mcheck_init(c); >> break; >> +#endif >> +#ifdef CONFIG_INTEL >> case X86_VENDOR_INTEL: >> intel_nonfatal_mcheck_init(c); >> break; >> +#endif >> +default: >> +return -ENODEV; This, while perhaps desirable, doesn't fit ... >> } >> printk(KERN_INFO "mcheck_poll: Machine check polling timer started.\n"); >> return 0; ... earlier behavior, and hence is somewhat unexpected in a change which, by its description, looks like a "no functional change" one. > For consistency in all other cases this patch series uses IS_ENABLED > checks. They could be used here as well. Hmm, I think for switch() statements like this (see also comments elsewhere on this series) using #ifdef is overall better. Jan
[PATCH] xen/x86: add extra pages to unpopulated-alloc if available
Commit 262fc47ac174 ('xen/balloon: don't use PV mode extra memory for zone device allocations') removed the addition of the extra memory ranges to the unpopulated range allocator, using those only for the balloon driver. This forces the unpopulated allocator to attach hotplug ranges even when spare memory (as part of the extra memory ranges) is available. Furthermore, on PVH domains it defeats the purpose of commit 38620fc4e893 ('x86/xen: attempt to inflate the memory balloon on PVH'), as extra memory ranges would only be used to map foreign memory if the kernel is built without XEN_UNPOPULATED_ALLOC support. Fix this by adding a helpers that adds the extra memory ranges to the list of unpopulated pages, and zeroes the ranges so they are not also consumed by the balloon driver. This should have been part of 38620fc4e893, hence the fixes tag. Note the current logic relies on unpopulated_init() (and hence arch_xen_unpopulated_init()) always being called ahead of balloon_init(), so that the extra memory regions are consumed by arch_xen_unpopulated_init(). Fixes: 38620fc4e893 ('x86/xen: attempt to inflate the memory balloon on PVH') Signed-off-by: Roger Pau Monné --- There's a lot of duplication between the unpopulated allocator and the balloon driver. I feel like the balloon driver should request any extra memory it needs to the unpopulated allocator, so that the current helpers provided by the XEN_BALLOON_MEMORY_HOTPLUG option could be replaced with wrappers around the unpopulated handlers. However this is much more work than strictly required here, and won't be suitable for backport IMO. Hence the more contained fix presented in this patch. 
--- arch/x86/xen/enlighten.c | 33 + 1 file changed, 33 insertions(+) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a01ca255b0c6..b88722dfc4f8 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -382,3 +382,36 @@ void __init xen_add_extra_mem(unsigned long start_pfn, unsigned long n_pfns) memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); } + +#ifdef CONFIG_XEN_UNPOPULATED_ALLOC +int __init arch_xen_unpopulated_init(struct resource **res) +{ + unsigned int i; + + if (!xen_domain()) + return -ENODEV; + + /* Must be set strictly before calling xen_free_unpopulated_pages(). */ + *res = _resource; + + /* +* Initialize with pages from the extra memory regions (see +* arch/x86/xen/setup.c). +*/ + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + unsigned int j; + + for (j = 0; j < xen_extra_mem[i].n_pfns; j++) { + struct page *pg = + pfn_to_page(xen_extra_mem[i].start_pfn + j); + + xen_free_unpopulated_pages(1, ); + } + + /* Zero so region is not also added to the balloon driver. */ + xen_extra_mem[i].n_pfns = 0; + } + + return 0; +} +#endif -- 2.44.0
Re: [XEN PATCH v1 6/7] x86/MCE: guard call to Intel-specific intel_get_extended_msrs()
On 27.04.2024 01:14, Stefano Stabellini wrote: > On Tue, 23 Apr 2024, Sergiy Kibrik wrote: >> Add check for CONFIG_INTEL build option to conditional call of this routine, >> so that if Intel support is disabled the call would be eliminated. >> >> No functional change intended. >> >> Signed-off-by: Sergiy Kibrik > > Reviewed-by: Stefano Stabellini Acked-by: Jan Beulich Aiui this doesn't depend on earlier patches and hence could go in right away. Jan
[XEN PATCH] xen/mem_access: address violations of MISRA C: 2012 Rule 8.4
Change #ifdef CONFIG_MEM_ACCESS by OR-ing defined(CONFIG_ARM), allowing asm/mem_access.h to be included in all ARM build configurations. This is to address the violation of MISRA C: 2012 Rule 8.4 which states: "A compatible declaration shall be visible when an object or function with external linkage is defined". Functions p2m_mem_access_check and p2m_mem_access_check_and_get_page when CONFIG_MEM_ACCESS is not defined in ARM builds don't have visible declarations in the file containing their definitions. Signed-off-by: Alessandro Zucchelli --- xen/include/xen/mem_access.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xen/include/xen/mem_access.h b/xen/include/xen/mem_access.h index 87d93b31f6..ec0630677d 100644 --- a/xen/include/xen/mem_access.h +++ b/xen/include/xen/mem_access.h @@ -33,7 +33,7 @@ */ struct vm_event_st; -#ifdef CONFIG_MEM_ACCESS +#if defined(CONFIG_MEM_ACCESS) || defined(CONFIG_ARM) #include #endif -- 2.25.1
Re: [XEN PATCH v1 5/7] x86/MCE: guard {intel/amd}_mcheck_init() calls
On 23.04.2024 10:56, Sergiy Kibrik wrote: > --- a/xen/arch/x86/cpu/mcheck/mce.c > +++ b/xen/arch/x86/cpu/mcheck/mce.c > @@ -761,7 +761,8 @@ void mcheck_init(struct cpuinfo_x86 *c, bool bsp) > { > case X86_VENDOR_AMD: > case X86_VENDOR_HYGON: > -inited = amd_mcheck_init(c, bsp); > +inited = IS_ENABLED(CONFIG_AMD) ? amd_mcheck_init(c, bsp) : > + mcheck_unset; > break; > > case X86_VENDOR_INTEL: > @@ -769,7 +770,8 @@ void mcheck_init(struct cpuinfo_x86 *c, bool bsp) > { > case 6: > case 15: > -inited = intel_mcheck_init(c, bsp); > +inited = IS_ENABLED(CONFIG_INTEL) ? intel_mcheck_init(c, bsp) : > +mcheck_unset; > break; > } > break; Same question as on an earlier patch: Why set a value different from what "default:" below here does (really: keeps)? And why not arrange to have that "default:" take care of what's build-disabled? Jan
Re: [XEN PATCH v1 4/7] x86/MCE: guard lmce_support/cmci_support
On 23.04.2024 10:54, Sergiy Kibrik wrote: > Guard access to Intel-specific lmce_support & cmci_support variables in > common MCE/VMCE code. These are set in Intel-specific parts of mcheck code > and can potentially be skipped if building for non-intel platform by > disabling CONFIG_INTEL option. > > Signed-off-by: Sergiy Kibrik See comments given for patch 2. Jan
Re: [XEN PATCH v1 3/7] x86/MCE: guard access to Intel/AMD-specific MCA MSRs
On 23.04.2024 10:52, Sergiy Kibrik wrote: > --- a/xen/arch/x86/cpu/mcheck/vmce.c > +++ b/xen/arch/x86/cpu/mcheck/vmce.c > @@ -141,12 +141,14 @@ static int bank_mce_rdmsr(const struct vcpu *v, > uint32_t msr, uint64_t *val) > case X86_VENDOR_CENTAUR: > case X86_VENDOR_SHANGHAI: > case X86_VENDOR_INTEL: > -ret = vmce_intel_rdmsr(v, msr, val); > +ret = IS_ENABLED(CONFIG_INTEL) ? > + vmce_intel_rdmsr(v, msr, val) : -ENODEV; > break; > > case X86_VENDOR_AMD: > case X86_VENDOR_HYGON: > -ret = vmce_amd_rdmsr(v, msr, val); > +ret = IS_ENABLED(CONFIG_AMD) ? > + vmce_amd_rdmsr(v, msr, val) : -ENODEV; > break; Why -ENODEV when ... > default: ... below here 0 is put into "ret"? And why not have the default case take care of unsupported/unrecognized vendors uniformly? Jan
Re: [XEN PATCH v1 2/7] x86/intel: guard vmce_has_lmce() with INTEL option
On 23.04.2024 10:50, Sergiy Kibrik wrote: > Since MCG_LMCE_P bit is specific to Intel CPUs That's the case now. It could change going forward, and an underlying hypervisor might also have (obscure?) reasons to surface it elsewhere. > the code to check it can > possibly be excluded from build if !CONFIG_INTEL. With these guards > calls to vmce_has_lmce() are eliminated and mce_intel.c can end up > not being built. > > Also replace boilerplate code that checks for MCG_LMCE_P flag with > vmce_has_lmce(), which might contribute to readability a bit. Alternatively, have you considered making that function an inline one in a suitable header? Besides addressing your build issue (I think), ... > --- a/xen/arch/x86/cpu/mcheck/vmce.c > +++ b/xen/arch/x86/cpu/mcheck/vmce.c > @@ -199,7 +199,7 @@ int vmce_rdmsr(uint32_t msr, uint64_t *val) > * bits are always set in guest MSR_IA32_FEATURE_CONTROL by Xen, so > it > * does not need to check them here. > */ > -if ( cur->arch.vmce.mcg_cap & MCG_LMCE_P ) > +if ( IS_ENABLED(CONFIG_INTEL) && vmce_has_lmce(cur) ) ... doing so would alternatively also permit integrating the IS_ENABLED() into the function, rather than repeating the same ... > @@ -324,7 +324,7 @@ int vmce_wrmsr(uint32_t msr, uint64_t val) > break; > > case MSR_IA32_MCG_EXT_CTL: > -if ( (cur->arch.vmce.mcg_cap & MCG_LMCE_P) && > +if ( IS_ENABLED(CONFIG_INTEL) && vmce_has_lmce(cur) && > !(val & ~MCG_EXT_CTL_LMCE_EN) ) > cur->arch.vmce.mcg_ext_ctl = val; > else > --- a/xen/arch/x86/msr.c > +++ b/xen/arch/x86/msr.c > @@ -86,7 +86,7 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) > goto gp_fault; > > *val = IA32_FEATURE_CONTROL_LOCK; > -if ( vmce_has_lmce(v) ) > +if ( IS_ENABLED(CONFIG_INTEL) && vmce_has_lmce(v) ) > *val |= IA32_FEATURE_CONTROL_LMCE_ON; > if ( cp->basic.vmx ) > *val |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX; ... three times. Jan
Re: [XEN PATCH v1 1/7] x86/vpmu: separate amd/intel vPMU code
On 23.04.2024 10:48, Sergiy Kibrik wrote: > --- a/xen/arch/x86/include/asm/vpmu.h > +++ b/xen/arch/x86/include/asm/vpmu.h > @@ -11,6 +11,7 @@ > #define __ASM_X86_HVM_VPMU_H_ > > #include > +#include > > #define vcpu_vpmu(vcpu) (&(vcpu)->arch.vpmu) > #define vpmu_vcpu(vpmu) container_of((vpmu), struct vcpu, arch.vpmu) > @@ -42,9 +43,27 @@ struct arch_vpmu_ops { > #endif > }; > > +#ifdef CONFIG_INTEL > const struct arch_vpmu_ops *core2_vpmu_init(void); > +#else > +static inline const struct arch_vpmu_ops *core2_vpmu_init(void) > +{ > +return ERR_PTR(-ENODEV); > +} > +#endif > +#ifdef CONFIG_AMD > const struct arch_vpmu_ops *amd_vpmu_init(void); > const struct arch_vpmu_ops *hygon_vpmu_init(void); > +#else > +static inline const struct arch_vpmu_ops *amd_vpmu_init(void) > +{ > +return ERR_PTR(-ENODEV); > +} > +static inline const struct arch_vpmu_ops *hygon_vpmu_init(void) > +{ > +return ERR_PTR(-ENODEV); > +} > +#endif Any reason you don't follow the approach used in patch 7, putting simple #ifdef in the switch() in vpmu_init()? That would avoid the need for the three almost identical stubs. Jan
Re: [PATCH 2/5] x86/cpu-policy: Add SVM features already used by Xen
On 29/04/2024 4:16 pm, Andrew Cooper wrote: > diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py > index bf3f9ec01e6e..f07b1f4cf905 100755 > --- a/xen/tools/gen-cpuid.py > +++ b/xen/tools/gen-cpuid.py > @@ -280,6 +280,9 @@ def crunch_numbers(state): > # standard 3DNow in the earlier K6 processors. > _3DNOW: [_3DNOWEXT], > > +# The SVM bit enumerates the whole SVM leave. > +SVM: list(range(NPT, NPT + 32)), This should say leaf. Fixed locally. ~Andrew
[XEN PATCH 2/3] automation: do not allow failure for triggered analyses
Do not allow_failure for triggered analyses: introducing regressions of clean guidelines will cause a CI failure. Signed-off-by: Federico Serafini --- automation/gitlab-ci/analyze.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/automation/gitlab-ci/analyze.yaml b/automation/gitlab-ci/analyze.yaml index 46c9d8e2e5..32bf570149 100644 --- a/automation/gitlab-ci/analyze.yaml +++ b/automation/gitlab-ci/analyze.yaml @@ -26,7 +26,6 @@ .eclair-analysis:triggered: extends: .eclair-analysis - allow_failure: true rules: - if: $CI_PIPELINE_SOURCE == "schedule" when: never -- 2.34.1
[XEN PATCH 0/3] automation/eclair: do not allow failure for triggered analyses
Patch 1/3 does some preparation work. Patch 2/3, as the title says, removes allow_failure = true for triggered analyses. Patch 3/3 makes explicit that initally no files are tagged as adopted, this is needed by the scheduled analysis. Federico Serafini (3): automation/eclair: tag Rule 7.2 as clean and temporarily remove Rules 1.1 and 8.2 automation: do not allow failure for triggered analyses automation/eclair: make explicit there are no adopted files by default automation/eclair_analysis/ECLAIR/analysis.ecl | 4 automation/eclair_analysis/ECLAIR/tagging.ecl | 2 +- automation/gitlab-ci/analyze.yaml | 1 - 3 files changed, 5 insertions(+), 2 deletions(-) -- 2.34.1
[XEN PATCH 3/3] automation/eclair: make explicit there are no adopted files by default
Update ECLAIR configuration to consider no adopted files by default. Signed-off-by: Federico Serafini --- automation/eclair_analysis/ECLAIR/analysis.ecl | 4 1 file changed, 4 insertions(+) diff --git a/automation/eclair_analysis/ECLAIR/analysis.ecl b/automation/eclair_analysis/ECLAIR/analysis.ecl index a604582da3..66ed7f952c 100644 --- a/automation/eclair_analysis/ECLAIR/analysis.ecl +++ b/automation/eclair_analysis/ECLAIR/analysis.ecl @@ -21,6 +21,10 @@ map_strings("scheduled-analysis",analysis_kind) -eval_file=toolchain.ecl -eval_file=public_APIs.ecl + +-doc="Initially, there are no files tagged as adopted." +-file_tag+={adopted,"none()"} + if(not(scheduled_analysis), eval_file("adopted.ecl") ) -- 2.34.1
[XEN PATCH 1/3] automation/eclair: tag Rule 7.2 as clean and temporarily remove Rules 1.1 and 8.2
Update ECLAIR configuration to consider Rule 7.2 as clean. Temporarily remove the clean tag from Rules 1.1 and 8.2: when violations of such rules will be addressed, the clean tag will be reintroduced. Signed-off-by: Federico Serafini --- automation/eclair_analysis/ECLAIR/tagging.ecl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/eclair_analysis/ECLAIR/tagging.ecl b/automation/eclair_analysis/ECLAIR/tagging.ecl index d609b470eb..bdf94ed996 100644 --- a/automation/eclair_analysis/ECLAIR/tagging.ecl +++ b/automation/eclair_analysis/ECLAIR/tagging.ecl @@ -19,7 +19,7 @@ -doc_begin="Clean guidelines: new violations for these guidelines are not accepted." --service_selector={clean_guidelines_common,"MC3R1.D1.1||MC3R1.D2.1||MC3R1.D4.1||MC3R1.D4.11||MC3R1.D4.14||MC3R1.R1.1||MC3R1.R11.7||MC3R1.R11.9||MC3R1.R12.5||MC3R1.R1.3||MC3R1.R1.4||MC3R1.R14.1||MC3R1.R16.7||MC3R1.R17.1||MC3R1.R17.3||MC3R1.R17.4||MC3R1.R17.5||MC3R1.R17.6||MC3R1.R20.13||MC3R1.R20.14||MC3R1.R20.4||MC3R1.R20.9||MC3R1.R21.13||MC3R1.R21.19||MC3R1.R21.21||MC3R1.R2.2||MC3R1.R22.2||MC3R1.R22.4||MC3R1.R22.5||MC3R1.R22.6||MC3R1.R2.6||MC3R1.R3.1||MC3R1.R3.2||MC3R1.R4.1||MC3R1.R4.2||MC3R1.R5.1||MC3R1.R5.2||MC3R1.R5.4||MC3R1.R5.6||MC3R1.R6.1||MC3R1.R6.2||MC3R1.R7.1||MC3R1.R7.4||MC3R1.R8.1||MC3R1.R8.10||MC3R1.R8.12||MC3R1.R8.14||MC3R1.R8.2||MC3R1.R8.5||MC3R1.R8.6||MC3R1.R8.8||MC3R1.R9.2||MC3R1.R9.3||MC3R1.R9.4||MC3R1.R9.5" 
+-service_selector={clean_guidelines_common,"MC3R1.D1.1||MC3R1.D2.1||MC3R1.D4.1||MC3R1.D4.11||MC3R1.D4.14||MC3R1.R11.7||MC3R1.R11.9||MC3R1.R12.5||MC3R1.R1.3||MC3R1.R1.4||MC3R1.R14.1||MC3R1.R16.7||MC3R1.R17.1||MC3R1.R17.3||MC3R1.R17.4||MC3R1.R17.5||MC3R1.R17.6||MC3R1.R20.13||MC3R1.R20.14||MC3R1.R20.4||MC3R1.R20.9||MC3R1.R21.13||MC3R1.R21.19||MC3R1.R21.21||MC3R1.R2.2||MC3R1.R22.2||MC3R1.R22.4||MC3R1.R22.5||MC3R1.R22.6||MC3R1.R2.6||MC3R1.R3.1||MC3R1.R3.2||MC3R1.R4.1||MC3R1.R4.2||MC3R1.R5.1||MC3R1.R5.2||MC3R1.R5.4||MC3R1.R5.6||MC3R1.R6.1||MC3R1.R6.2||MC3R1.R7.1||MC3R1.R7.2||MC3R1.R7.4||MC3R1.R8.1||MC3R1.R8.10||MC3R1.R8.12||MC3R1.R8.14||MC3R1.R8.5||MC3R1.R8.6||MC3R1.R8.8||MC3R1.R9.2||MC3R1.R9.3||MC3R1.R9.4||MC3R1.R9.5" } -setq=target,getenv("XEN_TARGET_ARCH") -- 2.34.1
Re: [PATCH v3 8/8] gzip: move crc state into gunzip state
On 24.04.2024 18:34, Daniel P. Smith wrote: > Move the crc and its state into struct gunzip_state. In the process, expand > the > only use of CRC_VALUE as it is hides what is being compared. Andrew mentioned uint32_t only for the description, but I think you want (maybe even need) to go further and actually use that type also, e.g. > --- a/xen/common/gzip/gunzip.c > +++ b/xen/common/gzip/gunzip.c > @@ -20,6 +20,9 @@ struct gunzip_state { > > unsigned long bb; /* bit buffer */ > unsigned int bk; /* bits in bit buffer */ > + > +uint32_t crc_32_tab[256]; > +uint32_t crc; > }; ... not just here, but also ... > @@ -72,7 +75,7 @@ static __init void flush_window(struct gunzip_state *s) > * The window is equal to the output buffer therefore only need to > * compute the crc. > */ > -unsigned long c = crc; > +unsigned int c = s->crc; ... here. > --- a/xen/common/gzip/inflate.c > +++ b/xen/common/gzip/inflate.c > @@ -1032,16 +1032,12 @@ static int __init inflate(struct gunzip_state *s) > * > **/ > > -static ulg __initdata crc_32_tab[256]; > -static ulg __initdata crc; /* initialized in makecrc() so it'll reside in > bss */ > -#define CRC_VALUE (crc ^ 0xUL) Note how this is _not_ same as ~0u, also ... > @@ -1069,11 +1065,11 @@ static void __init makecrc(void) > if (k & 1) > c ^= e; > } > -crc_32_tab[i] = c; > +s->crc_32_tab[i] = c; > } > > /* this is initialized here so this code could reside in ROM */ > -crc = (ulg)0xUL; /* shift register contents */ > +s->crc = (ulg)~0u; /* shift register contents */ ... applicable here then: sizeof(int) >= 4, not ==. As Andrew indicates, the cast previously wasn't needed here. Keeping it is at best misleading, when s->crc isn't of that type anymore. Finally please stick to upper-case number suffixes; see all the related Misra changes, which (iirc) put in place only upper-case ones. Jan
[PATCH 1/5] x86/cpu-policy: Infrastructure for the AMD SVM and SEV leaves
Allocate two new feature leaves, and extend cpu_policy with the non-feature fields too. The CPUID dependency between the SVM bit on the whole SVM leaf is intentionally deferred, to avoid transiently breaking nested virt. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Xenia Ragiadakou CC: Sergiy Kibrik CC: George Dunlap CC: Andrei Semenov CC: Vaishali Thakkar --- tools/libs/light/libxl_cpuid.c | 2 ++ tools/misc/xen-cpuid.c | 10 + xen/arch/x86/cpu/common.c | 4 xen/include/public/arch-x86/cpufeatureset.h | 4 xen/include/xen/lib/x86/cpu-policy.h| 24 +++-- xen/lib/x86/cpuid.c | 4 6 files changed, 46 insertions(+), 2 deletions(-) diff --git a/tools/libs/light/libxl_cpuid.c b/tools/libs/light/libxl_cpuid.c index ce4f3c7095ba..c7a8b76f541d 100644 --- a/tools/libs/light/libxl_cpuid.c +++ b/tools/libs/light/libxl_cpuid.c @@ -342,6 +342,8 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *policy, const char* str) CPUID_ENTRY(0x0007, 1, CPUID_REG_EDX), MSR_ENTRY(0x10a, CPUID_REG_EAX), MSR_ENTRY(0x10a, CPUID_REG_EDX), +CPUID_ENTRY(0x800a, NA, CPUID_REG_EDX), +CPUID_ENTRY(0x801f, NA, CPUID_REG_EAX), #undef MSR_ENTRY #undef CPUID_ENTRY }; diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c index 8893547bebce..ab09410a05d6 100644 --- a/tools/misc/xen-cpuid.c +++ b/tools/misc/xen-cpuid.c @@ -264,6 +264,14 @@ static const char *const str_m10Ah[32] = { }; +static const char *const str_eAd[32] = +{ +}; + +static const char *const str_e1Fa[32] = +{ +}; + static const struct { const char *name; const char *abbr; @@ -288,6 +296,8 @@ static const struct { { "CPUID 0x0007:1.edx", "7d1", str_7d1 }, { "MSR_ARCH_CAPS.lo", "m10Al", str_m10Al }, { "MSR_ARCH_CAPS.hi", "m10Ah", str_m10Ah }, +{ "CPUID 0x800a.edx", "eAd", str_eAd }, +{ "CPUID 0x801f.eax", "e1Fa", str_e1Fa }, }; #define COL_ALIGN "24" diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c index 28d7f34c4dbe..25b11e6472b8 100644 --- 
a/xen/arch/x86/cpu/common.c +++ b/xen/arch/x86/cpu/common.c @@ -477,6 +477,10 @@ static void generic_identify(struct cpuinfo_x86 *c) c->x86_capability[FEATURESET_e7d] = cpuid_edx(0x8007); if (c->extended_cpuid_level >= 0x8008) c->x86_capability[FEATURESET_e8b] = cpuid_ebx(0x8008); + if (c->extended_cpuid_level >= 0x800a) + c->x86_capability[FEATURESET_eAd] = cpuid_edx(0x800a); + if (c->extended_cpuid_level >= 0x801f) + c->x86_capability[FEATURESET_e1Fa] = cpuid_eax(0x801f); if (c->extended_cpuid_level >= 0x8021) c->x86_capability[FEATURESET_e21a] = cpuid_eax(0x8021); diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h index 53f13dec31f7..0f869214811e 100644 --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -357,6 +357,10 @@ XEN_CPUFEATURE(RFDS_CLEAR, 16*32+28) /*!A Register File(s) cleared by VE /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ +/* AMD-defined CPU features, CPUID level 0x800a.edx, word 18 */ + +/* AMD-defined CPU features, CPUID level 0x801f.eax, word 19 */ + #endif /* XEN_CPUFEATURE */ /* Clean up from a default include. Close the enum (for C). */ diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h index d5e447e9dc06..936e00e4da73 100644 --- a/xen/include/xen/lib/x86/cpu-policy.h +++ b/xen/include/xen/lib/x86/cpu-policy.h @@ -22,6 +22,8 @@ #define FEATURESET_7d1 15 /* 0x0007:1.edx*/ #define FEATURESET_m10Al 16 /* 0x010a.eax */ #define FEATURESET_m10Ah 17 /* 0x010a.edx */ +#define FEATURESET_eAd 18 /* 0x800a.edx */ +#define FEATURESET_e1Fa 19 /* 0x801f.eax */ struct cpuid_leaf { @@ -296,7 +298,16 @@ struct cpu_policy uint32_t /* d */:32; uint64_t :64, :64; /* Leaf 0x8009. */ -uint64_t :64, :64; /* Leaf 0x800a - SVM rev and features. */ + +/* Leaf 0x800a - SVM rev and features. 
*/ +uint8_t svm_rev, :8, :8, :8; +uint32_t /* b */ :32; +uint32_t nr_asids; +union { +uint32_t eAd; +struct { DECL_BITFIELD(eAd); }; +}; + uint64_t :64, :64; /* Leaf 0x800b. */ uint64_t :64, :64; /* Leaf 0x800c. */ uint64_t :64, :64; /* Leaf 0x800d. */ @@ -317,7 +328,16 @@ struct cpu_policy uint64_t :64, :64; /* Leaf 0x801c. */ uint64_t :64, :64; /* Leaf
[PATCH 4/5] x86/svm: Switch SVM features over normal cpu_has_*
Delete the boot time rendering of advanced features. It's entirely ad-hoc and not even everything printed here is used by Xen. It is available in `xen-cpuid` now. With (only) svm_load_segs_{,prefetch}() declared now in svm.h, only svm.c and domain.c which need the header. Clean up all others. No functional change. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Xenia Ragiadakou CC: Sergiy Kibrik CC: George Dunlap CC: Andrei Semenov CC: Vaishali Thakkar --- xen/arch/x86/hvm/svm/asid.c| 5 ++- xen/arch/x86/hvm/svm/emulate.c | 3 +- xen/arch/x86/hvm/svm/intr.c| 1 - xen/arch/x86/hvm/svm/nestedsvm.c | 14 xen/arch/x86/hvm/svm/svm.c | 50 +++--- xen/arch/x86/hvm/svm/vmcb.c| 1 - xen/arch/x86/include/asm/cpufeature.h | 10 ++ xen/arch/x86/include/asm/hvm/svm/svm.h | 36 --- 8 files changed, 31 insertions(+), 89 deletions(-) diff --git a/xen/arch/x86/hvm/svm/asid.c b/xen/arch/x86/hvm/svm/asid.c index 7977a8e86b53..6117a362d310 100644 --- a/xen/arch/x86/hvm/svm/asid.c +++ b/xen/arch/x86/hvm/svm/asid.c @@ -6,7 +6,6 @@ #include #include -#include #include "svm.h" @@ -39,7 +38,7 @@ void svm_asid_handle_vmrun(void) { vmcb_set_asid(vmcb, true); vmcb->tlb_control = -cpu_has_svm_flushbyasid ? TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; +cpu_has_flush_by_asid ? TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; return; } @@ -48,7 +47,7 @@ void svm_asid_handle_vmrun(void) vmcb->tlb_control = !need_flush ? TLB_CTRL_NO_FLUSH : -cpu_has_svm_flushbyasid ? TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; +cpu_has_flush_by_asid ? 
TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; } /* diff --git a/xen/arch/x86/hvm/svm/emulate.c b/xen/arch/x86/hvm/svm/emulate.c index 93ac1d3435f9..da6e21b2e270 100644 --- a/xen/arch/x86/hvm/svm/emulate.c +++ b/xen/arch/x86/hvm/svm/emulate.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include "svm.h" @@ -20,7 +19,7 @@ static unsigned long svm_nextrip_insn_length(struct vcpu *v) { struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb; -if ( !cpu_has_svm_nrips ) +if ( !cpu_has_nrips ) return 0; #ifndef NDEBUG diff --git a/xen/arch/x86/hvm/svm/intr.c b/xen/arch/x86/hvm/svm/intr.c index 4805c5567213..facd2894a2c6 100644 --- a/xen/arch/x86/hvm/svm/intr.c +++ b/xen/arch/x86/hvm/svm/intr.c @@ -17,7 +17,6 @@ #include #include #include -#include #include /* for nestedhvm_vcpu_in_guestmode */ #include #include diff --git a/xen/arch/x86/hvm/svm/nestedsvm.c b/xen/arch/x86/hvm/svm/nestedsvm.c index 35a2cbfd7d13..255af112661f 100644 --- a/xen/arch/x86/hvm/svm/nestedsvm.c +++ b/xen/arch/x86/hvm/svm/nestedsvm.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -1620,7 +1619,7 @@ void svm_nested_features_on_efer_update(struct vcpu *v) { if ( !vmcb->virt_ext.fields.vloadsave_enable && paging_mode_hap(v->domain) && - cpu_has_svm_vloadsave ) + cpu_has_v_loadsave ) { vmcb->virt_ext.fields.vloadsave_enable = 1; general2_intercepts = vmcb_get_general2_intercepts(vmcb); @@ -1629,8 +1628,7 @@ void svm_nested_features_on_efer_update(struct vcpu *v) vmcb_set_general2_intercepts(vmcb, general2_intercepts); } -if ( !vmcb->_vintr.fields.vgif_enable && - cpu_has_svm_vgif ) +if ( !vmcb->_vintr.fields.vgif_enable && cpu_has_v_gif ) { vintr = vmcb_get_vintr(vmcb); vintr.fields.vgif = svm->ns_gif; @@ -1675,8 +1673,8 @@ void __init start_nested_svm(struct hvm_function_table *hvm_function_table) */ hvm_function_table->caps.nested_virt = hvm_function_table->caps.hap && -cpu_has_svm_lbrv && -cpu_has_svm_nrips && -cpu_has_svm_flushbyasid && -cpu_has_svm_decode; +cpu_has_v_lbr 
&& +cpu_has_nrips && +cpu_has_flush_by_asid && +cpu_has_decode_assist; } diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c index 4719fffae589..16eb875aab94 100644 --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -1287,7 +1287,7 @@ static void cf_check svm_inject_event(const struct x86_event *event) * that hardware doesn't perform DPL checking on injection. */ if ( event->type == X86_EVENTTYPE_PRI_SW_EXCEPTION || - (!cpu_has_svm_nrips && (event->type >= X86_EVENTTYPE_SW_INTERRUPT)) ) + (!cpu_has_nrips && (event->type >= X86_EVENTTYPE_SW_INTERRUPT)) ) svm_emul_swint_injection(&_event); switch ( _event.vector | -(_event.type == X86_EVENTTYPE_SW_INTERRUPT) ) @@ -1341,7 +1341,7 @@ static void cf_check svm_inject_event(const struct x86_event *event) switch (
[PATCH 0/5] x86: AMD CPUID handling improvements
This is (half) the series I've promised various people, to untangle some AMD CPUID handling. It moves the SVM feature leaf into the standard x86_capabilities[] infrastructure. On a random Milan system, with this in place, xen-cpuid reports: Dynamic sets: Raw 178bfbff:7eda320b:2fd3fbff:75c237ff:000f:219c95a9:0040069c:6799:91bef75f:::204d:::::::119b9cff:0101fd3f [18] CPUID 0x800a.edx npt v-lbr svm-lock nrips v-tsc-rate vmcb-cleanbits flush-by-asid decode-assist pause-filter <11> pause-thresh v-loadsave v-gif <17> npt-sss v-spec-ctrl <23> <24> <28> [19] CPUID 0x801f.eax sme sev <2> sev-es sev-snp <5> <8> <10> <11> <12> <13> <14> <15> <16> <24> Host 178bf3ff:76da320b:2fd3fbff:644037ff:000f:219c95a9:0040068c:0780:319ed205:::1845:::::::001994ff: [18] CPUID 0x800a.edx npt v-lbr svm-lock nrips v-tsc-rate vmcb-cleanbits flush-by-asid decode-assist pause-filter pause-thresh v-loadsave v-gif npt-sss v-spec-ctrl [19] CPUID 0x801f.eax HVM Max 178bfbff:f6fa3203:2e500800:440001f7:000f:219c05a9:0040060c:0100:331ed005:::1845:::::::04ab: [18] CPUID 0x800a.edx npt v-lbr nrips vmcb-cleanbits decode-assist pause-filter [19] CPUID 0x801f.eax Unfortunately, I haven't managed to do the second half to make the host_policy usable earlier on boot. Untangling __setup_xen() is proving stubborn due to (ab)uses of the MB1 module list. 
Andrew Cooper (5): x86/cpu-policy: Infrastructure for the AMD SVM and SEV leaves x86/cpu-policy: Add SVM features already used by Xen x86/spec-ctrl: Remove open-coded check of SVM_FEATURE_SPEC_CTRL x86/svm: Switch SVM features over normal cpu_has_* x86/cpu-policy: Introduce some SEV features tools/libs/light/libxl_cpuid.c | 2 + tools/misc/xen-cpuid.c | 24 ++ xen/arch/x86/cpu-policy.c | 17 +++ xen/arch/x86/cpu/common.c | 4 ++ xen/arch/x86/hvm/svm/asid.c | 5 +-- xen/arch/x86/hvm/svm/emulate.c | 3 +- xen/arch/x86/hvm/svm/intr.c | 1 - xen/arch/x86/hvm/svm/nestedsvm.c| 14 +++--- xen/arch/x86/hvm/svm/svm.c | 50 + xen/arch/x86/hvm/svm/vmcb.c | 1 - xen/arch/x86/include/asm/cpufeature.h | 16 +++ xen/arch/x86/include/asm/hvm/svm/svm.h | 36 --- xen/arch/x86/spec_ctrl.c| 7 +-- xen/include/public/arch-x86/cpufeatureset.h | 22 + xen/include/xen/lib/x86/cpu-policy.h| 24 +- xen/lib/x86/cpuid.c | 4 ++ xen/tools/gen-cpuid.py | 7 +++ 17 files changed, 128 insertions(+), 109 deletions(-) -- 2.30.2
[PATCH 5/5] x86/cpu-policy: Introduce some SEV features
For display purposes only right now. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Xenia Ragiadakou CC: Sergiy Kibrik CC: George Dunlap CC: Andrei Semenov CC: Vaishali Thakkar This is only half the work to get SEV working nicely. The other half (rearranging __start_xen() so we can move the host policy collection earlier) is still a work-in-progress. --- tools/misc/xen-cpuid.c | 3 +++ xen/arch/x86/include/asm/cpufeature.h | 3 +++ xen/include/public/arch-x86/cpufeatureset.h | 4 xen/tools/gen-cpuid.py | 6 +- 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c index 0d01b0e797f1..1463e0429ba1 100644 --- a/tools/misc/xen-cpuid.c +++ b/tools/misc/xen-cpuid.c @@ -281,6 +281,9 @@ static const char *const str_eAd[32] = static const char *const str_e1Fa[32] = { +[ 0] = "sme", [ 1] = "sev", +/* 2 */ [ 3] = "sev-es", +[ 4] = "sev-snp", }; static const struct { diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h index b6fb8c24423c..732f0d2bf758 100644 --- a/xen/arch/x86/include/asm/cpufeature.h +++ b/xen/arch/x86/include/asm/cpufeature.h @@ -230,6 +230,9 @@ static inline bool boot_cpu_has(unsigned int feat) #define cpu_has_v_gif boot_cpu_has(X86_FEATURE_V_GIF) #define cpu_has_v_spec_ctrl boot_cpu_has(X86_FEATURE_V_SPEC_CTRL) +/* CPUID level 0x801f.eax */ +#define cpu_has_sev boot_cpu_has(X86_FEATURE_SEV) + /* Synthesized. 
*/ #define cpu_has_arch_perfmonboot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING) diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h index 80d252a38c2d..7ee0f2329151 100644 --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -374,6 +374,10 @@ XEN_CPUFEATURE(NPT_SSS,18*32+19) /* NPT Supervisor Shadow Stacks * XEN_CPUFEATURE(V_SPEC_CTRL,18*32+20) /* Virtualised MSR_SPEC_CTRL */ /* AMD-defined CPU features, CPUID level 0x801f.eax, word 19 */ +XEN_CPUFEATURE(SME,19*32+ 0) /* Secure Memory Encryption */ +XEN_CPUFEATURE(SEV,19*32+ 1) /* Secure Encryped VM */ +XEN_CPUFEATURE(SEV_ES, 19*32+ 3) /* SEV Encrypted State */ +XEN_CPUFEATURE(SEV_SNP,19*32+ 4) /* SEV Secure Nested Paging */ #endif /* XEN_CPUFEATURE */ diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py index f07b1f4cf905..bff4d9389ff6 100755 --- a/xen/tools/gen-cpuid.py +++ b/xen/tools/gen-cpuid.py @@ -281,7 +281,7 @@ def crunch_numbers(state): _3DNOW: [_3DNOWEXT], # The SVM bit enumerates the whole SVM leave. -SVM: list(range(NPT, NPT + 32)), +SVM: list(range(NPT, NPT + 32)) + [SEV], # This is just the dependency between AVX512 and AVX2 of XSTATE # feature flags. If want to use AVX512, AVX2 must be supported and @@ -341,6 +341,10 @@ def crunch_numbers(state): # The behaviour described by RRSBA depend on eIBRS being active. EIBRS: [RRSBA], + +SEV: [SEV_ES], + +SEV_ES: [SEV_SNP], } deep_features = tuple(sorted(deps.keys())) -- 2.30.2
[PATCH 3/5] x86/spec-ctrl: Remove open-coded check of SVM_FEATURE_SPEC_CTRL
Now that the SVM feature leaf has been included in normal feature handling, it is available early enough for init_speculation_mitigations() to use. No functional change. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Xenia Ragiadakou CC: Sergiy Kibrik CC: George Dunlap CC: Andrei Semenov CC: Vaishali Thakkar --- xen/arch/x86/include/asm/cpufeature.h | 3 +++ xen/arch/x86/spec_ctrl.c | 7 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h index 9bc553681f4a..77cfd900cb56 100644 --- a/xen/arch/x86/include/asm/cpufeature.h +++ b/xen/arch/x86/include/asm/cpufeature.h @@ -217,6 +217,9 @@ static inline bool boot_cpu_has(unsigned int feat) #define cpu_has_rfds_no boot_cpu_has(X86_FEATURE_RFDS_NO) #define cpu_has_rfds_clear boot_cpu_has(X86_FEATURE_RFDS_CLEAR) +/* CPUID level 0x800a.edx */ +#define cpu_has_v_spec_ctrl boot_cpu_has(X86_FEATURE_V_SPEC_CTRL) + /* Synthesized. */ #define cpu_has_arch_perfmonboot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING) diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c index 40f6ae017010..0bda9d01def5 100644 --- a/xen/arch/x86/spec_ctrl.c +++ b/xen/arch/x86/spec_ctrl.c @@ -11,7 +11,6 @@ #include #include -#include #include #include #include @@ -1896,12 +1895,8 @@ void __init init_speculation_mitigations(void) * * No need for SCF_ist_sc_msr because Xen's value is restored * atomically WRT NMIs in the VMExit path. - * - * TODO: Adjust cpu_has_svm_spec_ctrl to be usable earlier on boot. */ -if ( opt_msr_sc_hvm && - (boot_cpu_data.extended_cpuid_level >= 0x800aU) && - (cpuid_edx(0x800aU) & (1u << SVM_FEATURE_SPEC_CTRL)) ) +if ( opt_msr_sc_hvm && cpu_has_v_spec_ctrl ) setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); } -- 2.30.2
[PATCH 2/5] x86/cpu-policy: Add SVM features already used by Xen
These will replace svm_feature_flags and the SVM_FEATURE_* constants over the next few changes. Take the opportunity to rationalise some names. Drop the opencoded "inherit from host" logic in calculate_hvm_max_policy() and use 'h'/'!' annotations. The logic needs to operate on fs, not the policy object, given its position within the function. Drop some trailing whitespace introduced when this block of code was last moved. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Xenia Ragiadakou CC: Sergiy Kibrik CC: George Dunlap CC: Andrei Semenov CC: Vaishali Thakkar --- tools/misc/xen-cpuid.c | 11 +++ xen/arch/x86/cpu-policy.c | 17 + xen/include/public/arch-x86/cpufeatureset.h | 14 ++ xen/tools/gen-cpuid.py | 3 +++ 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c index ab09410a05d6..0d01b0e797f1 100644 --- a/tools/misc/xen-cpuid.c +++ b/tools/misc/xen-cpuid.c @@ -266,6 +266,17 @@ static const char *const str_m10Ah[32] = static const char *const str_eAd[32] = { +[ 0] = "npt", [ 1] = "v-lbr", +[ 2] = "svm-lock",[ 3] = "nrips", +[ 4] = "v-tsc-rate", [ 5] = "vmcb-cleanbits", +[ 6] = "flush-by-asid", [ 7] = "decode-assist", + +[10] = "pause-filter", +[12] = "pause-thresh", +/* 14 */ [15] = "v-loadsave", +[16] = "v-gif", +/* 18 */ [19] = "npt-sss", +[20] = "v-spec-ctrl", }; static const char *const str_e1Fa[32] = diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c index 4b6d96276399..da4401047e89 100644 --- a/xen/arch/x86/cpu-policy.c +++ b/xen/arch/x86/cpu-policy.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -748,22 +747,16 @@ static void __init calculate_hvm_max_policy(void) if ( !cpu_has_vmx ) __clear_bit(X86_FEATURE_PKS, fs); -/* +/* * Make adjustments to possible (nested) virtualization features exposed * to the guest */ -if ( p->extd.svm ) +if ( test_bit(X86_FEATURE_SVM, fs) ) { -/* Clamp to implemented 
features which require hardware support. */ -p->extd.raw[0xa].d &= ((1u << SVM_FEATURE_NPT) | - (1u << SVM_FEATURE_LBRV) | - (1u << SVM_FEATURE_NRIPS) | - (1u << SVM_FEATURE_PAUSEFILTER) | - (1u << SVM_FEATURE_DECODEASSISTS)); -/* Enable features which are always emulated. */ -p->extd.raw[0xa].d |= (1u << SVM_FEATURE_VMCBCLEAN); +/* Xen always emulates cleanbits. */ +__set_bit(X86_FEATURE_VMCB_CLEANBITS, fs); } - + guest_common_max_feature_adjustments(fs); guest_common_feature_adjustments(fs); diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h index 0f869214811e..80d252a38c2d 100644 --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -358,6 +358,20 @@ XEN_CPUFEATURE(RFDS_CLEAR, 16*32+28) /*!A Register File(s) cleared by VE /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ /* AMD-defined CPU features, CPUID level 0x800a.edx, word 18 */ +XEN_CPUFEATURE(NPT,18*32+ 0) /*h Nested PageTables */ +XEN_CPUFEATURE(V_LBR, 18*32+ 1) /*h Virtualised LBR */ +XEN_CPUFEATURE(SVM_LOCK, 18*32+ 2) /* SVM locking MSR */ +XEN_CPUFEATURE(NRIPS, 18*32+ 3) /*h Next-RIP saved on VMExit */ +XEN_CPUFEATURE(V_TSC_RATE, 18*32+ 4) /* Virtualised TSC Ratio */ +XEN_CPUFEATURE(VMCB_CLEANBITS, 18*32+ 5) /*! 
VMCB Clean-bits */ +XEN_CPUFEATURE(FLUSH_BY_ASID, 18*32+ 6) /* TLB Flush by ASID */ +XEN_CPUFEATURE(DECODE_ASSIST, 18*32+ 7) /*h Decode assists */ +XEN_CPUFEATURE(PAUSE_FILTER, 18*32+10) /*h Pause filter */ +XEN_CPUFEATURE(PAUSE_THRESH, 18*32+12) /* Pause filter threshold */ +XEN_CPUFEATURE(V_LOADSAVE, 18*32+15) /* Virtualised VMLOAD/SAVE */ +XEN_CPUFEATURE(V_GIF, 18*32+16) /* Virtualised GIF */ +XEN_CPUFEATURE(NPT_SSS,18*32+19) /* NPT Supervisor Shadow Stacks */ +XEN_CPUFEATURE(V_SPEC_CTRL,18*32+20) /* Virtualised MSR_SPEC_CTRL */ /* AMD-defined CPU features, CPUID level 0x801f.eax, word 19 */ diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py index bf3f9ec01e6e..f07b1f4cf905 100755 --- a/xen/tools/gen-cpuid.py +++ b/xen/tools/gen-cpuid.py @@ -280,6 +280,9 @@ def crunch_numbers(state): # standard 3DNow in the earlier K6 processors. _3DNOW: [_3DNOWEXT], +# The SVM bit enumerates the whole SVM leave. +SVM: list(range(NPT, NPT + 32)), + #
Re: [PATCH v3 6/8] gzip: move output buffer into gunzip state
On 24.04.2024 18:34, Daniel P. Smith wrote: > --- a/xen/common/gzip/gunzip.c > +++ b/xen/common/gzip/gunzip.c > @@ -15,6 +15,8 @@ struct gunzip_state { > size_t insize; > /* Index of next byte to be processed in inbuf: */ > unsigned int inptr; > + > +unsigned long bytes_out; > }; The conversion to unsigned long from ... > @@ -42,7 +44,6 @@ typedef unsigned long ulg; > # define Tracecv(c, x) > #endif > > -static long __initdata bytes_out; ... this originally wants justifying in the (then no longer empty) description. It's not a lot that needs saying, but such a type change cannot go entirely silently. Jan
Re: [PATCH v3 5/8] gzip: move input buffer handling into gunzip state
On 24.04.2024 18:34, Daniel P. Smith wrote: > --- a/xen/common/gzip/gunzip.c > +++ b/xen/common/gzip/gunzip.c > @@ -10,13 +10,12 @@ struct gunzip_state { > unsigned char *window; > /* window pointer: */ > unsigned int wp; > -}; > - > -static unsigned char *__initdata inbuf; > -static unsigned int __initdata insize; > > -/* Index of next byte to be processed in inbuf: */ > -static unsigned int __initdata inptr; > +unsigned char *inbuf; > +size_t insize; > +/* Index of next byte to be processed in inbuf: */ > +unsigned int inptr; I'm puzzled by the (suddenly) different types, seeing that ... > +}; > > #define malloc(a) xmalloc_bytes(a) > #define free(a) xfree(a) > @@ -51,14 +50,14 @@ static __init void error(const char *x) > panic("%s\n", x); > } > > -static __init uch get_byte(void) { > -if ( inptr >= insize ) > +static __init uch get_byte(struct gunzip_state *s) { > +if ( s->inptr >= s->insize ) ... both are compared with one another. > @@ -1129,29 +1129,29 @@ static int __init gunzip(struct gunzip_state *s) > error("Input has invalid flags"); > return -1; > } > -NEXTBYTE(); /* Get timestamp */ > -NEXTBYTE(); > -NEXTBYTE(); > -NEXTBYTE(); > +NEXTBYTE(s); /* Get timestamp */ > +NEXTBYTE(s); > +NEXTBYTE(s); > +NEXTBYTE(s); > > -NEXTBYTE(); /* Ignore extra flags for the moment */ > -NEXTBYTE(); /* Ignore OS type for the moment */ > +NEXTBYTE(s); /* Ignore extra flags for the moment */ > +NEXTBYTE(s); /* Ignore OS type for the moment */ > > if ((flags & EXTRA_FIELD) != 0) { > -unsigned len = (unsigned)NEXTBYTE(); > -len |= ((unsigned)NEXTBYTE())<<8; > -while (len--) (void)NEXTBYTE(); > +unsigned len = (unsigned)NEXTBYTE(s); > +len |= ((unsigned)NEXTBYTE(s))<<8; > +while (len--) (void)NEXTBYTE(s); Would you mind moving the body of this while() to its own line, as you touch this anyway? 
> } > > /* Get original file name if it was truncated */ > if ((flags & ORIG_NAME) != 0) { > /* Discard the old name */ > -while (NEXTBYTE() != 0) /* null */ ; > +while (NEXTBYTE(s) != 0) /* null */ ; > } > > /* Discard file comment if any */ > if ((flags & COMMENT) != 0) { > -while (NEXTBYTE() != 0) /* null */ ; > +while (NEXTBYTE(s) != 0) /* null */ ; > } For these two doing the same may help, too. Jan
[PATCH 5/9] create-diff-object: move kpatch_include_symbol()
So it can be used by kpatch_process_special_sections() in further changes. Non functional change. Signed-off-by: Roger Pau Monné --- create-diff-object.c | 74 ++-- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/create-diff-object.c b/create-diff-object.c index 6a751bf3b789..7674d972e301 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1198,6 +1198,43 @@ void kpatch_update_ex_table_addend(struct kpatch_elf *kelf, } } +#define inc_printf(fmt, ...) \ + log_debug("%*s" fmt, recurselevel, "", ##__VA_ARGS__); + +static void kpatch_include_symbol(struct symbol *sym, int recurselevel) +{ + struct rela *rela; + struct section *sec; + + inc_printf("start include_symbol(%s)\n", sym->name); + sym->include = 1; + inc_printf("symbol %s is included\n", sym->name); + /* +* Check if sym is a non-local symbol (sym->sec is NULL) or +* if an unchanged local symbol. This a base case for the +* inclusion recursion. +*/ + if (!sym->sec || sym->sec->include || + (sym->type != STT_SECTION && sym->status == SAME)) + goto out; + sec = sym->sec; + sec->include = 1; + inc_printf("section %s is included\n", sec->name); + if (sec->secsym && sec->secsym != sym) { + sec->secsym->include = 1; + inc_printf("section symbol %s is included\n", sec->secsym->name); + } + if (!sec->rela) + goto out; + sec->rela->include = 1; + inc_printf("section %s is included\n", sec->rela->name); + list_for_each_entry(rela, >rela->relas, list) + kpatch_include_symbol(rela->sym, recurselevel+1); +out: + inc_printf("end include_symbol(%s)\n", sym->name); + return; +} + static void kpatch_regenerate_special_section(struct kpatch_elf *kelf, struct special_section *special, struct section *sec) @@ -1455,43 +1492,6 @@ static void kpatch_include_standard_string_elements(struct kpatch_elf *kelf) } } -#define inc_printf(fmt, ...) 
\ - log_debug("%*s" fmt, recurselevel, "", ##__VA_ARGS__); - -static void kpatch_include_symbol(struct symbol *sym, int recurselevel) -{ - struct rela *rela; - struct section *sec; - - inc_printf("start include_symbol(%s)\n", sym->name); - sym->include = 1; - inc_printf("symbol %s is included\n", sym->name); - /* -* Check if sym is a non-local symbol (sym->sec is NULL) or -* if an unchanged local symbol. This a base case for the -* inclusion recursion. -*/ - if (!sym->sec || sym->sec->include || - (sym->type != STT_SECTION && sym->status == SAME)) - goto out; - sec = sym->sec; - sec->include = 1; - inc_printf("section %s is included\n", sec->name); - if (sec->secsym && sec->secsym != sym) { - sec->secsym->include = 1; - inc_printf("section symbol %s is included\n", sec->secsym->name); - } - if (!sec->rela) - goto out; - sec->rela->include = 1; - inc_printf("section %s is included\n", sec->rela->name); - list_for_each_entry(rela, >rela->relas, list) - kpatch_include_symbol(rela->sym, recurselevel+1); -out: - inc_printf("end include_symbol(%s)\n", sym->name); - return; -} - static int kpatch_include_changed_functions(struct kpatch_elf *kelf) { struct symbol *sym; -- 2.44.0
[PATCH 9/9] create-diff-object: account for special section changes
When deciding whether there's content to generate a payload also take into account whether special sections have changed. Initially account for changes to alternative related section to cause the generation of a livepatch. Note that accounting for hook sections is already done by kpatch_include_hook_elements() when deciding whether there's changed content in the object file. .bugframe sections are also not accounted for since any section in a bugframe section will be accompanied by a change in the function the bugframe references (the placement of the BUG_INSTR will change in the caller function). Signed-off-by: Roger Pau Monné --- create-diff-object.c | 24 1 file changed, 24 insertions(+) diff --git a/create-diff-object.c b/create-diff-object.c index 8bfb6bf92a1b..957631097b8a 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1663,6 +1663,28 @@ static int kpatch_include_hook_elements(struct kpatch_elf *kelf) return num_new_functions; } +static unsigned int include_special_sections(const struct kpatch_elf *kelf) +{ + const struct section *sec; + unsigned int nr = 0; + + /* +* Only account for changes in alternatives and exception table related +* sections. Hooks are already taken care of separately, and changes +* in bug_frame sections always go along with changes in the caller +* functions. 
+*/ + list_for_each_entry(sec, >sections, list) + if (sec->status != SAME && + (!strcmp(sec->name, ".altinstructions") || +!strcmp(sec->name, ".altinstr_replacement") || +!strcmp(sec->name, ".ex_table") || +!strcmp(sec->name, ".fixup"))) + nr++; + + return nr; +} + static int kpatch_include_new_globals(struct kpatch_elf *kelf) { struct symbol *sym; @@ -2469,6 +2491,8 @@ int main(int argc, char *argv[]) kpatch_include_debug_sections(kelf_patched); log_debug("Include hook elements\n"); num_changed += kpatch_include_hook_elements(kelf_patched); + log_debug("Include special sections\n"); + num_changed += include_special_sections(kelf_patched); log_debug("num_changed = %d\n", num_changed); log_debug("Include new globals\n"); new_globals_exist = kpatch_include_new_globals(kelf_patched); -- 2.44.0
[PATCH 7/9] create-diff-object: don't account for changes to .bug_frame.? sections
bug_frame related sections exclusively contain addresses that reference back to the address where the BUG_INSTR is executed. As such, any change to the contents of bug_frame sections (or its relocations) will necessarily require a change in the caller function, as the placement of the BUG_INSTR must have changed. Take advantage of this relocation, and unconditionally mark the bug_frame sections themselves as not having changed, the logic in kpatch_regenerate_special_section() will already take care of including any bug_frame element group that references a function that has changed. This should be a non functional change in the payload generated by create-diff-object, but needs doing so that we can take into account changes to .altinstructions and .ex_table sections themselves without unnecessarily also pulling .bug_frame sections. Signed-off-by: Roger Pau Monné --- create-diff-object.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/create-diff-object.c b/create-diff-object.c index f4e4da063d0a..d1b1477be1cd 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1284,6 +1284,17 @@ static void kpatch_regenerate_special_section(struct kpatch_elf *kelf, } } + /* +* For bug_frame sections don't care if the section itself or the +* relocations have changed, as any change in the bug_frames will be +* accompanied by a change in the caller function text, as the +* BUG_INSTR will have a different placement in the caller. +*/ + if (!strncmp(special->name, ".bug_frames.", strlen(".bug_frames."))) { + sec->status = SAME; + sec->base->status = SAME; + } + for ( ; src_offset < sec->base->sh.sh_size; src_offset += group_size) { group_size = special->group_size(kelf, src_offset); -- 2.44.0
[PATCH 3/9] livepatch-build: fix detection of structure sizes
The current runes assume that in the list of DWARF tags DW_AT_byte_size will come after DW_AT_name, but that's not always the case. On one of my builds I've seen: DW_AT_name: (indirect string, offset: 0x3c45): exception_table_entry DW_AT_declaration : 1 <1>: Abbrev Number: 5 (DW_TAG_const_type) DW_AT_type: <0xb617> <1>: Abbrev Number: 14 (DW_TAG_pointer_type) DW_AT_byte_size : 8 Instead of assuming such order, explicitly search for the DW_AT_byte_size tag when a match in the DW_AT_name one is found. Signed-off-by: Roger Pau Monné --- All this ad hoc parsing of DWARF data seems very fragile. This is an improvement over the current logic, but I would still prefer if we could find a more reliable way to obtain the struct sizes we need. --- livepatch-build | 7 +++ 1 file changed, 7 insertions(+) diff --git a/livepatch-build b/livepatch-build index f3ca9399d149..aad9849f2ba9 100755 --- a/livepatch-build +++ b/livepatch-build @@ -427,9 +427,16 @@ if [ "${SKIP}" != "build" ]; then SPECIAL_VARS=$(readelf -wi "$OUTPUT/xen-syms" | awk ' BEGIN { a = b = e = 0 } + # Ensure no fall through to the next tag without getting the size + (a == 1 || b == 1 || e == 1) && /DW_AT_name/ \ + {print "can not get special structure size" > "/dev/stderr"; exit 1} a == 0 && /DW_AT_name.* alt_instr/ {a = 1; next} b == 0 && /DW_AT_name.* bug_frame/ {b = 1; next} e == 0 && /DW_AT_name.* exception_table_entry/ {e = 1; next} + # Seek the line that contains the size + a == 1 && !/DW_AT_byte_size/ {next} + b == 1 && !/DW_AT_byte_size/ {next} + e == 1 && !/DW_AT_byte_size/ {next} # Use shell printf to (possibly) convert from hex to decimal a == 1 {printf("export ALT_STRUCT_SIZE=`printf \"%%d\" \"%s\"`\n", $4); a = 2} b == 1 {printf("export BUG_STRUCT_SIZE=`printf \"%%d\" \"%s\"`\n", $4); b = 2} -- 2.44.0
[PATCH 1/9] livepatch-build-tools: allow patch file name sizes up to 127 characters
XenServer uses quite long Xen version names, and encode such in the livepatch filename, and it's currently running out of space in the file name. Bump max filename size to 127, so it also matches the patch name length in the hypervisor interface. Note the size of the buffer is 128 character, and the last one is reserved for the null terminator. Signed-off-by: Roger Pau Monné --- Changes since v1: - Take into account the null terminator. --- livepatch-build | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/livepatch-build b/livepatch-build index 948b2acfc2f6..f3ca9399d149 100755 --- a/livepatch-build +++ b/livepatch-build @@ -72,8 +72,9 @@ function make_patch_name() fi # Only allow alphanumerics and '_' and '-' in the patch name. Everything -# else is replaced with '-'. Truncate to 48 chars. -echo ${PATCHNAME//[^a-zA-Z0-9_-]/-} |cut -c 1-48 +# else is replaced with '-'. Truncate to 127 chars +# (XEN_LIVEPATCH_NAME_SIZE - 1). +echo ${PATCHNAME//[^a-zA-Z0-9_-]/-} |cut -c -127 } # Do a full normal build -- 2.44.0
[PATCH 6/9] create-diff-object: detect changes in .altinstr_replacement and .fixup sections
The current logic to detect changes in special section elements will only include group elements that reference function symbols that need including (either because they have changed or are new). This works fine in order to detect when a special section element needs including because the function it points to has changed or is newly introduced, but doesn't detect changes to the replacement code in .altinstr_replacement or .fixup sections, as the relocations in that case are against STT_SECTION symbols. Fix this by also including the special section group if the symbol the relocation points to is of type section. Signed-off-by: Roger Pau Monné --- create-diff-object.c | 29 +++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/create-diff-object.c b/create-diff-object.c index 7674d972e301..f4e4da063d0a 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1158,11 +1158,21 @@ static int should_keep_rela_group(struct section *sec, int start, int size) struct rela *rela; int found = 0; - /* check if any relas in the group reference any changed functions */ + /* +* Check if any relas in the group reference any changed functions. +* +* Relocations in the .altinstructions and .ex_table special sections +* can also reference changed section symbols, since the replacement +* text in both cases resides on a section that has no function symbols +* (.altinstr_replacement and .fixup respectively). Account for that +* and also check whether the referenced symbols are section ones in +* order to decide whether the relocation group needs including. 
+*/ list_for_each_entry(rela, >relas, list) { if (rela->offset >= start && rela->offset < start + size && - rela->sym->type == STT_FUNC && + (rela->sym->type == STT_FUNC || +rela->sym->type == STT_SECTION) && rela->sym->sec->include) { found = 1; log_debug("new/changed symbol %s found in special section %s\n", @@ -1338,6 +1348,21 @@ static void kpatch_regenerate_special_section(struct kpatch_elf *kelf, rela->sym->include = 1; + /* +* Changes that cause only the +* .altinstr_replacement or .fixup sections to +* be modified need the target function of the +* replacement to also be marked as CHANGED, +* otherwise livepatch won't replace the +* function, and the new replacement code won't +* take effect. +*/ + if (rela->sym->type == STT_FUNC && + rela->sym->status == SAME) { + rela->sym->status = CHANGED; + kpatch_include_symbol(rela->sym, 0); + } + if (!strcmp(special->name, ".fixup")) kpatch_update_ex_table_addend(kelf, special, src_offset, -- 2.44.0
[PATCH 8/9] create-diff-object: account for changes in the special section itself
Changes to special sections are not accounted for right now. For example a change that only affects .altinstructions or .ex_table won't be included in the livepatch payload, if none of the symbols referenced by those sections have also changed. Since it's not possible to know which elements in the special group have changed if we detect a change in the special section the whole group (minus elements that have references to init section symbols) must be included. Signed-off-by: Roger Pau Monné --- create-diff-object.c | 33 ++--- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/create-diff-object.c b/create-diff-object.c index d1b1477be1cd..8bfb6bf92a1b 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1170,13 +1170,32 @@ static int should_keep_rela_group(struct section *sec, int start, int size) */ list_for_each_entry(rela, >relas, list) { if (rela->offset >= start && - rela->offset < start + size && - (rela->sym->type == STT_FUNC || -rela->sym->type == STT_SECTION) && - rela->sym->sec->include) { - found = 1; - log_debug("new/changed symbol %s found in special section %s\n", - rela->sym->name, sec->name); + rela->offset < start + size) + { + /* +* Avoid including groups with relocations against +* symbols living in init sections, those are never +* included in livepatches. +*/ + if (is_init_section(rela->sym->sec)) + return 0; + + /* +* If the base section has changed, always include all +* groups (except those pointing to .init section +* symbols), as we have no way to differentiate which +* groups have changed. +*/ + if (sec->status != SAME || sec->base->status != SAME) + found = 1; + else if ((rela->sym->type == STT_FUNC || + rela->sym->type == STT_SECTION) && +rela->sym->sec->include) { + found = 1; + log_debug( + "new/changed symbol %s found in special section %s\n", + rela->sym->name, sec->name); + } } } -- 2.44.0
[PATCH 4/9] create-diff-object: document kpatch_regenerate_special_section() behavior
The purpose of kpatch_regenerate_special_section() is fairly opaque without spending a good amount of time and having quite a lot of knowledge about what the special sections contains. Introduce some comments in order to give context and describe the expected functionality. Signed-off-by: Roger Pau Monné --- create-diff-object.c | 33 + 1 file changed, 33 insertions(+) diff --git a/create-diff-object.c b/create-diff-object.c index d8a2afbf2774..6a751bf3b789 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1210,6 +1210,12 @@ static void kpatch_regenerate_special_section(struct kpatch_elf *kelf, src_offset = 0; dest_offset = 0; + + /* +* Special sections processed here are array objects, hence in order to +* detect whether a special section needs processing attempt to get the +* element size. Returning a size of 0 means no processing required. +*/ group_size = special->group_size(kelf, src_offset); if (group_size == 0) { log_normal("Skipping regeneration of a special section: %s\n", @@ -1246,6 +1252,33 @@ static void kpatch_regenerate_special_section(struct kpatch_elf *kelf, if (src_offset + group_size > sec->base->sh.sh_size) group_size = sec->base->sh.sh_size - src_offset; + /* +* Special sections handled perform a bunch of different tasks, +* but they all have something in common: they are array like +* sections that reference functions in the object file being +* processed. +* +* .bug_frames.* relocations reference the symbol (plus offset) +* where the exception is triggered from. +* +* .altinstructions relocations contain references to +* coordinates where the alternatives are to be applied, plus +* coordinates that point to the replacement code in +* .altinstr_replacement. +* +* .ex_table relocations contain references to the coordinates +* where the fixup code should be executed, plus relocation +* coordinates that point to the text code to execte living in +* the .fixup section. 
+* +* .livepatch.hooks.* relocations point to the hook +* functions. +* +* Such dependencies allow to make a decision on whether an +* element in the array needs including in the livepatch: if +* the symbol pointed by the relocation is new or has changed +* the array element needs including. +*/ include = should_keep_rela_group(sec, src_offset, group_size); if (!include) -- 2.44.0
[PATCH 0/9] livepatch-build-tools: some bug fixes and improvements
Hello, The first 3 patches in the series attempt to fix some bugs, I don't think they will be especially controversial or difficult to review (patch 1 was already posted standalone). The rest of the patches are more convoluted, as they attempt to solve some shortcomings when attempting to create livepatches that change alternatives or exceptions. For example a patch that only changes content in alternative blocks (the code that ends up in the .altinstr_replacement section) won't work, as create-diff-object will report that there are no changes. I've attempted to test as many corner cases as I could come up with, but it's hard to figure all the possible changes, plus it's also fairly easy to inadvertently regress existing functionality. Thanks, Roger. Roger Pau Monne (9): livepatch-build-tools: allow patch file name sizes up to 127 characters create-diff-object: update default alt_instr size livepatch-build: fix detection of structure sizes create-diff-object: document kpatch_regenerate_special_section() behavior create-diff-object: move kpatch_include_symbol() create-diff-object: detect changes in .altinstr_replacement and .fixup sections create-diff-object: don't account for changes to .bug_frame.? sections create-diff-object: account for changes in the special section itself create-diff-object: account for special section changes create-diff-object.c | 196 +-- livepatch-build | 12 ++- 2 files changed, 161 insertions(+), 47 deletions(-) -- 2.44.0
[PATCH 2/9] create-diff-object: update default alt_instr size
The size of the alt_instr structure in Xen is 14 instead of 12 bytes, adjust it. Signed-off-by: Roger Pau Monné --- create-diff-object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/create-diff-object.c b/create-diff-object.c index fed360a9aa68..d8a2afbf2774 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1000,7 +1000,7 @@ static int altinstructions_group_size(struct kpatch_elf *kelf, int offset) char *str; if (!size) { str = getenv("ALT_STRUCT_SIZE"); - size = str ? atoi(str) : 12; + size = str ? atoi(str) : 14; } log_debug("altinstr_size=%d\n", size); -- 2.44.0
Re: [PATCH] revert "x86/mm: re-implement get_page_light() using an atomic increment"
On Mon, Apr 29, 2024 at 03:01:00PM +0200, Jan Beulich wrote: > revert "x86/mm: re-implement get_page_light() using an atomic increment" > > This reverts commit c40bc0576dcc5acd4d7e22ef628eb4642f568533. > > That change aimed at eliminating an open-coded lock-like construct, > which really isn't all that similar to, in particular, get_page(). The > function always succeeds. Any remaining concern would want taking care > of by placing block_lock_speculation() at the end of the function. > Since the function is called only during page (de)validation, any > possible performance concerns over such extra serialization could > likely be addressed by pre-validating (e.g. via pinning) page tables. > > The fundamental issue with the change being reverted is that it detects > bad state only after already having caused possible corruption. While > the system is going to be halted in such an event, there is a time > window during which the resulting incorrect state could be leveraged by > a clever (in particular: fast enough) attacker. > > Signed-off-by: Jan Beulich Acked-by: Roger Pau Monné Thanks.
Re: [PATCH v3 4/8] gzip: move window pointer into gunzip state
On 24.04.2024 18:34, Daniel P. Smith wrote: > Move the window pointer, outcnt/wp, into struct gunzip_data. It was > erroneously > labeled as outcnt and then define aliased to wp, this eliminates the aliasing > and only refers to as wp, the window pointer. > > Signed-off-by: Daniel P. Smith Acked-by: Jan Beulich
Re: [PATCH v3 2/8] gzip: refactor and expand macros
On 24.04.2024 18:34, Daniel P. Smith wrote: > This commit refactors macros into proper static functions. It in-place expands > the `flush_output` macro, allowing the clear removal of the dead code > underneath the `underrun` label. But it's NEXTBYTE() which uses the label, not flush_output(). I'm actually unconvinced of its expanding / removal. > --- a/xen/common/gzip/gunzip.c > +++ b/xen/common/gzip/gunzip.c > @@ -25,8 +25,6 @@ typedef unsigned char uch; > typedef unsigned short ush; > typedef unsigned long ulg; > > -#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) > - > /* Diagnostic functions */ > #ifdef DEBUG > # define Assert(cond, msg) do { if (!(cond)) error(msg); } while (0) > @@ -52,10 +50,14 @@ static __init void error(const char *x) > panic("%s\n", x); > } > > -static __init int fill_inbuf(void) > -{ > -error("ran out of input data"); > -return 0; I'm not convinced of the removal of this as a separate function. It only calling error() right now could change going forward, so I'd at least expect a little bit of a justification. > +static __init uch get_byte(void) { Nit: Brace goes on its own line. Also for (effectively) new code it would be nice if __init (and alike) would be placed "canonically", i.e. between type and identifier. > --- a/xen/common/gzip/inflate.c > +++ b/xen/common/gzip/inflate.c > @@ -119,6 +119,18 @@ static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 > 13:27:04 jloup Exp #"; > > #endif /* !__XEN__ */ > > +/* > + * The inflate algorithm uses a sliding 32 K byte window on the uncompressed > + * stream to find repeated byte strings. This is implemented here as a > + * circular buffer. The index is updated simply by incrementing and then > + * ANDing with 0x7fff (32K-1). > + * > + * It is left to other modules to supply the 32 K area. It is assumed > + * to be usable as if it were declared "uch slide[32768];" or as just > + * "uch *slide;" and then malloc'ed in the latter case. 
The definition > + * must be in unzip.h, included above. Nit: s/definition/declaration/ ? > + */ > +#define wp outcnt > #define slide window > > /* > @@ -150,21 +162,6 @@ static int inflate_dynamic(void); > static int inflate_block(int *); > static int inflate(void); > > -/* > - * The inflate algorithm uses a sliding 32 K byte window on the uncompressed > - * stream to find repeated byte strings. This is implemented here as a > - * circular buffer. The index is updated simply by incrementing and then > - * ANDing with 0x7fff (32K-1). > - * > - * It is left to other modules to supply the 32 K area. It is assumed > - * to be usable as if it were declared "uch slide[32768];" or as just > - * "uch *slide;" and then malloc'ed in the latter case. The definition > - * must be in unzip.h, included above. Oh, an earlier comment just moves up. Is there really a need for this extra churn? > @@ -224,7 +221,7 @@ static const ush mask_bits[] = { > 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0x > }; > > -#define NEXTBYTE() ({ int v = get_byte(); if (v < 0) goto underrun; (uch)v; > }) > +#define NEXTBYTE() (get_byte()) /* get_byte will panic on failure */ Nit: No need for the outer parentheses. > @@ -1148,8 +1135,8 @@ static int __init gunzip(void) > NEXTBYTE(); > NEXTBYTE(); > > -(void)NEXTBYTE(); /* Ignore extra flags for the moment */ > -(void)NEXTBYTE(); /* Ignore OS type for the moment */ > +NEXTBYTE(); /* Ignore extra flags for the moment */ > +NEXTBYTE(); /* Ignore OS type for the moment */ In Misra discussions there were indications that such casts may need (re-) introducing. Perhaps better leave this alone, the more when it's not really fitting the patch's purpose? Jan
Re: [PATCH v8 08/17] xen/riscv: introduce atomic.h
On 17.04.2024 12:04, Oleksii Kurochko wrote: > --- /dev/null > +++ b/xen/arch/riscv/include/asm/atomic.h > @@ -0,0 +1,281 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +/* > + * Taken and modified from Linux. > + * > + * The following changes were done: > + * - * atomic##prefix##_*xchg_*(atomic##prefix##_t *v, c_t n) were updated > + * to use__*xchg_generic() > + * - drop casts in write_atomic() as they are unnecessary > + * - drop introduction of WRITE_ONCE() and READ_ONCE(). > + * Xen provides ACCESS_ONCE() > + * - remove zero-length array access in read_atomic() > + * - drop defines similar to pattern > + * #define atomic_add_return_relaxed atomic_add_return_relaxed > + * - move not RISC-V specific functions to asm-generic/atomics-ops.h > + * > + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. > + * Copyright (C) 2012 Regents of the University of California > + * Copyright (C) 2017 SiFive > + * Copyright (C) 2024 Vates SAS > + */ > + > +#ifndef _ASM_RISCV_ATOMIC_H > +#define _ASM_RISCV_ATOMIC_H > + > +#include > + > +#include > +#include > +#include > +#include > + > +void __bad_atomic_size(void); > + > +/* > + * Legacy from Linux kernel. For some reason they wanted to have ordered > + * read/write access. 
Thereby read* is used instead of read*_cpu() > + */ > +static always_inline void read_atomic_size(const volatile void *p, > + void *res, > + unsigned int size) > +{ > +switch ( size ) > +{ > +case 1: *(uint8_t *)res = readb(p); break; > +case 2: *(uint16_t *)res = readw(p); break; > +case 4: *(uint32_t *)res = readl(p); break; > +#ifndef CONFIG_RISCV_32 > +case 8: *(uint32_t *)res = readq(p); break; > +#endif > +default: __bad_atomic_size(); break; > +} > +} > + > +#define read_atomic(p) ({ \ > +union { typeof(*(p)) val; char c[sizeof(*(p))]; } x_; \ > +read_atomic_size(p, x_.c, sizeof(*(p)));\ > +x_.val; \ > +}) > + > +static always_inline void _write_atomic(volatile void *p, > + unsigned long x, unsigned int size) > +{ > +switch ( size ) > +{ > +case 1: writeb(x, p); break; > +case 2: writew(x, p); break; > +case 4: writel(x, p); break; > +#ifndef CONFIG_RISCV_32 > +case 8: writeq(x, p); break; > +#endif > +default: __bad_atomic_size(); break; > +} > +} > + > +#define write_atomic(p, x) \ > +({ \ > +typeof(*(p)) x_ = (x); \ > +_write_atomic((p), x_, sizeof(*(p))); \ Nit: There are still excess parentheses here. > +x_; \ Why is this? The macro isn't supposed to "return" a value, is it? > +}) > + > +static always_inline void _add_sized(volatile void *p, > + unsigned long x, unsigned int size) > +{ > +switch ( size ) > +{ > +case 1: > +{ > +volatile uint8_t *ptr = (volatile uint8_t *)p; Here and below: Why the casts? Jan
[XEN PATCH v2 1/2] evtchn: Add error status indicators for evtchn_status hypercall
When the evtchn_status hypercall fails, it is not possible to determine the cause of the error, since the library call returns -1 to the tool and not the errno. Because of this, lsevtchn is unable to determine whether to continue event channel enumeration upon an evtchn_status hypercall error. Add error status indicators for the evtchn_status hypercall for lsevtchn to terminate its loop on, and keep other errors as failed hypercalls. Signed-off-by: Matthew Barnes --- xen/common/event_channel.c | 12 +++- xen/include/public/event_channel.h | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index aceee0695f9f..0f11e71c3e6f 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -1030,7 +1030,17 @@ int evtchn_status(evtchn_status_t *status) d = rcu_lock_domain_by_any_id(dom); if ( d == NULL ) -return -ESRCH; +{ +status->status = EVTCHNSTAT_dom_invalid; +return 0; +} + +if ( !port_is_valid(d, port) ) +{ +status->status = EVTCHNSTAT_port_invalid; +rcu_unlock_domain(d); +return 0; +} chn = _evtchn_from_port(d, port); if ( !chn ) diff --git a/xen/include/public/event_channel.h b/xen/include/public/event_channel.h index 0d91a1c4afab..29cbf43945b3 100644 --- a/xen/include/public/event_channel.h +++ b/xen/include/public/event_channel.h @@ -200,6 +200,8 @@ struct evtchn_status { #define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ #define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ #define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ +#define EVTCHNSTAT_dom_invalid 6 /* Given domain ID is not a valid domain */ +#define EVTCHNSTAT_port_invalid 7 /* Given port is not within valid range */ uint32_t status; uint32_t vcpu; /* VCPU to which this channel is bound. */ union { -- 2.34.1
[XEN PATCH v2 0/2] Enumerate all allocated evtchns in lsevtchn
Currently, lsevtchn aborts its event channel enumeration when it hits its first hypercall error, namely: * When an event channel doesn't exist at the specified port * When the event channel is owned by Xen This results in lsevtchn missing potential relevant event channels with higher port numbers, since lsevtchn cannot determine the cause of hypercall errors. This patch adds error status indicators for the evtchn_status hypercall for when no further event channels will be yielded for higher port numbers, allowing lsevtchn to terminate when all event channels have been enumerated over. Matthew Barnes (2): evtchn: Add error status indicators for evtchn_status hypercall tools/lsevtchn: Use new status identifiers in loop tools/xcutils/lsevtchn.c | 11 ++- xen/common/event_channel.c | 12 +++- xen/include/public/event_channel.h | 2 ++ 3 files changed, 23 insertions(+), 2 deletions(-) -- 2.34.1
[XEN PATCH v2 2/2] tools/lsevtchn: Use new status identifiers in loop
lsevtchn terminates the loop when the hypercall returns an error, even if there are still event channels with higher port numbers to be enumerated over. Continue the loop even on hypercall errors, and use the new status identifiers for the evtchn_status hypercall, namely "port invalid" and "domain invalid", to determine when to break the loop. Signed-off-by: Matthew Barnes --- tools/xcutils/lsevtchn.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/xcutils/lsevtchn.c b/tools/xcutils/lsevtchn.c index d1710613ddc5..4a48620cd72a 100644 --- a/tools/xcutils/lsevtchn.c +++ b/tools/xcutils/lsevtchn.c @@ -24,11 +24,20 @@ int main(int argc, char **argv) status.port = port; rc = xc_evtchn_status(xch, ); if ( rc < 0 ) -break; +continue; if ( status.status == EVTCHNSTAT_closed ) continue; +if ( status.status == EVTCHNSTAT_dom_invalid ) +{ +printf("Domain ID '%d' does not correspond to valid domain.\n", domid); +break; +} + +if ( status.status == EVTCHNSTAT_port_invalid ) +break; + printf("%4d: VCPU %u: ", port, status.vcpu); switch ( status.status ) -- 2.34.1
Re: [PATCH v8 06/17] xen/riscv: introduce cmpxchg.h
On 17.04.2024 12:04, Oleksii Kurochko wrote: > +/* > + * This function doesn't exist, so you'll get a linker error > + * if something tries to do an invalid xchg(). > + */ > +extern void __bad_xchg(volatile void *ptr, int size); > + > +static always_inline unsigned long __xchg(volatile void *ptr, unsigned long > new, int size) > +{ > +unsigned long ret; > + > +switch ( size ) > +{ > +case 1: > +ret = emulate_xchg_1_2((volatile uint8_t *)ptr, new, ".aq", ".aqrl"); > +break; > +case 2: > +ret = emulate_xchg_1_2((volatile uint16_t *)ptr, new, ".aq", > ".aqrl"); > +break; > +case 4: > +_amoswap_generic((volatile uint32_t *)ptr, new, ret, ".w.aqrl"); > +break; > +#ifndef CONFIG_RISCV_32 > +case 8: > +_amoswap_generic((volatile uint64_t *)ptr, new, ret, ".d.aqrl"); > +break; > +#endif > +default: > +__bad_xchg(ptr, size), ret = 0; > +} I see no real reason to use a comma expression here, the more that "break" needs adding anyway. I wonder why here you don't use ... > +/* This function doesn't exist, so you'll get a linker error > + if something tries to do an invalid cmpxchg(). */ > +extern unsigned long __bad_cmpxchg(volatile void *ptr, int size); > + > +/* > + * Atomic compare and exchange. Compare OLD with MEM, if identical, > + * store NEW in MEM. Return the initial value in MEM. Success is > + * indicated by comparing RETURN with OLD. 
> + */ > +static always_inline unsigned long __cmpxchg(volatile void *ptr, > + unsigned long old, > + unsigned long new, > + int size) > +{ > +unsigned long ret; > + > +switch ( size ) > +{ > +case 1: > +ret = emulate_cmpxchg_1_2((volatile uint8_t *)ptr, old, new, > + ".aq", ".aqrl"); > +break; > +case 2: > +ret = emulate_cmpxchg_1_2((volatile uint16_t *)ptr, old, new, > + ".aq", ".aqrl"); > +break; > +case 4: > +ret = _generic_cmpxchg((volatile uint32_t *)ptr, old, new, > + ".w.aq", ".w.aqrl"); > +break; > +#ifndef CONFIG_32BIT > +case 8: > +ret = _generic_cmpxchg((volatile uint64_t *)ptr, old, new, > + ".d.aq", ".d.aqrl"); > +break; > +#endif > +default: > +return __bad_cmpxchg(ptr, size); ... the approach used here. Also (style nit) the comment on __bad_cmpxchg() is malformed, unlike that ahead of __bad_xchg(). Jan
Re: [PATCH v1] xen/riscv: improve check-extension() macro
On 26.04.2024 17:23, Oleksii Kurochko wrote: > Now, the check-extension() macro has 1 argument instead of 2. > This change helps to reduce redundancy around usage of extensions > name (in the case of the zbb extension, the name was used 3 times). > > To implement this, a new variable was introduced: > -insn > which represents the instruction support that is being checked. > > Additionally, zbb-insn is updated to use $(comma) instead of ",". Which is merely just-in-case, I suppose, but not strictly necessary anymore? > Signed-off-by: Oleksii Kurochko > Suggested-by: Jan Beulich Reviewed-by: Jan Beulich Just as a remark: Tags want to be put in chronological order. Jan
Re: [PATCH] revert "x86/mm: re-implement get_page_light() using an atomic increment"
On 29/04/2024 2:01 pm, Jan Beulich wrote: > revert "x86/mm: re-implement get_page_light() using an atomic increment" > > This reverts commit c40bc0576dcc5acd4d7e22ef628eb4642f568533. > > That change aimed at eliminating an open-coded lock-like construct, > which really isn't all that similar to, in particular, get_page(). The > function always succeeds. Any remaining concern would want taking care > of by placing block_lock_speculation() at the end of the function. > Since the function is called only during page (de)validation, any > possible performance concerns over such extra serialization could > likely be addressed by pre-validating (e.g. via pinning) page tables. > > The fundamental issue with the change being reverted is that it detects > bad state only after already having caused possible corruption. While > the system is going to be halted in such an event, there is a time > window during which the resulting incorrect state could be leveraged by > a clever (in particular: fast enough) attacker. > > Signed-off-by: Jan Beulich Acked-by: Andrew Cooper
[PATCH] revert "x86/mm: re-implement get_page_light() using an atomic increment"
revert "x86/mm: re-implement get_page_light() using an atomic increment" This reverts commit c40bc0576dcc5acd4d7e22ef628eb4642f568533. That change aimed at eliminating an open-coded lock-like construct, which really isn't all that similar to, in particular, get_page(). The function always succeeds. Any remaining concern would want taking care of by placing block_lock_speculation() at the end of the function. Since the function is called only during page (de)validation, any possible performance concerns over such extra serialization could likely be addressed by pre-validating (e.g. via pinning) page tables. The fundamental issue with the change being reverted is that it detects bad state only after already having caused possible corruption. While the system is going to be halted in such an event, there is a time window during which the resulting incorrect state could be leveraged by a clever (in particular: fast enough) attacker. Signed-off-by: Jan Beulich --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -2582,10 +2582,16 @@ bool get_page(struct page_info *page, const struct domain *domain) */ static void get_page_light(struct page_info *page) { -unsigned long old_pgc = arch_fetch_and_add(>count_info, 1); +unsigned long x, nx, y = page->count_info; -BUG_ON(!(old_pgc & PGC_count_mask)); /* Not allocated? */ -BUG_ON(!((old_pgc + 1) & PGC_count_mask)); /* Overflow? */ +do { +x = y; +nx = x + 1; +BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */ +BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */ +y = cmpxchg(>count_info, x, nx); +} +while ( unlikely(y != x) ); } static int validate_page(struct page_info *page, unsigned long type,
Re: [XEN PATCH] automation/eclair_enalysis: amend configuration for some MISRA rules
On 2024-04-29 14:44, Alessandro Zucchelli wrote: Adjust ECLAIR configuration for rules: R21.14, R21.15, R21.16 by taking into account mem* macros defined in the Xen sources as if they were equivalent to the ones in Standard Library. Signed-off-by: Alessandro Zucchelli --- automation/eclair_analysis/ECLAIR/analysis.ecl | 17 + 1 file changed, 17 insertions(+) diff --git a/automation/eclair_analysis/ECLAIR/analysis.ecl b/automation/eclair_analysis/ECLAIR/analysis.ecl index a604582da3..f3b634baba 100644 --- a/automation/eclair_analysis/ECLAIR/analysis.ecl +++ b/automation/eclair_analysis/ECLAIR/analysis.ecl @@ -19,6 +19,23 @@ map_strings("scheduled-analysis",analysis_kind) -enable=B.EXPLAIN +-doc_begin="These configurations serve the purpose of recognizing the 'mem*' macros as +their Standard Library equivalents." + +-config=MC3R1.R21.14,call_select+= +{"macro(^memcmp$)&_arg(1..2, skip(__non_syntactic_paren_cast_stmts, node(string_literal)))", + "any()", violation, "%{__callslct_any_base_fmt()}", {{arg, "%{__callslct_arg_fmt()}"}}} + +-config=MC3R1.R21.15,call_args+= +{"macro(^mem(cmp|move|cpy)$)", {1, 2}, "unqual_pointee_compatible", + "%{__argscmpr_culprit_fmt()}", "%{__argscmpr_evidence_fmt()}"} + +-config=MC3R1.R21.16,call_select+= +{"macro(^memcmp$)&_arg(1..2, skip(__non_syntactic_paren_stmts, type(canonical(__memcmp_pte_types", + "any()", violation, "%{__callslct_any_base_fmt()}", {{arg,"%{__callslct_arg_type_fmt()}"}}} + +-doc_end + -eval_file=toolchain.ecl -eval_file=public_APIs.ecl if(not(scheduled_analysis), Typo in the title: should be automation/eclair_analysis. Sorry. -- Alessandro Zucchelli, B.Sc. Software Engineer, BUGSENG (https://bugseng.com)
[XEN PATCH] automation/eclair_enalysis: amend configuration for some MISRA rules
Adjust ECLAIR configuration for rules: R21.14, R21.15, R21.16 by taking into account mem* macros defined in the Xen sources as if they were equivalent to the ones in Standard Library. Signed-off-by: Alessandro Zucchelli --- automation/eclair_analysis/ECLAIR/analysis.ecl | 17 + 1 file changed, 17 insertions(+) diff --git a/automation/eclair_analysis/ECLAIR/analysis.ecl b/automation/eclair_analysis/ECLAIR/analysis.ecl index a604582da3..f3b634baba 100644 --- a/automation/eclair_analysis/ECLAIR/analysis.ecl +++ b/automation/eclair_analysis/ECLAIR/analysis.ecl @@ -19,6 +19,23 @@ map_strings("scheduled-analysis",analysis_kind) -enable=B.EXPLAIN +-doc_begin="These configurations serve the purpose of recognizing the 'mem*' macros as +their Standard Library equivalents." + +-config=MC3R1.R21.14,call_select+= +{"macro(^memcmp$)&_arg(1..2, skip(__non_syntactic_paren_cast_stmts, node(string_literal)))", + "any()", violation, "%{__callslct_any_base_fmt()}", {{arg, "%{__callslct_arg_fmt()}"}}} + +-config=MC3R1.R21.15,call_args+= +{"macro(^mem(cmp|move|cpy)$)", {1, 2}, "unqual_pointee_compatible", + "%{__argscmpr_culprit_fmt()}", "%{__argscmpr_evidence_fmt()}"} + +-config=MC3R1.R21.16,call_select+= +{"macro(^memcmp$)&_arg(1..2, skip(__non_syntactic_paren_stmts, type(canonical(__memcmp_pte_types", + "any()", violation, "%{__callslct_any_base_fmt()}", {{arg,"%{__callslct_arg_type_fmt()}"}}} + +-doc_end + -eval_file=toolchain.ecl -eval_file=public_APIs.ecl if(not(scheduled_analysis), -- 2.25.1
Re: [XEN PATCH 03/10] automation/eclair_analysis: deviate macro count_args_ for MISRA Rule 20.7
On 2024-04-25 02:28, Stefano Stabellini wrote: On Tue, 23 Apr 2024, Nicola Vetrini wrote: The count_args_ macro violates Rule 20.7, but it can't be made compliant with Rule 20.7 without breaking its functionality. Since it's very unlikely for this macro to be misused, it is deviated. That is OK but can't we use the SAF- framework to do it, given that it is just one macro? If not, this is also OK. It would be more fragile, for no substantial gain No functional change. Signed-off-by: Nicola Vetrini --- automation/eclair_analysis/ECLAIR/deviations.ecl | 6 ++ docs/misra/deviations.rst| 6 ++ 2 files changed, 12 insertions(+) diff --git a/automation/eclair_analysis/ECLAIR/deviations.ecl b/automation/eclair_analysis/ECLAIR/deviations.ecl index d21f112a9b94..c17e2f5a0886 100644 --- a/automation/eclair_analysis/ECLAIR/deviations.ecl +++ b/automation/eclair_analysis/ECLAIR/deviations.ecl @@ -445,6 +445,12 @@ not to parenthesize their arguments." -config=MC3R1.R20.7,reports+={safe, "any_area(any_loc(any_exp(macro(^alternative_(v)?call[0-9]$"} -doc_end +-doc_begin="The argument 'x' of the count_args_ macro can't be parenthesized as +the rule would require, without breaking the functionality of the macro. The uses +of this macro do not lead to developer confusion, and can thus be deviated." +-config=MC3R1.R20.7,reports+={safe, "any_area(any_loc(any_exp(macro(^count_args_$"} +-doc_end + -doc_begin="Uses of variadic macros that have one of their arguments defined as a macro and used within the body for both ordinary parameter expansion and as an operand to the # or ## operators have a behavior that is well-understood and diff --git a/docs/misra/deviations.rst b/docs/misra/deviations.rst index ed0c1e8ed0bf..e228ae2e715f 100644 --- a/docs/misra/deviations.rst +++ b/docs/misra/deviations.rst @@ -371,6 +371,12 @@ Deviations related to MISRA C:2012 Rules: sanity checks in place. - Tagged as `safe` for ECLAIR. 
+ * - R20.7 + - The macro `count_args_` is not compliant with the rule, but is not likely + to incur in the risk of being misused or lead to developer confusion, and + refactoring it to add parentheses breaks its functionality. + - Tagged as `safe` for ECLAIR. + * - R20.12 - Variadic macros that use token pasting often employ the gcc extension `ext_paste_comma`, as detailed in `C-language-toolchain.rst`, which is -- 2.34.1 -- Nicola Vetrini, BSc Software Engineer, BUGSENG srl (https://bugseng.com)
Re: CVE-2024-26908: x86/xen: Add some null pointer checking to smp.c
Hi, I'd like to dispute CVE-2024-26908: the issue fixed by upstream commit 3693bb4465e6e32a204a5b86d3ec7e6b9f7e67c2 can in no way be triggered by an unprivileged user or by a remote attack of the system, as it requires hotplug of (virtual) cpus to the running system. This can be done only by either a host admin or by an admin of the guest which might suffer the out-of-memory situation. Please revoke this CVE. Juergen OpenPGP_0xB0DE9DD628BF132F.asc Description: OpenPGP public key OpenPGP_signature.asc Description: OpenPGP digital signature
Re: [PATCH 3/3] x86/boot: Refactor pvh_load_kernel() to have an initrd_len local
On 26.04.2024 16:01, Andrew Cooper wrote: > The expression get more complicated when ->mod_end isn't being abused as a > size field. Introduce and use a initrd_len local variable. > > No functional change. > > Signed-off-by: Andrew Cooper Reviewed-by: Jan Beulich
Re: [PATCH 2/3] x86/boot: Explain discard_initial_images() and untangle PV initrd handling
On 26.04.2024 16:01, Andrew Cooper wrote: > discard_initial_images() frees the memory backing the boot modules. It is > called by dom0_construct_pv{,h}(), but obtains the module list by global > pointer which is why there is no apparent link with the initrd pointer. > > Generally, module contents are copied into dom0 as it's being built, but the > initrd for PV dom0 might be directly mapped instead of copied. > > dom0_construct_pv() does it's own ad-hoc freeing of the module in the copy > case, and points the global reference at the new copy, then sets the size to > 0. This only functions because init_domheap_pages(x, x) happens to be a nop. > > Delete the ad-hoc freeing, and leave it to discard_initial_images(). This > requires (not) adjusting initd->mod_start in the copy case, and only setting > the size to 0 in the mapping case. > > Alter discard_initial_images() to explicitly check for an ignored module, and > explain what's going on. This is more robust and will allow for fixing other > problems with module handling. > > The later logic in dom0_construct_pv() now cannot use initrd->mod_start, but > that's fine because initrd_mfn is already a duplicate of the information > wanted, and is more consistent with initrd_{pfn,len} used elsewhere. > > Invalidate the initrd pointer with LIST_POISON1 to make it clearer that it > shouldn't be used. > > No practical change in behaviour, but a substantial reduction in the > complexity of how this works. > > Signed-off-by: Andrew Cooper > --- > CC: Jan Beulich > CC: Roger Pau Monné > CC: Stefano Stabellini > CC: Daniel Smith > CC: Christopher Clark > > In other akward questions, why does initial_images_nrpages() account for all > modules when only 1 or 2 are relevant for how we construct dom0 ? 
> --- > xen/arch/x86/pv/dom0_build.c | 22 +++--- > xen/arch/x86/setup.c | 9 - > 2 files changed, 19 insertions(+), 12 deletions(-) > > diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c > index d8043fa58a27..64d9984b8308 100644 > --- a/xen/arch/x86/pv/dom0_build.c > +++ b/xen/arch/x86/pv/dom0_build.c > @@ -630,18 +630,20 @@ int __init dom0_construct_pv(struct domain *d, > } > memcpy(page_to_virt(page), mfn_to_virt(initrd->mod_start), > initrd_len); > -mpt_alloc = (paddr_t)initrd->mod_start << PAGE_SHIFT; > -init_domheap_pages(mpt_alloc, > - mpt_alloc + PAGE_ALIGN(initrd_len)); > -initrd->mod_start = initrd_mfn = mfn_x(page_to_mfn(page)); > +initrd_mfn = mfn_x(page_to_mfn(page)); > } > else > { > while ( count-- ) > if ( assign_pages(mfn_to_page(_mfn(mfn++)), 1, d, 0) ) > BUG(); > +/* > + * Mapped rather than copied. Tell discard_initial_images() to > + * ignore it. > + */ > +initrd->mod_end = 0; > } > -initrd->mod_end = 0; > +initrd = LIST_POISON1; /* No longer valid to use. */ > > iommu_memory_setup(d, "initrd", mfn_to_page(_mfn(initrd_mfn)), > PFN_UP(initrd_len), _flags); > @@ -653,12 +655,10 @@ int __init dom0_construct_pv(struct domain *d, > if ( domain_tot_pages(d) < nr_pages ) > printk(" (%lu pages to be allocated)", > nr_pages - domain_tot_pages(d)); > -if ( initrd ) > -{ > -mpt_alloc = (paddr_t)initrd->mod_start << PAGE_SHIFT; > +if ( initrd_len ) > printk("\n Init. ramdisk: %"PRIpaddr"->%"PRIpaddr, > - mpt_alloc, mpt_alloc + initrd_len); > -} > + pfn_to_paddr(initrd_mfn), > + pfn_to_paddr(initrd_mfn) + initrd_len); > > printk("\nVIRTUAL MEMORY ARRANGEMENT:\n"); > printk(" Loaded kernel: %p->%p\n", _p(vkern_start), _p(vkern_end)); Between what this and the following hunk touch there is if ( count < initrd_pfn || count >= initrd_pfn + PFN_UP(initrd_len) ) mfn = pfn++; else mfn = initrd_mfn++; I can't help thinking that this invalidates ... 
> @@ -881,7 +881,7 @@ int __init dom0_construct_pv(struct domain *d, > if ( pfn >= initrd_pfn ) > { > if ( pfn < initrd_pfn + PFN_UP(initrd_len) ) > -mfn = initrd->mod_start + (pfn - initrd_pfn); > +mfn = initrd_mfn + (pfn - initrd_pfn); > else > mfn -= PFN_UP(initrd_len); > } ... the use of the variable here. > --- a/xen/arch/x86/setup.c > +++ b/xen/arch/x86/setup.c > @@ -294,7 +294,7 @@ unsigned long __init initial_images_nrpages(nodeid_t node) > return nr; > } > > -void __init discard_initial_images(void) > +void __init discard_initial_images(void) /* a.k.a. free multiboot modules */ > { > unsigned int i; > > @@ -302,6 +302,13 @@ void __init
Re: [PATCH 1/3] x86/boot: Explain how moving mod[0] works
On 26.04.2024 16:01, Andrew Cooper wrote: > modules_headroom is a misleading name as it applies strictly to mod[0] only, > and the movement loop is deeply unintuitive and completely undocumented. > > Provide help to whomever needs to look at this code next. > > Signed-off-by: Andrew Cooper Acked-by: Jan Beulich
Re: [PATCH v6 8/8] xen: allow up to 16383 cpus
On 29.04.24 13:04, Julien Grall wrote: Hi Juergen, Sorry for the late reply. On 29/04/2024 11:33, Juergen Gross wrote: On 08.04.24 09:10, Jan Beulich wrote: On 27.03.2024 16:22, Juergen Gross wrote: With lock handling now allowing up to 16384 cpus (spinlocks can handle 65535 cpus, rwlocks can handle 16384 cpus), raise the allowed limit for the number of cpus to be configured to 16383. The new limit is imposed by IOMMU_CMD_BUFFER_MAX_ENTRIES and QINVAL_MAX_ENTRY_NR required to be larger than 2 * CONFIG_NR_CPUS. Signed-off-by: Juergen Gross Acked-by: Jan Beulich I'd prefer this to also gain an Arm ack, though. Any comment from Arm side? Can you clarify what the new limits mean in term of (security) support? Are we now claiming that Xen will work perfectly fine on platforms with up to 16383? If so, I can't comment for x86, but for Arm, I am doubtful that it would work without any (at least performance) issues. AFAIK, this is also an untested configuration. In fact I would be surprised if Xen on Arm was tested with more than a couple of hundreds cores (AFAICT the Ampere CPUs has 192 CPUs). I think we should add a security support limit for the number of physical cpus similar to the memory support limit we already have in place. For x86 I'd suggest 4096 cpus for security support (basically the limit we have with this patch), but I'm open for other suggestions, too. I have no idea about any sensible limits for Arm32/Arm64. Juergen
Re: [PATCH v6 8/8] xen: allow up to 16383 cpus
Hi Juergen, Sorry for the late reply. On 29/04/2024 11:33, Juergen Gross wrote: On 08.04.24 09:10, Jan Beulich wrote: On 27.03.2024 16:22, Juergen Gross wrote: With lock handling now allowing up to 16384 cpus (spinlocks can handle 65535 cpus, rwlocks can handle 16384 cpus), raise the allowed limit for the number of cpus to be configured to 16383. The new limit is imposed by IOMMU_CMD_BUFFER_MAX_ENTRIES and QINVAL_MAX_ENTRY_NR required to be larger than 2 * CONFIG_NR_CPUS. Signed-off-by: Juergen Gross Acked-by: Jan Beulich I'd prefer this to also gain an Arm ack, though. Any comment from Arm side? Can you clarify what the new limits mean in term of (security) support? Are we now claiming that Xen will work perfectly fine on platforms with up to 16383? If so, I can't comment for x86, but for Arm, I am doubtful that it would work without any (at least performance) issues. AFAIK, this is also an untested configuration. In fact I would be surprised if Xen on Arm was tested with more than a couple of hundreds cores (AFAICT the Ampere CPUs has 192 CPUs). Cheers, -- Julien Grall
Re: [PATCH 1/2] tools/{c,o}xenstored: Don't link against libsystemd
On 25.04.24 19:32, Andrew Cooper wrote: libsystemd is a giant dependency for one single function, but in the wake of the xz backdoor, it turns out that even systemd leadership recommend against linking against libsystemd for sd_notify(). Since commit 7b61011e1450 ("tools: make xenstore domain easy configurable") in Xen 4.8, the launch-xenstore script invokes systemd-notify directly, so its not even necessary for the xenstored's to call sd_notify() themselves. You are aware that in the daemon case the call of systemd-notify does not signal readyness of xenstored? It is just called with the "--booted" parameter in order to detect whether systemd is active or not. So in order to just drop the sd_notify() call from xenstored you need to modify the launch-xenstore script, too. Juergen Therefore, just drop the calls to sd_notify() and stop linking against libsystemd. No functional change. Signed-off-by: Andrew Cooper --- CC: Anthony PERARD CC: Juergen Gross CC: Christian Lindig CC: Edwin Török CC: Stefano Stabellini --- tools/ocaml/xenstored/Makefile| 12 +-- tools/ocaml/xenstored/systemd.ml | 15 - tools/ocaml/xenstored/systemd.mli | 16 - tools/ocaml/xenstored/systemd_stubs.c | 47 --- tools/ocaml/xenstored/xenstored.ml| 1 - tools/xenstored/Makefile | 5 --- tools/xenstored/posix.c | 9 - 7 files changed, 1 insertion(+), 104 deletions(-) delete mode 100644 tools/ocaml/xenstored/systemd.ml delete mode 100644 tools/ocaml/xenstored/systemd.mli delete mode 100644 tools/ocaml/xenstored/systemd_stubs.c diff --git a/tools/ocaml/xenstored/Makefile b/tools/ocaml/xenstored/Makefile index e8aaecf2e630..1e4b51cc5432 100644 --- a/tools/ocaml/xenstored/Makefile +++ b/tools/ocaml/xenstored/Makefile @@ -4,8 +4,6 @@ include $(OCAML_TOPLEVEL)/common.make # Include configure output (config.h) CFLAGS += -include $(XEN_ROOT)/tools/config.h -CFLAGS-$(CONFIG_SYSTEMD) += $(SYSTEMD_CFLAGS) -LDFLAGS-$(CONFIG_SYSTEMD) += $(SYSTEMD_LIBS) CFLAGS += $(CFLAGS-y) CFLAGS += $(APPEND_CFLAGS) @@ -25,13 +23,6 
@@ poll_OBJS = poll poll_C_OBJS = select_stubs OCAML_LIBRARY = syslog poll -LIBS += systemd.cma systemd.cmxa -systemd_OBJS = systemd -systemd_C_OBJS = systemd_stubs -OCAML_LIBRARY += systemd - -LIBS_systemd += $(LDFLAGS-y) - OBJS = paths \ define \ stdext \ @@ -56,12 +47,11 @@ OBJS = paths \ process \ xenstored -INTF = symbol.cmi trie.cmi syslog.cmi systemd.cmi poll.cmi +INTF = symbol.cmi trie.cmi syslog.cmi poll.cmi XENSTOREDLIBS = \ unix.cmxa \ -ccopt -L -ccopt . syslog.cmxa \ - -ccopt -L -ccopt . systemd.cmxa \ -ccopt -L -ccopt . poll.cmxa \ -ccopt -L -ccopt $(OCAML_TOPLEVEL)/libs/mmap $(OCAML_TOPLEVEL)/libs/mmap/xenmmap.cmxa \ -ccopt -L -ccopt $(OCAML_TOPLEVEL)/libs/eventchn $(OCAML_TOPLEVEL)/libs/eventchn/xeneventchn.cmxa \ diff --git a/tools/ocaml/xenstored/systemd.ml b/tools/ocaml/xenstored/systemd.ml deleted file mode 100644 index 39127f712d72.. --- a/tools/ocaml/xenstored/systemd.ml +++ /dev/null @@ -1,15 +0,0 @@ -(* - * Copyright (C) 2014 Luis R. Rodriguez - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -external sd_notify_ready: unit -> unit = "ocaml_sd_notify_ready" diff --git a/tools/ocaml/xenstored/systemd.mli b/tools/ocaml/xenstored/systemd.mli deleted file mode 100644 index 18b9331031f9.. --- a/tools/ocaml/xenstored/systemd.mli +++ /dev/null @@ -1,16 +0,0 @@ -(* - * Copyright (C) 2014 Luis R. 
Rodriguez - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -(** Tells systemd we're ready *) -external sd_notify_ready: unit -> unit = "ocaml_sd_notify_ready" diff --git a/tools/ocaml/xenstored/systemd_stubs.c b/tools/ocaml/xenstored/systemd_stubs.c deleted file mode 100644 index f4c875075abe.. --- a/tools/ocaml/xenstored/systemd_stubs.c +++ /dev/null @@ -1,47 +0,0 @@ -/*
[xen-unstable test] 185860: tolerable FAIL
flight 185860 xen-unstable real [real] http://logs.test-lab.xenproject.org/osstest/logs/185860/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 185839 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 185839 test-armhf-armhf-libvirt 16 saverestore-support-checkfail like 185839 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 185839 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 185839 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 185839 test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass 
test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-armhf-armhf-xl-qcow214 migrate-support-checkfail never pass test-armhf-armhf-xl-qcow215 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl-raw 15 saverestore-support-checkfail never pass version targeted for testing: xen be5b08dd6ea6ef0f01caf537bdae125fa66a2230 baseline version: xen be5b08dd6ea6ef0f01caf537bdae125fa66a2230 Last test of basis 185860 2024-04-29 01:53:41 Z0 days Testing same since (not found) 0 attempts jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm pass build-amd64-xtf pass build-amd64 pass build-arm64 pass build-armhf pass build-i386 pass build-amd64-libvirt pass build-arm64-libvirt pass
Re: [PATCH v6 8/8] xen: allow up to 16383 cpus
On 08.04.24 09:10, Jan Beulich wrote: On 27.03.2024 16:22, Juergen Gross wrote: With lock handling now allowing up to 16384 cpus (spinlocks can handle 65535 cpus, rwlocks can handle 16384 cpus), raise the allowed limit for the number of cpus to be configured to 16383. The new limit is imposed by IOMMU_CMD_BUFFER_MAX_ENTRIES and QINVAL_MAX_ENTRY_NR required to be larger than 2 * CONFIG_NR_CPUS. Signed-off-by: Juergen Gross Acked-by: Jan Beulich I'd prefer this to also gain an Arm ack, though. Any comment from Arm side? Juergen Jan --- a/xen/arch/Kconfig +++ b/xen/arch/Kconfig @@ -6,7 +6,7 @@ config PHYS_ADDR_T_32 config NR_CPUS int "Maximum number of CPUs" - range 1 4095 + range 1 16383 default "256" if X86 default "8" if ARM && RCAR3 default "4" if ARM && QEMU OpenPGP_0xB0DE9DD628BF132F.asc Description: OpenPGP public key OpenPGP_signature.asc Description: OpenPGP digital signature
Re: [PATCH v2 1/7] x86/p2m: Add braces for better code clarity
On 29.04.2024 12:26, Petr Beneš wrote: > On Mon, Apr 29, 2024 at 9:07 AM Jan Beulich wrote: >> >> On 28.04.2024 18:52, Petr Beneš wrote: >>> From: Petr Beneš >>> >>> No functional change. >>> >>> Signed-off-by: Petr Beneš >> >> Where did Stefano's R-b go? > > Oh no, I missed that one. Should I do v3? Not just for this, I'd say. Just don't forget it again, if the patch needs re-posting. Jan
Re: [PATCH v2 1/7] x86/p2m: Add braces for better code clarity
On Mon, Apr 29, 2024 at 9:07 AM Jan Beulich wrote: > > On 28.04.2024 18:52, Petr Beneš wrote: > > From: Petr Beneš > > > > No functional change. > > > > Signed-off-by: Petr Beneš > > Where did Stefano's R-b go? > > Jan Oh no, I missed that one. Should I do v3?
Re: [XEN PATCH v3 5/5] xen/arm: ffa: support notification
Hi Julien, On Fri, Apr 26, 2024 at 7:58 PM Julien Grall wrote: > > Hi Jens, > > On 26/04/2024 09:47, Jens Wiklander wrote: > > +static void notif_irq_enable(void *info) > > +{ > > +struct notif_irq_info *irq_info = info; > > + > > +irq_info->ret = setup_irq(irq_info->irq, 0, irq_info->action); > In v2, you were using request_irq(). But now you seem to be open-coding > it. Can you explain why? It's because request_irq() does a memory allocation that can't be done in interrupt context. > > > +if ( irq_info->ret ) > > +printk(XENLOG_ERR "ffa: request_irq irq %u failed: error %d\n", > > + irq_info->irq, irq_info->ret); > > +} > > + > > +void ffa_notif_init(void) > > +{ > > +const struct arm_smccc_1_2_regs arg = { > > +.a0 = FFA_FEATURES, > > +.a1 = FFA_FEATURE_SCHEDULE_RECV_INTR, > > +}; > > +struct notif_irq_info irq_info = { }; > > +struct arm_smccc_1_2_regs resp; > > +unsigned int cpu; > > + > > +arm_smccc_1_2_smc(, ); > > +if ( resp.a0 != FFA_SUCCESS_32 ) > > +return; > > + > > +irq_info.irq = resp.a2; > > +if ( irq_info.irq < GIC_SGI_STATIC_MAX || irq_info.irq >= NR_GIC_SGI ) > > +{ > > +printk(XENLOG_ERR "ffa: notification initialization failed: > > conflicting SGI %u\n", > > + irq_info.irq); > > +return; > > +} > > + > > +/* > > + * SGIs are per-CPU so we must enable the IRQ on each CPU. We use an > > + * IPI to call notif_irq_enable() on each CPU including the current > > + * CPU. The struct irqaction is preallocated since we can't allocate > > + * memory while in interrupt context. > > + */ > > +for_each_online_cpu(cpu) > Even though we currently don't support CPU hotplug, you want to add a > CPU Notifier to also register the IRQ when a CPU is onlined > ffa_notif_init(). > > For an example, see time.c. We may also want to consider to enable TEE > in presmp_initcalls() so we don't need to have a for_each_online_cpu(). I was considering that too. I'll update the code. Thanks, Jens
Re: [XEN PATCH v3 5/5] xen/arm: ffa: support notification
Hi Bertrand, On Mon, Apr 29, 2024 at 9:20 AM Bertrand Marquis wrote: [...] > >> +static void notif_irq_handler(int irq, void *data) > >> +{ > >> +const struct arm_smccc_1_2_regs arg = { > >> +.a0 = FFA_NOTIFICATION_INFO_GET_64, > >> +}; > >> +struct arm_smccc_1_2_regs resp; > >> +unsigned int id_pos; > >> +unsigned int list_count; > >> +uint64_t ids_count; > >> +unsigned int n; > >> +int32_t res; > >> + > >> +do { > >> +arm_smccc_1_2_smc(, ); > >> +res = ffa_get_ret_code(); > >> +if ( res ) > >> +{ > >> +if ( res != FFA_RET_NO_DATA ) > >> +printk(XENLOG_ERR "ffa: notification info get failed: > >> error %d\n", > >> + res); > >> +return; > >> +} > >> + > >> +ids_count = resp.a2 >> FFA_NOTIF_INFO_GET_ID_LIST_SHIFT; > >> +list_count = ( resp.a2 >> FFA_NOTIF_INFO_GET_ID_COUNT_SHIFT ) & > >> + FFA_NOTIF_INFO_GET_ID_COUNT_MASK; > >> + > >> +id_pos = 0; > >> +for ( n = 0; n < list_count; n++ ) > >> +{ > >> +unsigned int count = ((ids_count >> 2 * n) & 0x3) + 1; > >> +struct domain *d; > >> + > >> +d = ffa_get_domain_by_vm_id(get_id_from_resp(, id_pos)); > > > > Thinking a bit more about the question from Bertrand about get_domain_id() > > vs rcu_lock_domain_by_id(). I am actually not sure whether either are ok > > here. > > > > If I am not mistaken, d->arch.tee will be freed as part of the domain > > teardown process. This means you can have the following scenario: > > > > CPU0: ffa_get_domain_by_vm_id() (return the domain as it is alive) > > > > CPU1: call domain_kill() > > CPU1: teardown is called, free d->arch.tee (the pointer is not set to NULL) > > > > d->arch.tee is now a dangling pointer > > > > CPU0: access d->arch.tee > > > > This implies you may need to gain a global lock (I don't have a better idea > > so far) to protect the IRQ handler against domains teardown. > > > > Did I miss anything? > > I think you are right which is why I was thinking to use > rcu_lock_live_remote_domain_by_id to only get a reference > to the domain if it is not dying. 
> > From the comment in sched.h: > /* > * rcu_lock_domain_by_id() is more efficient than get_domain_by_id(). > * This is the preferred function if the returned domain reference > * is short lived, but it cannot be used if the domain reference needs > * to be kept beyond the current scope (e.g., across a softirq). > * The returned domain reference must be discarded using rcu_unlock_domain(). > */ > > Now the question of short lived should be challenged but I do not think we can > consider the current code as "long lived". > > It would be a good idea to start a mailing list thread discussion with other > maintainers on the subject to confirm. > @Jens: as i will be off for some time, would you mind doing it ? Sure, first I'll send out a new patch set with the current comments addressed. I'll update to use rcu_lock_live_remote_domain_by_id() both because it makes more sense than the current code, and also to have a good base for the discussion. Thanks, Jens
Re: [PATCH] x86/cpu-policy: Annotate the accumulated features
On 29/04/2024 8:49 am, Jan Beulich wrote: > On 26.04.2024 18:08, Andrew Cooper wrote: >> Some features need accumulating rather than intersecting to make migration >> safe. Introduce the new '|' attribute for this purpose. >> >> Right now, it's only used by the Xapi toolstack, but it will be used by >> xl/libxl when the full policy-object work is complete, and until then it's >> still a useful hint for hand-crafted cpuid= lines in vm.cfg files. >> >> Signed-off-by: Andrew Cooper > Reviewed-by: Jan Beulich Thanks. > The one feature you don't annotate such that I'm not entirely sure about is > NO_FPU_SEL: On one hand it tells code not to rely on / use the selector > fields in FPU state. It's sadly far more complicated than this. This feature, and its AMD partner RSTR_FP_ERR_PTRS, were introduced to stop windows BSOD-ing under virt, and came with an accidental breakage of x86emul/DoSBox/etc which Intel and AMD have declined to fix. If you recall, prior to these features, the hypervisor needs to divine the operand size of Window's {F,}X{SAVE,RESTORE} instructions, as it blindly does a memcmp() across the region to confirm that the interrupt handler didn't corrupt any state. Both features are discontinuities across which it is not safe to migrate. Advertising "reliably store as 0" on older systems will still cause windows to BSOD on occasion, whereas not advertising it on newer systems will suggest that legacy emulators will work, when they don't. I don't have any good idea of how to make this work nicely, other than having some admin booleans in the xl.cfg file saying "I certify I'm not using a legacy emulator in my VM" to override the safety check. Other discontinuities I'm aware of are the introduction of DAZ (changing MXCSR_MASK), and any migration which changes LBR_FORMAT. ~Andrew
Re: [XEN PATCH v3 5/5] xen/arm: ffa: support notification
Hi Julien, On Fri, Apr 26, 2024 at 9:07 PM Julien Grall wrote: > > Hi Jens, > > On 26/04/2024 09:47, Jens Wiklander wrote: > > diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c > > index d7306aa64d50..5224898265a5 100644 > > --- a/xen/arch/arm/irq.c > > +++ b/xen/arch/arm/irq.c > > @@ -155,7 +155,7 @@ void __init init_IRQ(void) > > { > > /* SGIs are always edge-triggered */ > > if ( irq < NR_GIC_SGI ) > > -local_irqs_type[irq] = DT_IRQ_TYPE_EDGE_RISING; > > +local_irqs_type[irq] = IRQ_TYPE_EDGE_RISING; > > This changes seems unrelated to this patch. This wants to be separate > (if you actually intended to change it). I'm sorry, my bad. I meant for this to go into "xen/arm: allow dynamically assigned SGI handlers". > > > else > > local_irqs_type[irq] = IRQ_TYPE_INVALID; > > } > > diff --git a/xen/arch/arm/tee/Makefile b/xen/arch/arm/tee/Makefile > > index f0112a2f922d..7c0f46f7f446 100644 > > --- a/xen/arch/arm/tee/Makefile > > +++ b/xen/arch/arm/tee/Makefile > > @@ -2,5 +2,6 @@ obj-$(CONFIG_FFA) += ffa.o > > obj-$(CONFIG_FFA) += ffa_shm.o > > obj-$(CONFIG_FFA) += ffa_partinfo.o > > obj-$(CONFIG_FFA) += ffa_rxtx.o > > +obj-$(CONFIG_FFA) += ffa_notif.o > > obj-y += tee.o > > obj-$(CONFIG_OPTEE) += optee.o > > diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c > > index 5209612963e1..912e905a27bd 100644 > > --- a/xen/arch/arm/tee/ffa.c > > +++ b/xen/arch/arm/tee/ffa.c > > @@ -39,6 +39,9 @@ > >* - at most 32 shared memory regions per guest > >* o FFA_MSG_SEND_DIRECT_REQ: > >* - only supported from a VM to an SP > > + * o FFA_NOTIFICATION_*: > > + * - only supports global notifications, that is, per vCPU notifications > > + * are not supported > >* > >* There are some large locked sections with ffa_tx_buffer_lock and > >* ffa_rx_buffer_lock. 
Especially the ffa_tx_buffer_lock spinlock used > > @@ -194,6 +197,8 @@ out: > > > > static void handle_features(struct cpu_user_regs *regs) > > { > > +struct domain *d = current->domain; > > +struct ffa_ctx *ctx = d->arch.tee; > > uint32_t a1 = get_user_reg(regs, 1); > > unsigned int n; > > > > @@ -240,6 +245,30 @@ static void handle_features(struct cpu_user_regs *regs) > > BUILD_BUG_ON(PAGE_SIZE != FFA_PAGE_SIZE); > > ffa_set_regs_success(regs, 0, 0); > > break; > > +case FFA_FEATURE_NOTIF_PEND_INTR: > > +if ( ctx->notif.enabled ) > > +ffa_set_regs_success(regs, GUEST_FFA_NOTIF_PEND_INTR_ID, 0); > > +else > > +ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); > > +break; > > +case FFA_FEATURE_SCHEDULE_RECV_INTR: > > +if ( ctx->notif.enabled ) > > +ffa_set_regs_success(regs, GUEST_FFA_SCHEDULE_RECV_INTR_ID, 0); > > +else > > +ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); > > +break; > > + > > +case FFA_NOTIFICATION_BIND: > > +case FFA_NOTIFICATION_UNBIND: > > +case FFA_NOTIFICATION_GET: > > +case FFA_NOTIFICATION_SET: > > +case FFA_NOTIFICATION_INFO_GET_32: > > +case FFA_NOTIFICATION_INFO_GET_64: > > +if ( ctx->notif.enabled ) > > +ffa_set_regs_success(regs, 0, 0); > > +else > > +ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); > > +break; > > default: > > ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); > > break; > > @@ -305,6 +334,22 @@ static bool ffa_handle_call(struct cpu_user_regs *regs) > >get_user_reg(regs, > > 1)), > > get_user_reg(regs, 3)); > > break; > > +case FFA_NOTIFICATION_BIND: > > +e = ffa_handle_notification_bind(regs); > > +break; > > +case FFA_NOTIFICATION_UNBIND: > > +e = ffa_handle_notification_unbind(regs); > > +break; > > +case FFA_NOTIFICATION_INFO_GET_32: > > +case FFA_NOTIFICATION_INFO_GET_64: > > +ffa_handle_notification_info_get(regs); > > +return true; > > +case FFA_NOTIFICATION_GET: > > +ffa_handle_notification_get(regs); > > +return true; > > +case FFA_NOTIFICATION_SET: > > +e = ffa_handle_notification_set(regs); > > +break; > > 
> > default: > > gprintk(XENLOG_ERR, "ffa: unhandled fid 0x%x\n", fid); > > @@ -322,6 +367,7 @@ static bool ffa_handle_call(struct cpu_user_regs *regs) > > static int ffa_domain_init(struct domain *d) > > { > > struct ffa_ctx *ctx; > > +int ret; > > > > if ( !ffa_version ) > > return -ENODEV; > > @@ -345,10 +391,11 @@ static int ffa_domain_init(struct domain *d) > >* error, so no need for cleanup in this function. > >*/ > > > > -if ( !ffa_partinfo_domain_init(d) ) > > -return -EIO; > > +ret = ffa_partinfo_domain_init(d); > > +if ( ret ) >
Re: [PATCH v5 1/7] x86/msi: passthrough all MSI-X vector ctrl writes to device model
On Wed, Mar 13, 2024 at 04:16:06PM +0100, Marek Marczykowski-Górecki wrote: > QEMU needs to know whether clearing maskbit of a vector is really > clearing, or was already cleared before. Currently Xen sends only > clearing that bit to the device model, but not setting it, so QEMU > cannot detect it. Because of that, QEMU is working this around by > checking via /dev/mem, but that isn't the proper approach. > > Give all necessary information to QEMU by passing all ctrl writes, > including masking a vector. Advertise the new behavior via > XENVER_get_features, so QEMU can know it doesn't need to access /dev/mem > anymore. > > While this commit doesn't move the whole maskbit handling to QEMU (as > discussed on xen-devel as one of the possibilities), it is a necessary > first step anyway. Including telling QEMU it will get all the required > information to do so. The actual implementation would need to include: > - a hypercall for QEMU to control just maskbit (without (re)binding the >interrupt again > - a methor for QEMU to tell Xen it will actually do the work ^ method > Those are not part of this series. > > Signed-off-by: Marek Marczykowski-Górecki > --- > I did not added any control to enable/disable this new behavior (as > Roger have suggested for possible non-QEMU ioreqs). I don't see how the > new behavior could be problematic for some existing ioreq server (they > already received writes to those addresses, just not all of them), > but if that's really necessary, I can probably add a command line option > to restore previous behavior system-wide. That's fine I guess, as you say such ioreq servers should already know how to handle the ranges, and if anything the current behavior of device models not receiving all accesses is likely the bogus (or unexpected at least). Acked-by: Roger Pau Monné Thanks, Roger.
Re: [PATCH] x86/cpu-policy: Annotate the accumulated features
On 26.04.2024 18:08, Andrew Cooper wrote: > Some features need accumulating rather than intersecting to make migration > safe. Introduce the new '|' attribute for this purpose. > > Right now, it's only used by the Xapi toolstack, but it will be used by > xl/libxl when the full policy-object work is complete, and until then it's > still a useful hint for hand-crafted cpuid= lines in vm.cfg files. > > Signed-off-by: Andrew Cooper Reviewed-by: Jan Beulich The one feature you don't annotate such that I'm not entirely sure about is NO_FPU_SEL: On one hand it tells code not to rely on / use the selector fields in FPU state. That would be a reason to mark it here. Otoh the specific behavior of storing zero of course won't occur on older hardware, being an argument for having things the way you do. Perhaps a sentence or two could be added to the description? Jan
Re: [XEN PATCH v3 5/5] xen/arm: ffa: support notification
Hi Julien, > On 26 Apr 2024, at 21:07, Julien Grall wrote: > > Hi Jens, > > On 26/04/2024 09:47, Jens Wiklander wrote: >> diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c >> index d7306aa64d50..5224898265a5 100644 >> --- a/xen/arch/arm/irq.c >> +++ b/xen/arch/arm/irq.c >> @@ -155,7 +155,7 @@ void __init init_IRQ(void) >> { >> /* SGIs are always edge-triggered */ >> if ( irq < NR_GIC_SGI ) >> -local_irqs_type[irq] = DT_IRQ_TYPE_EDGE_RISING; >> +local_irqs_type[irq] = IRQ_TYPE_EDGE_RISING; > > This changes seems unrelated to this patch. This wants to be separate (if you > actually intended to change it). > >> else >> local_irqs_type[irq] = IRQ_TYPE_INVALID; >> } >> diff --git a/xen/arch/arm/tee/Makefile b/xen/arch/arm/tee/Makefile >> index f0112a2f922d..7c0f46f7f446 100644 >> --- a/xen/arch/arm/tee/Makefile >> +++ b/xen/arch/arm/tee/Makefile >> @@ -2,5 +2,6 @@ obj-$(CONFIG_FFA) += ffa.o >> obj-$(CONFIG_FFA) += ffa_shm.o >> obj-$(CONFIG_FFA) += ffa_partinfo.o >> obj-$(CONFIG_FFA) += ffa_rxtx.o >> +obj-$(CONFIG_FFA) += ffa_notif.o >> obj-y += tee.o >> obj-$(CONFIG_OPTEE) += optee.o >> diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c >> index 5209612963e1..912e905a27bd 100644 >> --- a/xen/arch/arm/tee/ffa.c >> +++ b/xen/arch/arm/tee/ffa.c >> @@ -39,6 +39,9 @@ >> * - at most 32 shared memory regions per guest >> * o FFA_MSG_SEND_DIRECT_REQ: >> * - only supported from a VM to an SP >> + * o FFA_NOTIFICATION_*: >> + * - only supports global notifications, that is, per vCPU notifications >> + * are not supported >> * >> * There are some large locked sections with ffa_tx_buffer_lock and >> * ffa_rx_buffer_lock. 
Especially the ffa_tx_buffer_lock spinlock used >> @@ -194,6 +197,8 @@ out: >>static void handle_features(struct cpu_user_regs *regs) >> { >> +struct domain *d = current->domain; >> +struct ffa_ctx *ctx = d->arch.tee; >> uint32_t a1 = get_user_reg(regs, 1); >> unsigned int n; >> @@ -240,6 +245,30 @@ static void handle_features(struct cpu_user_regs *regs) >> BUILD_BUG_ON(PAGE_SIZE != FFA_PAGE_SIZE); >> ffa_set_regs_success(regs, 0, 0); >> break; >> +case FFA_FEATURE_NOTIF_PEND_INTR: >> +if ( ctx->notif.enabled ) >> +ffa_set_regs_success(regs, GUEST_FFA_NOTIF_PEND_INTR_ID, 0); >> +else >> +ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); >> +break; >> +case FFA_FEATURE_SCHEDULE_RECV_INTR: >> +if ( ctx->notif.enabled ) >> +ffa_set_regs_success(regs, GUEST_FFA_SCHEDULE_RECV_INTR_ID, 0); >> +else >> +ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); >> +break; >> + >> +case FFA_NOTIFICATION_BIND: >> +case FFA_NOTIFICATION_UNBIND: >> +case FFA_NOTIFICATION_GET: >> +case FFA_NOTIFICATION_SET: >> +case FFA_NOTIFICATION_INFO_GET_32: >> +case FFA_NOTIFICATION_INFO_GET_64: >> +if ( ctx->notif.enabled ) >> +ffa_set_regs_success(regs, 0, 0); >> +else >> +ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); >> +break; >> default: >> ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); >> break; >> @@ -305,6 +334,22 @@ static bool ffa_handle_call(struct cpu_user_regs *regs) >> get_user_reg(regs, 1)), >> get_user_reg(regs, 3)); >> break; >> +case FFA_NOTIFICATION_BIND: >> +e = ffa_handle_notification_bind(regs); >> +break; >> +case FFA_NOTIFICATION_UNBIND: >> +e = ffa_handle_notification_unbind(regs); >> +break; >> +case FFA_NOTIFICATION_INFO_GET_32: >> +case FFA_NOTIFICATION_INFO_GET_64: >> +ffa_handle_notification_info_get(regs); >> +return true; >> +case FFA_NOTIFICATION_GET: >> +ffa_handle_notification_get(regs); >> +return true; >> +case FFA_NOTIFICATION_SET: >> +e = ffa_handle_notification_set(regs); >> +break; >>default: >> gprintk(XENLOG_ERR, "ffa: unhandled fid 0x%x\n", fid); >> 
@@ -322,6 +367,7 @@ static bool ffa_handle_call(struct cpu_user_regs *regs) >> static int ffa_domain_init(struct domain *d) >> { >> struct ffa_ctx *ctx; >> +int ret; >>if ( !ffa_version ) >> return -ENODEV; >> @@ -345,10 +391,11 @@ static int ffa_domain_init(struct domain *d) >> * error, so no need for cleanup in this function. >> */ >> -if ( !ffa_partinfo_domain_init(d) ) >> -return -EIO; >> +ret = ffa_partinfo_domain_init(d); >> +if ( ret ) >> +return ret; >> -return 0; >> +return ffa_notif_domain_init(d); >> } >>static void ffa_domain_teardown_continue(struct ffa_ctx *ctx, bool >> first_time) >> @@ -423,6 +470,7 @@ static int ffa_domain_teardown(struct domain *d) >> return
Re: MISRA and -Wextra-semi
On 26.04.2024 13:33, Andrew Cooper wrote: > Hi, > > Based on a call a long while back, I experimented with -Wextra-semi. > This is what led to 8e36c668ca107 "xen: Drop superfluous semi-colons". > > However, there are a number of problems with getting this working > fully. First, we need workarounds like this: > > diff --git a/xen/include/xen/config.h b/xen/include/xen/config.h > index d888b2314daf..12e99c6dded4 100644 > --- a/xen/include/xen/config.h > +++ b/xen/include/xen/config.h > @@ -26,7 +26,7 @@ > > #include > > -#define EXPORT_SYMBOL(var) > +#define EXPORT_SYMBOL(var) typedef int var##_ignore_t For this specifically, could we perhaps finally get rid of most (all?) EXPORT_SYMBOL()? If not all, then at least as far as permitting the stub #define to be moved to linux-compat.h? Jan
Re: [PATCH v2 1/7] x86/p2m: Add braces for better code clarity
On 28.04.2024 18:52, Petr Beneš wrote: > From: Petr Beneš > > No functional change. > > Signed-off-by: Petr Beneš Where did Stefano's R-b go? Jan
Re: [PATCH 07/15] xen/overlay: Enable device tree overlay assignment to running domains
On 29.04.2024 05:36, Henry Wang wrote: > Hi Jan, Julien, Stefano, > > On 4/24/2024 2:05 PM, Jan Beulich wrote: >> On 24.04.2024 05:34, Henry Wang wrote: >>> --- a/xen/include/public/sysctl.h >>> +++ b/xen/include/public/sysctl.h >>> @@ -1197,7 +1197,9 @@ struct xen_sysctl_dt_overlay { >>> #define XEN_SYSCTL_DT_OVERLAY_ADD 1 >>> #define XEN_SYSCTL_DT_OVERLAY_REMOVE2 >>> uint8_t overlay_op; /* IN: Add or remove. */ >>> -uint8_t pad[3]; /* IN: Must be zero. */ >>> +bool domain_mapping;/* IN: True of False. */ >>> +uint8_t pad[2]; /* IN: Must be zero. */ >>> +uint32_t domain_id; >>> }; >> If you merely re-purposed padding fields, all would be fine without >> bumping the interface version. Yet you don't, albeit for an unclear >> reason: Why uint32_t rather than domid_t? And on top of that - why a >> separate boolean when you could use e.g. DOMID_INVALID to indicate >> "no domain mapping"? > > I think both of your suggestion make great sense. I will follow the > suggestion in v2. > >> That said - anything taking a domain ID is certainly suspicious in a >> sysctl. Judging from the description you really mean this to be a >> domctl. Anything else will require extra justification. > > I also think a domctl is better. I had a look at the history of the > already merged series, it looks like in the first version of merged part > 1 [1], the hypercall was implemented as the domctl in the beginning but > later in v2 changed to sysctl. I think this makes sense as the scope of > that time is just to make Xen aware of the device tree node via Xen > device tree. > > However this is now a problem for the current part where the scope (and > the end goal) is extended to assign the added device to Linux Dom0/DomU > via device tree overlays. I am not sure which way is better, should we > repurposing the sysctl to domctl or maybe add another domctl (I am > worrying about the duplication because basically we need the same sysctl > functionality but now with a domid in it)? 
What do you think? I'm taking it that what is a sysctl right now legitimately is. Therefore folding both into domctl would at least be bending the rules of what should be sysctl and what domctl. It would need clarifying what (fake) domain such a (folded) domctl ought to operate on for the case that's currently a sysctl. That then may (or may not) be justification for such folding. Jan > @Stefano: Since I am not 100% sure if I understand the whole story behind > this feature, would you mind checking if I am providing correct > information above and sharing your opinions on this? Thank you very much! > > [1] > https://lore.kernel.org/xen-devel/13240b69-f7bb-6a64-b89c-b7c2cbb7e...@xen.org/ > > Kind regards, > Henry > >> Jan >
Re: [PATCH v5 3/7] x86/hvm: Allow access to registers on the same page as MSI-X table
On 26.04.2024 17:26, Marek Marczykowski-Górecki wrote: > On Thu, Apr 25, 2024 at 01:15:34PM +0200, Jan Beulich wrote: >> On 13.03.2024 16:16, Marek Marczykowski-Górecki wrote: >>> +hwaddr = fix_to_virt(fixmap_idx) + PAGE_OFFSET(address); >>> + >>> +switch ( len ) >>> +{ >>> +case 1: >>> +*pval = readb(hwaddr); >>> +break; >>> + >>> +case 2: >>> +*pval = readw(hwaddr); >>> +break; >>> + >>> +case 4: >>> +*pval = readl(hwaddr); >>> +break; >>> + >>> +case 8: >>> +*pval = readq(hwaddr); >>> +break; >>> + >>> +default: >>> +ASSERT_UNREACHABLE(); >> >> Misra demands "break;" to be here for release builds. In fact I wonder >> why "*pval = ~0UL;" isn't put here, too. Question of course is whether >> in such a case a true error indicator wouldn't be yet better. > > I don't think it possible for the msixtbl_read() (that calls > adjacent_read()) to be called with other sizes. I agree, but scanners won't know. Jan > The default label is here exactly to make it obvious for the reader. >
Re: [PATCH] xen/spinlock: use correct pointer
On 26.04.2024 16:33, Stewart Hildebrand wrote: > On 4/26/24 02:31, Jan Beulich wrote: >> On 25.04.2024 22:45, Stewart Hildebrand wrote: >>> The ->profile member is at different offsets in struct rspinlock and >>> struct spinlock. When initializing the profiling bits of an rspinlock, >>> an unrelated member in struct rspinlock was being overwritten, leading >>> to mild havoc. Use the correct pointer. >>> >>> Fixes: b053075d1a7b ("xen/spinlock: make struct lock_profile rspinlock_t >>> aware") >>> Signed-off-by: Stewart Hildebrand >> >> Reviewed-by: Jan Beulich > > Thanks! > >> >>> --- a/xen/common/spinlock.c >>> +++ b/xen/common/spinlock.c >>> @@ -789,7 +789,11 @@ static int __init cf_check lock_prof_init(void) >>> { >>> (*q)->next = lock_profile_glb_q.elem_q; >>> lock_profile_glb_q.elem_q = *q; >>> -(*q)->ptr.lock->profile = *q; >>> + >>> +if ( (*q)->is_rlock ) >>> +(*q)->ptr.rlock->profile = *q; >>> +else >>> +(*q)->ptr.lock->profile = *q; >>> } >>> >>> _lock_profile_register_struct(LOCKPROF_TYPE_GLOBAL, >> >> Just to mention it: Strictly speaking spinlock_profile_print_elem()'s >> >> printk("%s: addr=%p, lockval=%08x, ", data->name, data->ptr.lock, >> lockval); >> >> isn't quite right either (and I would be surprised if Misra didn't have >> to say something about it). > > I'd be happy to send a patch for that instance, too. Would you like a > Reported-by: tag? I'm inclined to say no, not worth it, but it's really up to you. In fact I'm not sure we need to change that; it all depends on whether ... 
> That patch would look something like: > > --- a/xen/common/spinlock.c > +++ b/xen/common/spinlock.c > @@ -637,22 +637,25 @@ static void cf_check spinlock_profile_print_elem(struct > lock_profile *data, > { > unsigned int cpu; > unsigned int lockval; > +void *lockaddr; > > if ( data->is_rlock ) > { > cpu = data->ptr.rlock->debug.cpu; > lockval = data->ptr.rlock->tickets.head_tail; > +lockaddr = data->ptr.rlock; > } > else > { > cpu = data->ptr.lock->debug.cpu; > lockval = data->ptr.lock->tickets.head_tail; > +lockaddr = data->ptr.lock; > } > > printk("%s ", lock_profile_ancs[type].name); > if ( type != LOCKPROF_TYPE_GLOBAL ) > printk("%d ", idx); > -printk("%s: addr=%p, lockval=%08x, ", data->name, data->ptr.lock, > lockval); > +printk("%s: addr=%p, lockval=%08x, ", data->name, lockaddr, lockval); > if ( cpu == SPINLOCK_NO_CPU ) > printk("not locked\n"); > else > > > That case is benign since the pointer is not dereferenced. So the > rationale would primarily be for consistency (and possibly satisfying > Misra). ... Misra takes issue with the "wrong" member of a union being used, which iirc is UB, but which I'm afraid elsewhere we do all the time. Jan