Re: [PATCH 4/5] x86/svm: Switch SVM features over normal cpu_has_*
On 4/29/24 5:16 PM, Andrew Cooper wrote: Delete the boot time rendering of advanced features. It's entirely ad-hoc and not even everything printed here is used by Xen. It is available in `xen-cpuid` now. With (only) svm_load_segs_{,prefetch}() declared now in svm.h, only svm.c and domain.c which need the header. Clean up all others. No functional change. Signed-off-by: Andrew Cooper Reviewed-by: Vaishali Thakkar --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Xenia Ragiadakou CC: Sergiy Kibrik CC: George Dunlap CC: Andrei Semenov CC: Vaishali Thakkar --- xen/arch/x86/hvm/svm/asid.c| 5 ++- xen/arch/x86/hvm/svm/emulate.c | 3 +- xen/arch/x86/hvm/svm/intr.c| 1 - xen/arch/x86/hvm/svm/nestedsvm.c | 14 xen/arch/x86/hvm/svm/svm.c | 50 +++--- xen/arch/x86/hvm/svm/vmcb.c| 1 - xen/arch/x86/include/asm/cpufeature.h | 10 ++ xen/arch/x86/include/asm/hvm/svm/svm.h | 36 --- 8 files changed, 31 insertions(+), 89 deletions(-) diff --git a/xen/arch/x86/hvm/svm/asid.c b/xen/arch/x86/hvm/svm/asid.c index 7977a8e86b53..6117a362d310 100644 --- a/xen/arch/x86/hvm/svm/asid.c +++ b/xen/arch/x86/hvm/svm/asid.c @@ -6,7 +6,6 @@ #include #include -#include #include "svm.h" @@ -39,7 +38,7 @@ void svm_asid_handle_vmrun(void) { vmcb_set_asid(vmcb, true); vmcb->tlb_control = -cpu_has_svm_flushbyasid ? TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; +cpu_has_flush_by_asid ? TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; return; } @@ -48,7 +47,7 @@ void svm_asid_handle_vmrun(void) vmcb->tlb_control = !need_flush ? TLB_CTRL_NO_FLUSH : -cpu_has_svm_flushbyasid ? TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; +cpu_has_flush_by_asid ? 
TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; } /* diff --git a/xen/arch/x86/hvm/svm/emulate.c b/xen/arch/x86/hvm/svm/emulate.c index 93ac1d3435f9..da6e21b2e270 100644 --- a/xen/arch/x86/hvm/svm/emulate.c +++ b/xen/arch/x86/hvm/svm/emulate.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include "svm.h" @@ -20,7 +19,7 @@ static unsigned long svm_nextrip_insn_length(struct vcpu *v) { struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb; -if ( !cpu_has_svm_nrips ) +if ( !cpu_has_nrips ) return 0; #ifndef NDEBUG diff --git a/xen/arch/x86/hvm/svm/intr.c b/xen/arch/x86/hvm/svm/intr.c index 4805c5567213..facd2894a2c6 100644 --- a/xen/arch/x86/hvm/svm/intr.c +++ b/xen/arch/x86/hvm/svm/intr.c @@ -17,7 +17,6 @@ #include #include #include -#include #include /* for nestedhvm_vcpu_in_guestmode */ #include #include diff --git a/xen/arch/x86/hvm/svm/nestedsvm.c b/xen/arch/x86/hvm/svm/nestedsvm.c index 35a2cbfd7d13..255af112661f 100644 --- a/xen/arch/x86/hvm/svm/nestedsvm.c +++ b/xen/arch/x86/hvm/svm/nestedsvm.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -1620,7 +1619,7 @@ void svm_nested_features_on_efer_update(struct vcpu *v) { if ( !vmcb->virt_ext.fields.vloadsave_enable && paging_mode_hap(v->domain) && - cpu_has_svm_vloadsave ) + cpu_has_v_loadsave ) { vmcb->virt_ext.fields.vloadsave_enable = 1; general2_intercepts = vmcb_get_general2_intercepts(vmcb); @@ -1629,8 +1628,7 @@ void svm_nested_features_on_efer_update(struct vcpu *v) vmcb_set_general2_intercepts(vmcb, general2_intercepts); } -if ( !vmcb->_vintr.fields.vgif_enable && - cpu_has_svm_vgif ) +if ( !vmcb->_vintr.fields.vgif_enable && cpu_has_v_gif ) { vintr = vmcb_get_vintr(vmcb); vintr.fields.vgif = svm->ns_gif; @@ -1675,8 +1673,8 @@ void __init start_nested_svm(struct hvm_function_table *hvm_function_table) */ hvm_function_table->caps.nested_virt = hvm_function_table->caps.hap && -cpu_has_svm_lbrv && -cpu_has_svm_nrips && -cpu_has_svm_flushbyasid && -cpu_has_svm_decode; +cpu_has_v_lbr 
&& +cpu_has_nrips && +cpu_has_flush_by_asid && +cpu_has_decode_assist; } diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c index 4719fffae589..16eb875aab94 100644 --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -1287,7 +1287,7 @@ static void cf_check svm_inject_event(const struct x86_event *event) * that hardware doesn't perform DPL checking on injection. */ if ( event->type == X86_EVENTTYPE_PRI_SW_EXCEPTION || - (!cpu_has_svm_nrips && (event->type >= X86_EVENTTYPE_SW_INTERRUPT)) ) + (!cpu_has_nrips && (event->type >= X86_EVENTTYPE_SW_INTERRUPT)) ) svm_emul_swint_injection(&_event); switch ( _event.vector | -(_event.type ==
[xen-4.17-testing test] 185864: tolerable FAIL - PUSHED
flight 185864 xen-4.17-testing real [real] flight 185874 xen-4.17-testing real-retest [real] http://logs.test-lab.xenproject.org/osstest/logs/185864/ http://logs.test-lab.xenproject.org/osstest/logs/185874/ Failures :-/ but no regressions. Tests which are failing intermittently (not blocking): test-armhf-armhf-xl-multivcpu 10 host-ping-check-xen fail pass in 185874-retest Tests which did not succeed, but are not blocking: test-armhf-armhf-xl-multivcpu 15 migrate-support-check fail in 185874 never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-check fail in 185874 never pass test-armhf-armhf-libvirt 16 saverestore-support-checkfail like 185711 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 185711 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 185711 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 185711 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 185711 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 185711 test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass 
test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-qcow214 migrate-support-checkfail never pass test-armhf-armhf-xl-qcow215 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl-raw 15 saverestore-support-checkfail never pass version targeted for testing: xen effcf70f020ff12d34c80e2abde0ecb00ce92bda baseline version: xen 5d9a931fe2c1310dbfd946bbc1e22a177add4f5c Last test of basis 185711 2024-04-17 07:07:14 Z 12 days Testing same since 185864 2024-04-29 08:08:55 Z0 days1 attempts People who touched revisions under 
test: Jan Beulich Roger Pau Monné jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm pass build-amd64-xtf pass build-amd64
[ovmf test] 185873: all pass - PUSHED
flight 185873 ovmf real [real] http://logs.test-lab.xenproject.org/osstest/logs/185873/ Perfect :-) All tests in this flight passed as required version targeted for testing: ovmf 88781ccd744c73acbbbe9767627860a538b9f3a2 baseline version: ovmf 094727264f887e275444bd11d9d99c651a85c2e4 Last test of basis 185868 2024-04-29 10:45:51 Z0 days Testing same since 185873 2024-04-30 02:41:15 Z0 days1 attempts People who touched revisions under test: Wenxing Hou jobs: build-amd64-xsm pass build-i386-xsm pass build-amd64 pass build-i386 pass build-amd64-libvirt pass build-i386-libvirt pass build-amd64-pvopspass build-i386-pvops pass test-amd64-amd64-xl-qemuu-ovmf-amd64 pass sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Pushing revision : To xenbits.xen.org:/home/xen/git/osstest/ovmf.git 094727264f..88781ccd74 88781ccd744c73acbbbe9767627860a538b9f3a2 -> xen-tested-master
Re: [PATCH 07/15] xen/overlay: Enable device tree overlay assignment to running domains
Hi Julien, On 4/30/2024 1:34 AM, Julien Grall wrote: On 29/04/2024 04:36, Henry Wang wrote: Hi Jan, Julien, Stefano, Hi Henry, On 4/24/2024 2:05 PM, Jan Beulich wrote: On 24.04.2024 05:34, Henry Wang wrote: --- a/xen/include/public/sysctl.h +++ b/xen/include/public/sysctl.h @@ -1197,7 +1197,9 @@ struct xen_sysctl_dt_overlay { #define XEN_SYSCTL_DT_OVERLAY_ADD 1 #define XEN_SYSCTL_DT_OVERLAY_REMOVE 2 uint8_t overlay_op; /* IN: Add or remove. */ - uint8_t pad[3]; /* IN: Must be zero. */ + bool domain_mapping; /* IN: True of False. */ + uint8_t pad[2]; /* IN: Must be zero. */ + uint32_t domain_id; }; If you merely re-purposed padding fields, all would be fine without bumping the interface version. Yet you don't, albeit for an unclear reason: Why uint32_t rather than domid_t? And on top of that - why a separate boolean when you could use e.g. DOMID_INVALID to indicate "no domain mapping"? I think both of your suggestion make great sense. I will follow the suggestion in v2. That said - anything taking a domain ID is certainly suspicious in a sysctl. Judging from the description you really mean this to be a domctl. Anything else will require extra justification. I also think a domctl is better. I had a look at the history of the already merged series, it looks like in the first version of merged part 1 [1], the hypercall was implemented as the domctl in the beginning but later in v2 changed to sysctl. I think this makes sense as the scope of that time is just to make Xen aware of the device tree node via Xen device tree. However this is now a problem for the current part where the scope (and the end goal) is extended to assign the added device to Linux Dom0/DomU via device tree overlays. I am not sure which way is better, should we repurposing the sysctl to domctl or maybe add another domctl (I am worrying about the duplication because basically we need the same sysctl functionality but now with a domid in it)? What do you think? 
I am not entirely sure this is a good idea to try to add the device in Xen and attach it to the guests at the same time. Imagine the following situation: 1) Add and attach devices 2) The domain is rebooted 3) Detach and remove devices After step 2, you technically have a new domain. You could have also a case where this is a completely different guest. So the flow would look a little bit weird (you create the DT overlay with domain A but remove with domain B). So, at the moment, it feels like the add/attach (resp detach/remove) operations should happen separately. Can you clarify why you want to add devices to Xen and attach to a guest within a single hypercall? Sorry I don't know if there is any specific thoughts on the design of using a single hypercall to do both add devices to Xen device tree and assign the device to the guest. In fact seeing your above comments, I think separating these two functionality to two xl commands using separated hypercalls would indeed be a better idea. Thank you for the suggestion! To make sure I understand correctly, would you mind confirming if below actions for v2 make sense to you? Thanks! - Only use the XEN_SYSCTL_DT_OVERLAY_{ADD, REMOVE} sysctls to add/remove overlay to Xen device tree - Introduce the xl dt-overlay attach command and respective domctls to do the device assignment for the overlay to domain. Kind regards, Henry Cheers,
Re: [PATCH 02/15] xen/arm/gic: Enable interrupt assignment to running VM
Hi Julien, Sorry for the late reply, On 4/25/2024 10:28 PM, Julien Grall wrote: Hi, On 25/04/2024 08:06, Henry Wang wrote: Hi Julien, On 4/24/2024 8:58 PM, Julien Grall wrote: Hi Henry, On 24/04/2024 04:34, Henry Wang wrote: From: Vikram Garhwal Enable interrupt assign/remove for running VMs in CONFIG_OVERLAY_DTB. Currently, irq_route and mapping is only allowed at the domain creation. Adding exception for CONFIG_OVERLAY_DTB. AFAICT, this is mostly reverting b8577547236f ("xen/arm: Restrict when a physical IRQ can be routed/removed from/to a domain"). Signed-off-by: Vikram Garhwal Signed-off-by: Stefano Stabellini Signed-off-by: Henry Wang --- xen/arch/arm/gic.c | 4 1 file changed, 4 insertions(+) diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c index 44c40e86de..a775f886ed 100644 --- a/xen/arch/arm/gic.c +++ b/xen/arch/arm/gic.c @@ -140,8 +140,10 @@ int gic_route_irq_to_guest(struct domain *d, unsigned int virq, * back to the physical IRQ. To prevent get unsync, restrict the * routing to when the Domain is been created. */ The above comment explains why the check was added. But the commit message doesn't explain why this can be disregarded for your use-case. Looking at the history, I don't think you can simply remove the checks. Regardless that... +#ifndef CONFIG_OVERLAY_DTB ... I am against such #ifdef. A distros may want to have OVERLAY_DTB enabled, yet the user will not use it. Instead, you want to remove the check once the code can properly handle routing an IRQ the domain is created or ... if ( d->creation_finished ) return -EBUSY; +#endif ret = vgic_connect_hw_irq(d, NULL, virq, desc, true); if ( ret ) @@ -171,8 +173,10 @@ int gic_remove_irq_from_guest(struct domain *d, unsigned int virq, * Removing an interrupt while the domain is running may have * undesirable effect on the vGIC emulation. */ +#ifndef CONFIG_OVERLAY_DTB if ( !d->is_dying ) return -EBUSY; +#endif ... removed before they domain is destroyed. Thanks for your feedback. 
After checking the b8577547236f commit message I think I now understand your point. Do you have any suggestion about how can I properly add the support to route/remove the IRQ to running domains? Thanks. I spent some time going through the GIC/vGIC code and had some discussions with Stefano and Stewart during the last couple of days, let me see if I can describe the use case properly now to continue the discussion: We have some use cases that requires assigning devices to domains after domain boot time. For example, suppose there is an FPGA on the board which can simulate a device, and the bitstream for the FPGA is provided and programmed after domain boot. So we need a way to assign the device to the running domain. This series tries to implement this use case by using device tree overlay - users can firstly add the overlay to Xen dtb, assign the device in the overlay to a domain by the xl command, then apply the overlay to Linux. I haven't really look at that code in quite a while. I think we need to make sure that the virtual and physical IRQ state matches at the time we do the routing. I am undecided on whether we want to simply prevent the action to happen or try to reset the state. There is also the question of what to do if the guest is enabling the vIRQ before it is routed. Sorry for bothering, would you mind elaborating a bit more about the two cases that you mentioned above? Commit b8577547236f ("xen/arm: Restrict when a physical IRQ can be routed/removed from/to a domain") only said there will be undesirable effects, so I am not sure if I understand the concerns raised above and the consequences of these two use cases. I am probably wrong, I think when we add the overlay, we are probably fine as the interrupt is not being used before. Also since we only load the device driver after the IRQ is routed to the guest, I am not sure the guest can enable the vIRQ before it is routed. 
Kind regards, Henry Overall, someone needs to spend some time reading the code and then make a proposal (this could be just documentation if we believe it is safe to do). Both the current vGIC and the new one may need an update. Cheers,
Re: [VirtIO] Support for various devices in Xen
On 30-04-24, 03:11, Andrei Cherechesu wrote: > Are there any other virtio device types you managed to test so far > besides these ones (over virtio-mmio/virtio-grant)? Has anyone > tested the rust-vmm vhost-device backends from Viresh with Xen? I have tested them earlier with Xen emulated with the help of Qemu. Steps are mentioned here: https://github.com/vireshk/xen-vhost-frontend -- viresh
Re: [PATCH 1/3] xen/arm/dom0less-build: Alloc magic pages for Dom0less DomUs from hypervisor
Hi Daniel, On 4/30/2024 8:27 AM, Daniel P. Smith wrote: On 4/25/24 23:14, Henry Wang wrote: There are use cases (for example using the PV driver) in Dom0less setup that require Dom0less DomUs start immediately with Dom0, but initialize XenStore later after Dom0's successful boot and call to the init-dom0less application. An error message can be seen from the init-dom0less application on 1:1 direct-mapped domains: ``` Allocating magic pages memory.c:238:d0v0 mfn 0x39000 doesn't belong to d1 Error on alloc magic pages ``` This is because currently the magic pages for Dom0less DomUs are populated by the init-dom0less app through populate_physmap(), and populate_physmap() automatically assumes gfn == mfn for 1:1 direct mapped domains. This cannot be true for the magic pages that are allocated later from the init-dom0less application executed in Dom0. For domain using statically allocated memory but not 1:1 direct-mapped, similar error "failed to retrieve a reserved page" can be seen as the reserved memory list is empty at that time. To solve above issue, this commit allocates the magic pages for Dom0less DomUs at the domain construction time. The base address/PFN of the magic page region will be noted and communicated to the init-dom0less application in Dom0. Might I suggest we not refer to these as magic pages? I would consider them as hypervisor reserved pages for the VM to have access to virtual platform capabilities. We may see this expand in the future for some unforeseen, new capability. I think magic page is a specific terminology to refer to these pages, see alloc_magic_pages() for both x86 and Arm. I will reword the last paragraph of the commit message to refer them as "hypervisor reserved pages (currently used as magic pages on Arm)" if this sounds good to you. Kind regards, Henry
Re: [PATCH 2/3] xen/arm, tools: Add a new HVM_PARAM_MAGIC_BASE_PFN key in HVMOP
Hi Daniel, On 4/30/2024 8:31 AM, Daniel P. Smith wrote: On 4/26/24 02:21, Jan Beulich wrote: On 26.04.2024 05:14, Henry Wang wrote: --- a/xen/include/public/hvm/params.h +++ b/xen/include/public/hvm/params.h @@ -76,6 +76,7 @@ */ #define HVM_PARAM_STORE_PFN 1 #define HVM_PARAM_STORE_EVTCHN 2 +#define HVM_PARAM_MAGIC_BASE_PFN 3 #define HVM_PARAM_IOREQ_PFN 5 Considering all adjacent values are used, it is overwhelmingly likely that 3 was once used, too. Such re-use needs to be done carefully. Since you need this for Arm only, that's likely okay, but doesn't go without (a) saying and (b) considering the possible future case of dom0less becoming arch-agnostic, or hyperlaunch wanting to extend the scope. Plus (c) imo this also needs at least a comment, maybe even an #ifdef, seeing how x86- focused most of the rest of this header is. I would recommend having two new params, Sounds good. I can do the suggestion in v2. #define HVM_PARAM_HV_RSRV_BASE_PVH 3 #define HVM_PARAM_HV_RSRV_SIZE 4 I think 4 is currently in use, so I think I will find another couple of numbers in the end for both of them. Instead of reusing 3 and 4. Kind regards, Henry This will communicate how many pages have been reserved and where those pages are located. v/r, dps
Re: [PATCH 2/3] xen/arm, tools: Add a new HVM_PARAM_MAGIC_BASE_PFN key in HVMOP
On 4/29/24 20:35, Daniel P. Smith wrote: On 4/25/24 23:14, Henry Wang wrote: For use cases such as Dom0less PV drivers, a mechanism to communicate Dom0less DomU's static data with the runtime control plane (Dom0) is needed. Since on Arm HVMOP is already the existing approach to address such use cases (for example the allocation of HVM_PARAM_CALLBACK_IRQ), add a new HVMOP key HVM_PARAM_MAGIC_BASE_PFN for storing the magic page region base PFN. The value will be set at Dom0less DomU construction time after Dom0less DomU's magic page region has been allocated. To keep consistent, also set the value for HVM_PARAM_MAGIC_BASE_PFN for libxl guests in alloc_magic_pages(). Reported-by: Alec Kwapis Signed-off-by: Henry Wang --- tools/libs/guest/xg_dom_arm.c | 2 ++ xen/arch/arm/dom0less-build.c | 2 ++ xen/arch/arm/hvm.c | 1 + xen/include/public/hvm/params.h | 1 + 4 files changed, 6 insertions(+) diff --git a/tools/libs/guest/xg_dom_arm.c b/tools/libs/guest/xg_dom_arm.c index 8cc7f27dbb..3c08782d1d 100644 --- a/tools/libs/guest/xg_dom_arm.c +++ b/tools/libs/guest/xg_dom_arm.c @@ -74,6 +74,8 @@ static int alloc_magic_pages(struct xc_dom_image *dom) xc_clear_domain_page(dom->xch, dom->guest_domid, base + MEMACCESS_PFN_OFFSET); xc_clear_domain_page(dom->xch, dom->guest_domid, dom->vuart_gfn); + xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_MAGIC_BASE_PFN, + base); xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_CONSOLE_PFN, dom->console_pfn); xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_STORE_PFN, diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c index 40dc85c759..72187c167d 100644 --- a/xen/arch/arm/dom0less-build.c +++ b/xen/arch/arm/dom0less-build.c @@ -861,6 +861,8 @@ static int __init construct_domU(struct domain *d, free_domheap_pages(magic_pg, get_order_from_pages(NR_MAGIC_PAGES)); return rc; } + + d->arch.hvm.params[HVM_PARAM_MAGIC_BASE_PFN] = gfn_x(gfn); } return rc; diff --git a/xen/arch/arm/hvm.c b/xen/arch/arm/hvm.c 
index 0989309fea..fa6141e30c 100644 --- a/xen/arch/arm/hvm.c +++ b/xen/arch/arm/hvm.c @@ -55,6 +55,7 @@ static int hvm_allow_get_param(const struct domain *d, unsigned int param) case HVM_PARAM_STORE_EVTCHN: case HVM_PARAM_CONSOLE_PFN: case HVM_PARAM_CONSOLE_EVTCHN: + case HVM_PARAM_MAGIC_BASE_PFN: return 0; I know you are just adding, so more of a question for the Arm maintainers: Why does Arm have a pair of private access control functions subverting XSM? On closer look, I see x86 has done the same. While the way this is set up bothers me, reviewing the history it was clearly decided that only controlling use of the op by a src domain against a target domain was sufficient. Ultimately, it is seeing a mini access control policy being codified in the access code is what is bothering me here. Fixing this would require a complete rework of xsm_hvm_param, and while it would correct the access control from a purist perspective, the security benefit would be very low. v/r, dps
Re: [PATCH] xen/xsm: Wire up get_dom0_console
On 4/26/24 00:04, Jason Andryuk wrote: An XSM hook for get_dom0_console is currently missing. Using XSM with a PVH dom0 shows: (XEN) FLASK: Denying unknown platform_op: 64. Wire up the hook, and allow it for dom0. Fixes: 4dd160583c ("x86/platform: introduce hypercall to get initial video console settings") Signed-off-by: Jason Andryuk --- Acked-by: Daniel P. Smith
Re: [PATCH 2/3] xen/arm, tools: Add a new HVM_PARAM_MAGIC_BASE_PFN key in HVMOP
On 4/25/24 23:14, Henry Wang wrote: For use cases such as Dom0less PV drivers, a mechanism to communicate Dom0less DomU's static data with the runtime control plane (Dom0) is needed. Since on Arm HVMOP is already the existing approach to address such use cases (for example the allocation of HVM_PARAM_CALLBACK_IRQ), add a new HVMOP key HVM_PARAM_MAGIC_BASE_PFN for storing the magic page region base PFN. The value will be set at Dom0less DomU construction time after Dom0less DomU's magic page region has been allocated. To keep consistent, also set the value for HVM_PARAM_MAGIC_BASE_PFN for libxl guests in alloc_magic_pages(). Reported-by: Alec Kwapis Signed-off-by: Henry Wang --- tools/libs/guest/xg_dom_arm.c | 2 ++ xen/arch/arm/dom0less-build.c | 2 ++ xen/arch/arm/hvm.c | 1 + xen/include/public/hvm/params.h | 1 + 4 files changed, 6 insertions(+) diff --git a/tools/libs/guest/xg_dom_arm.c b/tools/libs/guest/xg_dom_arm.c index 8cc7f27dbb..3c08782d1d 100644 --- a/tools/libs/guest/xg_dom_arm.c +++ b/tools/libs/guest/xg_dom_arm.c @@ -74,6 +74,8 @@ static int alloc_magic_pages(struct xc_dom_image *dom) xc_clear_domain_page(dom->xch, dom->guest_domid, base + MEMACCESS_PFN_OFFSET); xc_clear_domain_page(dom->xch, dom->guest_domid, dom->vuart_gfn); +xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_MAGIC_BASE_PFN, +base); xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_CONSOLE_PFN, dom->console_pfn); xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_STORE_PFN, diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c index 40dc85c759..72187c167d 100644 --- a/xen/arch/arm/dom0less-build.c +++ b/xen/arch/arm/dom0less-build.c @@ -861,6 +861,8 @@ static int __init construct_domU(struct domain *d, free_domheap_pages(magic_pg, get_order_from_pages(NR_MAGIC_PAGES)); return rc; } + +d->arch.hvm.params[HVM_PARAM_MAGIC_BASE_PFN] = gfn_x(gfn); } return rc; diff --git a/xen/arch/arm/hvm.c b/xen/arch/arm/hvm.c index 0989309fea..fa6141e30c 100644 --- 
a/xen/arch/arm/hvm.c +++ b/xen/arch/arm/hvm.c @@ -55,6 +55,7 @@ static int hvm_allow_get_param(const struct domain *d, unsigned int param) case HVM_PARAM_STORE_EVTCHN: case HVM_PARAM_CONSOLE_PFN: case HVM_PARAM_CONSOLE_EVTCHN: +case HVM_PARAM_MAGIC_BASE_PFN: return 0; I know you are just adding, so more of a question for the Arm maintainers: Why does Arm have a pair of private access control functions subverting XSM? v/r, dps
Re: [PATCH 2/3] xen/arm, tools: Add a new HVM_PARAM_MAGIC_BASE_PFN key in HVMOP
On 4/26/24 02:21, Jan Beulich wrote: On 26.04.2024 05:14, Henry Wang wrote: --- a/xen/include/public/hvm/params.h +++ b/xen/include/public/hvm/params.h @@ -76,6 +76,7 @@ */ #define HVM_PARAM_STORE_PFN1 #define HVM_PARAM_STORE_EVTCHN 2 +#define HVM_PARAM_MAGIC_BASE_PFN3 #define HVM_PARAM_IOREQ_PFN5 Considering all adjacent values are used, it is overwhelmingly likely that 3 was once used, too. Such re-use needs to be done carefully. Since you need this for Arm only, that's likely okay, but doesn't go without (a) saying and (b) considering the possible future case of dom0less becoming arch-agnostic, or hyperlaunch wanting to extend the scope. Plus (c) imo this also needs at least a comment, maybe even an #ifdef, seeing how x86- focused most of the rest of this header is. I would recommend having two new params, #define HVM_PARAM_HV_RSRV_BASE_PVH 3 #define HVM_PARAM_HV_RSRV_SIZE 4 This will communicate how many pages have been reserved and where those pages are located. v/r, dps
Re: [PATCH 1/3] xen/arm/dom0less-build: Alloc magic pages for Dom0less DomUs from hypervisor
On 4/25/24 23:14, Henry Wang wrote: There are use cases (for example using the PV driver) in Dom0less setup that require Dom0less DomUs start immediately with Dom0, but initialize XenStore later after Dom0's successful boot and call to the init-dom0less application. An error message can be seen from the init-dom0less application on 1:1 direct-mapped domains: ``` Allocating magic pages memory.c:238:d0v0 mfn 0x39000 doesn't belong to d1 Error on alloc magic pages ``` This is because currently the magic pages for Dom0less DomUs are populated by the init-dom0less app through populate_physmap(), and populate_physmap() automatically assumes gfn == mfn for 1:1 direct mapped domains. This cannot be true for the magic pages that are allocated later from the init-dom0less application executed in Dom0. For domain using statically allocated memory but not 1:1 direct-mapped, similar error "failed to retrieve a reserved page" can be seen as the reserved memory list is empty at that time. To solve above issue, this commit allocates the magic pages for Dom0less DomUs at the domain construction time. The base address/PFN of the magic page region will be noted and communicated to the init-dom0less application in Dom0. Might I suggest we not refer to these as magic pages? I would consider them as hypervisor reserved pages for the VM to have access to virtual platform capabilities. We may see this expand in the future for some unforeseen, new capability.
[xen-4.18-testing test] 185865: tolerable FAIL - PUSHED
flight 185865 xen-4.18-testing real [real] http://logs.test-lab.xenproject.org/osstest/logs/185865/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt 16 saverestore-support-checkfail like 185724 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 185724 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 185724 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 185724 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 185724 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 185724 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass 
test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-qcow214 migrate-support-checkfail never pass test-armhf-armhf-xl-qcow215 saverestore-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl-raw 15 saverestore-support-checkfail never pass version targeted for testing: xen f0ff1d9cb96041a84a24857a6464628240deed4f baseline version: xen 2d38302c33b117aa9a417056db241aefc840c2f0 Last test of basis 185724 2024-04-17 15:40:35 Z 12 days Testing same since 185865 2024-04-29 08:08:54 Z0 days1 attempts People who touched revisions under test: Jan Beulich Roger Pau Monné Ross Lagerwall jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm pass build-amd64-xtf pass build-amd64 pass build-arm64 pass build-armhf pass build-i386 pass
Re: [VirtIO] Support for various devices in Xen
On 12/04/2024 11:35, Edgar E. Iglesias wrote: > On Fri, Apr 12, 2024 at 1:23 AM Stefano Stabellini > wrote: > > -Vikram +Edgar > > On Thu, 11 Apr 2024, Andrei Cherechesu wrote: >>> I've managed to successfully get a DomU up and running with the rootfs >>> based on virtio-blk. I'm running QEMU 8.2.1, Xen 4.18 + Vikram's downstream >>> patches, Linux 6.6.12-rt, built through yocto with some changes to >>> xen-tools and QEMU recipes. >>> >>> However, when also enabling PV networking through virtio-net, it seems that >>> DomU cannot successfully boot. The device model args passed by xen-tools >>> when invoking QEMU look exactly like what Vikram said they should. >>> >>> While executing `xl -v create ..` I can see some error regarding the device >>> model crashing: >>> >>> libxl: debug: libxl_exec.c:127:libxl_report_child_exitstatus: >>> domain 1 device model (dying as expected) [300] died due to fatal signal >>> Killed >>> >>> But the error is not fatal and the DomU spawn goes on, it boots but never >>> reaches prompt. It seems that kernel crashes silently at some point. >>> Though, the networking interface is present since udev tries to rename it >>> right before boot hangs: >>> >>> [4.376715] vif vif-0 enX0: renamed from eth1 >>> >>> Why would the QEMU DM process be killed, though? Invalid memory access? >>> >>> Here are the full logs for the "xl create" command [0] and for DomU's dmesg >>> [1]. >>> Any ideas as to why that might happen, some debugging insights, or maybe >>> some configuration details I could have overlooked? >>> >>> Thank you very much for your help once again. > Hi Andrei, > > I'll share some info about my setup: > I'm using: > > Xen upstream/master + virtio patches that Vikram shared > Commit 63f66058b5 on this repo/branch: > https://github.com/edgarigl/xen/tree/edgar/virtio-base > > QEMU 02e16ab9f4 upstream/master > Linux 09e5c48fea17 upstream/master (from March) > Yocto rootfs. 
> I had a look at your logs but I can't tell why it's failing on your side. > I've not tried using a virtio-blk as a rootfs on my side, perhaps related. > It would be useful to see a diff of your Xen tree compared to plain > 4.18 or whatever base you've got. > You probably don't have > https://github.com/edgarigl/xen/commit/63f66058b508180107963ea37217bc88d813df8f > but if that was the problem, I'd thought virtio wouldn't work at all > with your kernel it could also be related. > > My guest config looks like this: > name = "g0" > memory = 1024 > vcpus = 1 > kernel = "Image" > ramdisk = "core-image-minimal-qemuarm64.rootfs.cpio.gz" > extra = "root=/dev/ram0 console=ttyAMA0" > vif = [ 'model=virtio-net,type=ioemu,bridge=xenbr0' ] > disk = [ '/etc/xen/file.img,,xvda,backendtype=qdisk,specification=virtio' ] Hi Edgar, Stefano, Thank you for your support. Just wanted to let you know that I've managed to run everything just fine. After some more debugging, I figured the fix I needed was actually in QEMU, one which had been already merged upstream (available in v9.0.0) by Peng Fan [0], following a discussion a few months ago on this list. And since I'm running v8.2.1, my QEMU did not have it. So I can confirm granting DomU access to rootfs from an SDCard partition over virtio-blk also works. However, if I use the usual Xen PV drivers for block + virtio-net, it does not work (device model fails to spawn). But if I'm using virtio-blk + Xen PV Net, it works :). Also both VirtIOs at the same time work. This is the configuration: vif = [ 'model=virtio-net,type=ioemu' ] disk = [ 'phy:/dev/mmcblk0p3,xvda,backendtype=qdisk,specification=virtio' ] extra = "root=/dev/vda ..." Also tested them with foreign mappings and with grant based transports. Are there any other virtio device types you managed to test so far besides these ones (over virtio-mmio/virtio-grant)? Has anyone tested the rust-vmm vhost-device backends from Viresh with Xen? 
Regards, Andrei C [0] https://github.com/qemu/qemu/commit/9253d83062268209533df4b29859e5b51a2dc324 > > xl launches QEMU with the following args: > /usr/bin/qemu-system-aarch64 -xen-domid 1 -no-shutdown -chardev > socket,id=libxl-cmd,path=/var/run/xen/qmp-libxl-1,server=on,wait=off > -mon chardev=libxl-cmd,mode=control -chardev > socket,id=libxenstat-cmd,path=/var/run/xen/qmp-libxenstat-1,server=on,wait=off > -mon chardev=libxenstat-cmd,mode=control -nodefaults -no-user-config > -xen-attach -name g0 -vnc none -display none -nographic -global > virtio-mmio.force-legacy=false -device > virtio-net-device,id=nic0,netdev=net0,mac=00:16:3e:13:86:9c,iommu_platform=on > -netdev type=tap,id=net0,ifname=vif1.0-emu,br=xenbr0,script=no,downscript=no > -machine xenpvh -m 1024 -device > virtio-blk-device,drive=image,iommu_platform=on -drive > if=none,id=image,format=raw,file=/etc/xen/file.img -global > virtio-mmio.force-legacy=false > > Cheers, > Edgar > > >> Edgar (CCed) has recently setup a working system with QEMU and the >> xenpvh machine for
Re: [XEN PATCH v3 5/5] xen/arm: ffa: support notification
Hi Bertrand, On 29/04/2024 08:20, Bertrand Marquis wrote: From the comment in sched.h: /* * rcu_lock_domain_by_id() is more efficient than get_domain_by_id(). * This is the preferred function if the returned domain reference * is short lived, but it cannot be used if the domain reference needs * to be kept beyond the current scope (e.g., across a softirq). * The returned domain reference must be discarded using rcu_unlock_domain(). */ Now the question of short lived should be challenged but I do not think we can consider the current code as "long lived". Actually, I am not entirely sure you can use put_domain() in interrupt context. If you look at the implementation of domain_destroy() it takes a spin lock without disabling the interrupts. The same spinlock is taken in domain_create(). So there is a potential deadlock. Which means the decision between the two is not only about liveness. Cheers, -- Julien Grall
[linux-6.1 test] 185863: regressions - FAIL
flight 185863 linux-6.1 real [real] http://logs.test-lab.xenproject.org/osstest/logs/185863/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: build-armhf-pvops 6 kernel-build fail REGR. vs. 185746 Tests which did not succeed, but are not blocking: test-armhf-armhf-examine 1 build-check(1) blocked n/a test-armhf-armhf-libvirt 1 build-check(1) blocked n/a test-armhf-armhf-libvirt-vhd 1 build-check(1) blocked n/a test-armhf-armhf-xl 1 build-check(1) blocked n/a test-armhf-armhf-xl-arndale 1 build-check(1) blocked n/a test-armhf-armhf-xl-credit1 1 build-check(1) blocked n/a test-armhf-armhf-xl-credit2 1 build-check(1) blocked n/a test-armhf-armhf-xl-multivcpu 1 build-check(1) blocked n/a test-armhf-armhf-xl-qcow2 1 build-check(1) blocked n/a test-armhf-armhf-xl-raw 1 build-check(1) blocked n/a test-armhf-armhf-xl-rtds 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 185746 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 185746 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 185746 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 185746 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 185746 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never 
pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass version targeted for testing: linuxf2295faba5e8249ae4082791bfc1664c88fff83a baseline version: linux6741e066ec7633450d3186946035c1f80c4226b8 Last test of basis 185746 2024-04-20 18:14:28 Z9 days Testing same since 185832 2024-04-27 15:44:00 Z2 days5 attempts People who touched revisions under test: Ai Chao Alan Stern Alex Deucher Alexander Gordeev Alexander Usyskin Alexey Izbyshev Alvaro Karsz Andi Shyti Andrew Morton AngeloGioacchino Del Regno Ard Biesheuvel Arnd Bergmann Arınç ÜNAL Bjorn Helgaas bolan wang Borislav Petkov (AMD) Borislav Petkov Brenton Simpson Carlos Llamas Carolina Jubran Catalin Marinas Chinmoy Ghosh Christophe JAILLET Chuanhong Guo Chuck Lever Coia Prant Daniel Golle Daniele Palmas Danilo Krummrich Dave Airlie Dave Hansen David S. Miller David Yang Dillon Varone Dmitry Baryshkov Dmitry Torokhov Emil Kronborg Eric Biggers Finn Thain Florian Fainelli Florian Westphal Geoffrey D. Bennett Gil Fine Greg Kroah-Hartman H. Peter Anvin (Intel) Hamza Mahfooz Hans de Goede Hardik Gajjar Hawking Zhang Hou Wenlong Ilpo Järvinen Ingo Molnar Jakub Kicinski Janusz Krzysztofik Jarkko Nikula Jason A. 
Donenfeld Jason Wang Jens Axboe Jeongjun Park Jerry Meng Jiri Kosina Johan Hovold Johan Hovold Jose Ignacio Tornos Martinez Josh Poimboeuf Kai-Heng Feng Kelvin Cao kernelci.org bot Konrad Dybcio Kuniyuki Iwashima Lei Chen
Re: [PATCH v2 2/3] x86: detect PIC aliasing on ports other than 0x[2A][01]
On 2023-12-18 15:48, Jan Beulich wrote: ... in order to also deny Dom0 access through the alias ports. Without this it is only giving the impression of denying access to both PICs. Unlike for CMOS/RTC, do detection very early, to avoid disturbing normal operation later on. Like for CMOS/RTC a fundamental assumption of the probing is that reads from the probed alias port won't have side effects in case it does not alias the respective PIC's one. Signed-off-by: Jan Beulich --- v2: Use new command line option. s/pic/8259A/. Re-base over new earlier patch. Use ISOLATE_LSB(). Hi, coming back to this patch, which I believe didn't receive much feedback and thus wasn't committed, for a reason: --- a/xen/arch/x86/i8259.c +++ b/xen/arch/x86/i8259.c @@ -19,6 +19,7 @@ #include #include #include +#include Here asm/setup is included, which provides the declaration for init_IRQ, defined below in the file void __init init_IRQ(void) @@ -343,6 +396,8 @@ void __init init_IRQ(void) which is defined here. This patch would, among other things, address a MISRA C Rule 8.4 violation ("A compatible declaration shall be visible when an object or function with external linkage is defined"). I did send a patch concerned only with the MISRA violation, but correctly it was pointed out that this one was doing that and more. Perhaps someone can have a look at this? init_8259A(0); +probe_8259A_alias(); + for (irq = 0; platform_legacy_irq(irq); irq++) { struct irq_desc *desc = irq_to_desc(irq); --- a/xen/arch/x86/include/asm/setup.h +++ b/xen/arch/x86/include/asm/setup.h @@ -46,6 +46,8 @@ extern uint8_t kbd_shift_flags; extern unsigned long highmem_start; #endif +extern unsigned int i8259A_alias_mask; + extern int8_t opt_smt; extern int8_t opt_probe_port_aliases; -- Nicola Vetrini, BSc Software Engineer, BUGSENG srl (https://bugseng.com)
Re: [PATCH 1/3] x86/boot: Explain how moving mod[0] works
On 2024-04-26 10:01, Andrew Cooper wrote: modules_headroom is a misleading name as it applies strictly to mod[0] only, and the movement loop is deeply unintuitive and completely undocumented. Provide help to whomever needs to look at this code next. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Daniel Smith CC: Christopher Clark --- xen/arch/x86/setup.c | 5 + 1 file changed, 5 insertions(+) diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index caf235c6286d..59907fae095f 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -1432,6 +1432,11 @@ void asmlinkage __init noreturn __start_xen(unsigned long mbi_p) /* Is the region suitable for relocating the multiboot modules? */ for ( j = mbi->mods_count - 1; j >= 0; j-- ) { +/* + * 'headroom' is a guess for the decompressed size and + * decompressor overheads of mod[0] (the dom0 kernel). When we + * move mod[0], we incorperate this as extra space at the start. incorporate With that: Reviewed-by: Jason Andryuk Thanks, Jason + */ unsigned long headroom = j ? 0 : modules_headroom; unsigned long size = PAGE_ALIGN(headroom + mod[j].mod_end);
[PATCH v2 3/4] x86/cpu-policy: Simplify recalculate_xstate()
Make use of the new xstate_uncompressed_size() helper rather than maintaining the running calculation while accumulating feature components. The rest of the CPUID data can come direct from the raw cpu policy. All per-component data form an ABI through the behaviour of the X{SAVE,RSTOR}* instructions. Use for_each_set_bit() rather than opencoding a slightly awkward version of it. Mask the attributes in ecx down based on the visible features. This isn't actually necessary for any components or attributes defined at the time of writing (up to AMX), but is added out of an abundance of caution. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné v2: * Tie ALIGN64 to xsavec rather than xsaves. --- xen/arch/x86/cpu-policy.c | 55 +++ xen/arch/x86/include/asm/xstate.h | 1 + 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c index 4b6d96276399..fc7933be8577 100644 --- a/xen/arch/x86/cpu-policy.c +++ b/xen/arch/x86/cpu-policy.c @@ -193,8 +193,7 @@ static void sanitise_featureset(uint32_t *fs) static void recalculate_xstate(struct cpu_policy *p) { uint64_t xstates = XSTATE_FP_SSE; -uint32_t xstate_size = XSTATE_AREA_MIN_SIZE; -unsigned int i, Da1 = p->xstate.Da1; +unsigned int i, ecx_mask = 0, Da1 = p->xstate.Da1; /* * The Da1 leaf is the only piece of information preserved in the common @@ -206,61 +205,47 @@ static void recalculate_xstate(struct cpu_policy *p) return; if ( p->basic.avx ) -{ xstates |= X86_XCR0_YMM; -xstate_size = max(xstate_size, - xstate_offsets[X86_XCR0_YMM_POS] + - xstate_sizes[X86_XCR0_YMM_POS]); -} if ( p->feat.mpx ) -{ xstates |= X86_XCR0_BNDREGS | X86_XCR0_BNDCSR; -xstate_size = max(xstate_size, - xstate_offsets[X86_XCR0_BNDCSR_POS] + - xstate_sizes[X86_XCR0_BNDCSR_POS]); -} if ( p->feat.avx512f ) -{ xstates |= X86_XCR0_OPMASK | X86_XCR0_ZMM | X86_XCR0_HI_ZMM; -xstate_size = max(xstate_size, - xstate_offsets[X86_XCR0_HI_ZMM_POS] + - xstate_sizes[X86_XCR0_HI_ZMM_POS]); 
-} if ( p->feat.pku ) -{ xstates |= X86_XCR0_PKRU; -xstate_size = max(xstate_size, - xstate_offsets[X86_XCR0_PKRU_POS] + - xstate_sizes[X86_XCR0_PKRU_POS]); -} -p->xstate.max_size = xstate_size; +/* Subleaf 0 */ +p->xstate.max_size = +xstate_uncompressed_size(xstates & ~XSTATE_XSAVES_ONLY); p->xstate.xcr0_low = xstates & ~XSTATE_XSAVES_ONLY; p->xstate.xcr0_high = (xstates & ~XSTATE_XSAVES_ONLY) >> 32; +/* Subleaf 1 */ p->xstate.Da1 = Da1; +if ( p->xstate.xsavec ) +ecx_mask |= XSTATE_ALIGN64; + if ( p->xstate.xsaves ) { +ecx_mask |= XSTATE_XSS; p->xstate.xss_low = xstates & XSTATE_XSAVES_ONLY; p->xstate.xss_high = (xstates & XSTATE_XSAVES_ONLY) >> 32; } -else -xstates &= ~XSTATE_XSAVES_ONLY; -for ( i = 2; i < min(63UL, ARRAY_SIZE(p->xstate.comp)); ++i ) +/* Subleafs 2+ */ +xstates &= ~XSTATE_FP_SSE; +BUILD_BUG_ON(ARRAY_SIZE(p->xstate.comp) < 63); +for_each_set_bit ( i, , 63 ) { -uint64_t curr_xstate = 1UL << i; - -if ( !(xstates & curr_xstate) ) -continue; - -p->xstate.comp[i].size = xstate_sizes[i]; -p->xstate.comp[i].offset = xstate_offsets[i]; -p->xstate.comp[i].xss= curr_xstate & XSTATE_XSAVES_ONLY; -p->xstate.comp[i].align = curr_xstate & xstate_align; +/* + * Pass through size (eax) and offset (ebx) directly. Visibility of + * attributes in ecx limited by visible features in Da1. + */ +p->xstate.raw[i].a = raw_cpu_policy.xstate.raw[i].a; +p->xstate.raw[i].b = raw_cpu_policy.xstate.raw[i].b; +p->xstate.raw[i].c = raw_cpu_policy.xstate.raw[i].c & ecx_mask; } } diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h index f5115199d4f9..bfb66dd766b6 100644 --- a/xen/arch/x86/include/asm/xstate.h +++ b/xen/arch/x86/include/asm/xstate.h @@ -40,6 +40,7 @@ extern uint32_t mxcsr_mask; #define XSTATE_XSAVES_ONLY 0 #define XSTATE_COMPACTION_ENABLED (1ULL << 63) +#define XSTATE_XSS (1U << 0) #define XSTATE_ALIGN64 (1U << 1) extern u64 xfeature_mask; -- 2.30.2
[PATCH v2 4/4] x86/cpuid: Fix handling of xsave dynamic leaves
If max leaf is greater than 0xd but xsave not available to the guest, then the current XSAVE size should not be filled in. This is a latent bug for now as the guest max leaf is 0xd, but will become problematic in the future. The comment concerning XSS state is wrong. VT-x doesn't manage host/guest state automatically, but there is provision for "host only" bits to be set, so the implications are still accurate. Introduce {xstate,hw}_compressed_size() helpers to mirror the uncompressed ones. This in turn highlights a bug in xstate_init(). Defaulting this_cpu(xss) to ~0 requires a forced write to clear it back out. This in turn highlights that it's only a safe default on systems with XSAVES. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné The more I think about it, the more I think that cross-checking with hardware is a bad move. It's horribly expensive, and for supervisor states in particular, liable to interfere with functionality. v2: * Cope with blind reads of CPUID.0xD[1] prior to %xcr0 having been set up. --- xen/arch/x86/cpuid.c | 24 xen/arch/x86/include/asm/xstate.h | 1 + xen/arch/x86/xstate.c | 64 ++- 3 files changed, 72 insertions(+), 17 deletions(-) diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c index 7a38e032146a..a822e80c7ea7 100644 --- a/xen/arch/x86/cpuid.c +++ b/xen/arch/x86/cpuid.c @@ -330,23 +330,15 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf, case XSTATE_CPUID: switch ( subleaf ) { -case 1: -if ( !p->xstate.xsavec && !p->xstate.xsaves ) -break; - -/* - * TODO: Figure out what to do for XSS state. VT-x manages host - * vs guest MSR_XSS automatically, so as soon as we start - * supporting any XSS states, the wrong XSS will be in context. - */ -BUILD_BUG_ON(XSTATE_XSAVES_ONLY != 0); -fallthrough; case 0: -/* - * Read CPUID[0xD,0/1].EBX from hardware. They vary with enabled - * XSTATE, and appropriate XCR0|XSS are in context. 
- */ -res->b = cpuid_count_ebx(leaf, subleaf); +if ( p->basic.xsave ) +res->b = xstate_uncompressed_size(v->arch.xcr0); +break; + +case 1: +if ( p->xstate.xsavec ) +res->b = xstate_compressed_size(v->arch.xcr0 | +v->arch.msrs->xss.raw); break; } break; diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h index bfb66dd766b6..da1d89d2f416 100644 --- a/xen/arch/x86/include/asm/xstate.h +++ b/xen/arch/x86/include/asm/xstate.h @@ -109,6 +109,7 @@ void xstate_free_save_area(struct vcpu *v); int xstate_alloc_save_area(struct vcpu *v); void xstate_init(struct cpuinfo_x86 *c); unsigned int xstate_uncompressed_size(uint64_t xcr0); +unsigned int xstate_compressed_size(uint64_t xstates); static inline uint64_t xgetbv(unsigned int index) { diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c index a94f4025fce5..b2cf3ae6acee 100644 --- a/xen/arch/x86/xstate.c +++ b/xen/arch/x86/xstate.c @@ -614,6 +614,65 @@ unsigned int xstate_uncompressed_size(uint64_t xcr0) return size; } +static unsigned int hw_compressed_size(uint64_t xstates) +{ +uint64_t curr_xcr0 = get_xcr0(), curr_xss = get_msr_xss(); +unsigned int size; +bool ok; + +ok = set_xcr0(xstates & ~XSTATE_XSAVES_ONLY); +ASSERT(ok); +set_msr_xss(xstates & XSTATE_XSAVES_ONLY); + +size = cpuid_count_ebx(XSTATE_CPUID, 1); + +ok = set_xcr0(curr_xcr0); +ASSERT(ok); +set_msr_xss(curr_xss); + +return size; +} + +unsigned int xstate_compressed_size(uint64_t xstates) +{ +unsigned int i, size = XSTATE_AREA_MIN_SIZE; + +if ( xstates == 0 ) /* TODO: clean up paths passing 0 in here. */ +return 0; + +if ( xstates <= (X86_XCR0_SSE | X86_XCR0_FP) ) +return size; + +/* + * For the compressed size, every component matters. Some are + * automatically rounded up to 64 first. + */ +xstates &= ~XSTATE_FP_SSE; +for_each_set_bit ( i, , 63 ) +{ +if ( test_bit(i, _align) ) +size = ROUNDUP(size, 64); + +size += xstate_sizes[i]; +} + +/* In debug builds, cross-check our calculation with hardware. 
*/ +if ( IS_ENABLED(CONFIG_DEBUG) ) +{ +unsigned int hwsize; + +xstates |= XSTATE_FP_SSE; +hwsize = hw_compressed_size(xstates); + +if ( size != hwsize ) +printk_once(XENLOG_ERR "%s(%#"PRIx64") size %#x != hwsize %#x\n", +__func__, xstates, size, hwsize); +size = hwsize; +} + +return size; +} + static bool valid_xcr0(uint64_t xcr0) { /* FP must be unconditionally set. */ @@ -681,7 +740,8 @@ void
[PATCH v2 0/4] x86/xstate: Fixes to size calculations
Various fixes and improvements to xsave image size calculations. Since v1: * Rebase over exposing XSAVEC. This has highlighted several latent bugs. * Rework the uncompressed size handling. LWP / APX_F make for much sadness. Be aware that Intel and AMD have different ABIs for the xstate layout. Andrew Cooper (4): x86/hvm: Defer the size calculation in hvm_save_cpu_xsave_states() x86/xstate: Rework xstate_ctxt_size() as xstate_uncompressed_size() x86/cpu-policy: Simplify recalculate_xstate() x86/cpuid: Fix handling of xsave dynamic leaves xen/arch/x86/cpu-policy.c | 55 ++- xen/arch/x86/cpuid.c | 24 +++ xen/arch/x86/domctl.c | 2 +- xen/arch/x86/hvm/hvm.c| 12 +++- xen/arch/x86/include/asm/xstate.h | 4 +- xen/arch/x86/xstate.c | 111 -- 6 files changed, 145 insertions(+), 63 deletions(-) -- 2.30.2
[PATCH v2 2/4] x86/xstate: Rework xstate_ctxt_size() as xstate_uncompressed_size()
We're soon going to need a compressed helper of the same form. The size of the uncompressed image depends on the single element with the largest offset+size. Sadly this isn't always the element with the largest index. Retain the cross-check with hardware in debug builds, but forgo it normal builds. In particular, this means that the migration paths don't need to mess with XCR0 just to sanity check the buffer size. No practical change. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné v2: * Scan all features. LWP/APX_F are out-of-order. --- xen/arch/x86/domctl.c | 2 +- xen/arch/x86/hvm/hvm.c| 2 +- xen/arch/x86/include/asm/xstate.h | 2 +- xen/arch/x86/xstate.c | 45 +++ 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c index 9a72d57333e9..c2f2016ed45a 100644 --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -833,7 +833,7 @@ long arch_do_domctl( uint32_t offset = 0; #define PV_XSAVE_HDR_SIZE (2 * sizeof(uint64_t)) -#define PV_XSAVE_SIZE(xcr0) (PV_XSAVE_HDR_SIZE + xstate_ctxt_size(xcr0)) +#define PV_XSAVE_SIZE(xcr0) (PV_XSAVE_HDR_SIZE + xstate_uncompressed_size(xcr0)) ret = -ESRCH; if ( (evc->vcpu >= d->max_vcpus) || diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 9594e0a5c530..9e677d891d6c 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1191,7 +1191,7 @@ HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, NULL, hvm_load_cpu_ctxt, 1, #define HVM_CPU_XSAVE_SIZE(xcr0) (offsetof(struct hvm_hw_cpu_xsave, \ save_area) + \ - xstate_ctxt_size(xcr0)) + xstate_uncompressed_size(xcr0)) static int cf_check hvm_save_cpu_xsave_states( struct vcpu *v, hvm_domain_context_t *h) diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h index c08c267884f0..f5115199d4f9 100644 --- a/xen/arch/x86/include/asm/xstate.h +++ b/xen/arch/x86/include/asm/xstate.h @@ -107,7 +107,7 @@ void compress_xsave_states(struct vcpu *v, const void *src, 
unsigned int size); void xstate_free_save_area(struct vcpu *v); int xstate_alloc_save_area(struct vcpu *v); void xstate_init(struct cpuinfo_x86 *c); -unsigned int xstate_ctxt_size(u64 xcr0); +unsigned int xstate_uncompressed_size(uint64_t xcr0); static inline uint64_t xgetbv(unsigned int index) { diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c index 99cedb4f5e24..a94f4025fce5 100644 --- a/xen/arch/x86/xstate.c +++ b/xen/arch/x86/xstate.c @@ -183,7 +183,7 @@ void expand_xsave_states(const struct vcpu *v, void *dest, unsigned int size) /* Check there is state to serialise (i.e. at least an XSAVE_HDR) */ BUG_ON(!v->arch.xcr0_accum); /* Check there is the correct room to decompress into. */ -BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum)); +BUG_ON(size != xstate_uncompressed_size(v->arch.xcr0_accum)); if ( !(xstate->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED) ) { @@ -245,7 +245,7 @@ void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size) u64 xstate_bv, valid; BUG_ON(!v->arch.xcr0_accum); -BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum)); +BUG_ON(size != xstate_uncompressed_size(v->arch.xcr0_accum)); ASSERT(!xsave_area_compressed(src)); xstate_bv = ((const struct xsave_struct *)src)->xsave_hdr.xstate_bv; @@ -567,16 +567,51 @@ static unsigned int hw_uncompressed_size(uint64_t xcr0) return size; } -/* Fastpath for common xstate size requests, avoiding reloads of xcr0. */ -unsigned int xstate_ctxt_size(u64 xcr0) +unsigned int xstate_uncompressed_size(uint64_t xcr0) { +unsigned int size = XSTATE_AREA_MIN_SIZE, i; + if ( xcr0 == xfeature_mask ) return xsave_cntxt_size; if ( xcr0 == 0 ) /* TODO: clean up paths passing 0 in here. */ return 0; -return hw_uncompressed_size(xcr0); +if ( xcr0 <= (X86_XCR0_SSE | X86_XCR0_FP) ) +return size; + +/* + * For the non-legacy states, search all active states and find the + * maximum offset+size. Some states (e.g. LWP, APX_F) are out-of-order + * with respect to their index. 
+ */ +xcr0 &= ~XSTATE_FP_SSE; +for_each_set_bit ( i, , 63 ) +{ +unsigned int s; + +ASSERT(xstate_offsets[i] && xstate_sizes[i]); + +s = xstate_offsets[i] && xstate_sizes[i]; + +size = max(size, s); +} + +/* In debug builds, cross-check our calculation with hardware. */ +if ( IS_ENABLED(CONFIG_DEBUG) ) +{ +unsigned int hwsize; + +xcr0 |= XSTATE_FP_SSE; +hwsize = hw_uncompressed_size(xcr0); + +if ( size != hwsize ) +printk_once(XENLOG_ERR "%s(%#"PRIx64") size %#x != hwsize %#x\n", +__func__,
[PATCH v2 1/4] x86/hvm: Defer the size calculation in hvm_save_cpu_xsave_states()
HVM_CPU_XSAVE_SIZE() may rewrite %xcr0 twice. Defer the calculation until after we've decided to write out an XSAVE record. Note in hvm_load_cpu_xsave_states() that there were versions of Xen which wrote out a useless XSAVE record. This sadly limits our ability to tidy up the existing infrastructure. Also leave a note in xstate_ctxt_size() that 0 still needs tolerating for now. No functional change. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné v2: * New --- xen/arch/x86/hvm/hvm.c | 10 -- xen/arch/x86/xstate.c | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 0ce45b177cf4..9594e0a5c530 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1197,12 +1197,13 @@ static int cf_check hvm_save_cpu_xsave_states( struct vcpu *v, hvm_domain_context_t *h) { struct hvm_hw_cpu_xsave *ctxt; -unsigned int size = HVM_CPU_XSAVE_SIZE(v->arch.xcr0_accum); +unsigned int size; int err; -if ( !cpu_has_xsave || !xsave_enabled(v) ) +if ( !xsave_enabled(v) ) return 0; /* do nothing */ +size = HVM_CPU_XSAVE_SIZE(v->arch.xcr0_accum); err = _hvm_init_entry(h, CPU_XSAVE_CODE, v->vcpu_id, size); if ( err ) return err; @@ -1255,6 +1256,11 @@ static int cf_check hvm_load_cpu_xsave_states( if ( !cpu_has_xsave ) return -EOPNOTSUPP; +/* + * Note: Xen prior to 4.12 would write out empty XSAVE records for VMs + * running on XSAVE-capable hardware but without XSAVE active. + */ + /* Customized checking for entry since our entry is of variable length */ desc = (struct hvm_save_descriptor *)>data[h->cur]; if ( sizeof (*desc) > h->size - h->cur) diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c index cf94761d0542..99cedb4f5e24 100644 --- a/xen/arch/x86/xstate.c +++ b/xen/arch/x86/xstate.c @@ -573,7 +573,7 @@ unsigned int xstate_ctxt_size(u64 xcr0) if ( xcr0 == xfeature_mask ) return xsave_cntxt_size; -if ( xcr0 == 0 ) +if ( xcr0 == 0 ) /* TODO: clean up paths passing 0 in here. 
*/ return 0; return hw_uncompressed_size(xcr0); -- 2.30.2
Re: [PATCH 07/15] xen/overlay: Enable device tree overlay assignment to running domains
On 29/04/2024 04:36, Henry Wang wrote: Hi Jan, Julien, Stefano, Hi Henry, On 4/24/2024 2:05 PM, Jan Beulich wrote: On 24.04.2024 05:34, Henry Wang wrote: --- a/xen/include/public/sysctl.h +++ b/xen/include/public/sysctl.h @@ -1197,7 +1197,9 @@ struct xen_sysctl_dt_overlay { #define XEN_SYSCTL_DT_OVERLAY_ADD 1 #define XEN_SYSCTL_DT_OVERLAY_REMOVE 2 uint8_t overlay_op; /* IN: Add or remove. */ - uint8_t pad[3]; /* IN: Must be zero. */ + bool domain_mapping; /* IN: True of False. */ + uint8_t pad[2]; /* IN: Must be zero. */ + uint32_t domain_id; }; If you merely re-purposed padding fields, all would be fine without bumping the interface version. Yet you don't, albeit for an unclear reason: Why uint32_t rather than domid_t? And on top of that - why a separate boolean when you could use e.g. DOMID_INVALID to indicate "no domain mapping"? I think both of your suggestion make great sense. I will follow the suggestion in v2. That said - anything taking a domain ID is certainly suspicious in a sysctl. Judging from the description you really mean this to be a domctl. Anything else will require extra justification. I also think a domctl is better. I had a look at the history of the already merged series, it looks like in the first version of merged part 1 [1], the hypercall was implemented as the domctl in the beginning but later in v2 changed to sysctl. I think this makes sense as the scope of that time is just to make Xen aware of the device tree node via Xen device tree. However this is now a problem for the current part where the scope (and the end goal) is extended to assign the added device to Linux Dom0/DomU via device tree overlays. I am not sure which way is better, should we repurposing the sysctl to domctl or maybe add another domctl (I am worrying about the duplication because basically we need the same sysctl functionality but now with a domid in it)? What do you think? 
I am not entirely sure this is a good idea to try to add the device in Xen and attach it to the guests at the same time. Imagine the following situation: 1) Add and attach devices 2) The domain is rebooted 3) Detach and remove devices After step 2, you technically have a new domain. You could have also a case where this is a completely different guest. So the flow would look a little bit weird (you create the DT overlay with domain A but remove with domain B). So, at the moment, it feels like the add/attach (resp detach/remove) operations should happen separately. Can you clarify why you want to add devices to Xen and attach to a guest within a single hypercall? Cheers, -- Julien Grall
[linux-linus test] 185862: tolerable FAIL - PUSHED
flight 185862 linux-linus real [real] flight 185869 linux-linus real-retest [real] http://logs.test-lab.xenproject.org/osstest/logs/185862/ http://logs.test-lab.xenproject.org/osstest/logs/185869/ Failures :-/ but no regressions. Tests which are failing intermittently (not blocking): test-amd64-amd64-xl-qemut-debianhvm-i386-xsm 12 debian-hvm-install fail pass in 185869-retest test-armhf-armhf-libvirt 8 xen-bootfail pass in 185869-retest test-armhf-armhf-xl-arndale 8 xen-bootfail pass in 185869-retest Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt 16 saverestore-support-check fail in 185869 like 185858 test-armhf-armhf-xl-arndale 15 migrate-support-check fail in 185869 never pass test-armhf-armhf-xl-arndale 16 saverestore-support-check fail in 185869 never pass test-armhf-armhf-libvirt15 migrate-support-check fail in 185869 never pass test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 185858 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 185858 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 185858 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 185858 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 185858 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never 
pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-qcow214 migrate-support-checkfail never pass test-armhf-armhf-xl-qcow215 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl-raw 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass version targeted for testing: linuxe67572cd2204894179d89bd7b984072f19313b03 baseline version: linux245c8e81741b51fe1281964e4a6525311be6858f Last test of 
basis 185858 2024-04-28 19:42:05 Z0 days Testing same since 185862 2024-04-29 05:18:20 Z0 days1 attempts People who touched revisions under test: Linus Torvalds jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm
Re: [PATCH v6 8/8] xen: allow up to 16383 cpus
Hi Juergen, On 29/04/2024 12:28, Jürgen Groß wrote: On 29.04.24 13:04, Julien Grall wrote: Hi Juergen, Sorry for the late reply. On 29/04/2024 11:33, Juergen Gross wrote: On 08.04.24 09:10, Jan Beulich wrote: On 27.03.2024 16:22, Juergen Gross wrote: With lock handling now allowing up to 16384 cpus (spinlocks can handle 65535 cpus, rwlocks can handle 16384 cpus), raise the allowed limit for the number of cpus to be configured to 16383. The new limit is imposed by IOMMU_CMD_BUFFER_MAX_ENTRIES and QINVAL_MAX_ENTRY_NR required to be larger than 2 * CONFIG_NR_CPUS. Signed-off-by: Juergen Gross Acked-by: Jan Beulich I'd prefer this to also gain an Arm ack, though. Any comment from Arm side? Can you clarify what the new limits mean in term of (security) support? Are we now claiming that Xen will work perfectly fine on platforms with up to 16383? If so, I can't comment for x86, but for Arm, I am doubtful that it would work without any (at least performance) issues. AFAIK, this is also an untested configuration. In fact I would be surprised if Xen on Arm was tested with more than a couple of hundreds cores (AFAICT the Ampere CPUs has 192 CPUs). I think we should add a security support limit for the number of physical cpus similar to the memory support limit we already have in place. For x86 I'd suggest 4096 cpus for security support (basically the limit we have with this patch), but I'm open for other suggestions, too. I have no idea about any sensible limits for Arm32/Arm64. I am not entirely. Bertrand, Michal, Stefano, should we use 192 (the number of CPUs from Ampere)? Cheers, -- Julien Grall
[ovmf test] 185868: all pass - PUSHED
flight 185868 ovmf real [real] http://logs.test-lab.xenproject.org/osstest/logs/185868/ Perfect :-) All tests in this flight passed as required version targeted for testing: ovmf 094727264f887e275444bd11d9d99c651a85c2e4 baseline version: ovmf c0dfe3ec1f364dbdaf6b241e01343e560b21dd03 Last test of basis 185803 2024-04-26 03:11:18 Z3 days Testing same since 185868 2024-04-29 10:45:51 Z0 days1 attempts People who touched revisions under test: Foster Nong Liming Gao jobs: build-amd64-xsm pass build-i386-xsm pass build-amd64 pass build-i386 pass build-amd64-libvirt pass build-i386-libvirt pass build-amd64-pvopspass build-i386-pvops pass test-amd64-amd64-xl-qemuu-ovmf-amd64 pass sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Pushing revision : To xenbits.xen.org:/home/xen/git/osstest/ovmf.git c0dfe3ec1f..094727264f 094727264f887e275444bd11d9d99c651a85c2e4 -> xen-tested-master
Re: [XEN PATCH] xen/mem_access: address violations of MISRA C: 2012 Rule 8.4
On 29.04.2024 17:45, Alessandro Zucchelli wrote: > Change #ifdef CONFIG_MEM_ACCESS by OR-ing defined(CONFIG_ARM), > allowing asm/mem_access.h to be included in all ARM build configurations. > This is to address the violation of MISRA C: 2012 Rule 8.4 which states: > "A compatible declaration shall be visible when an object or function > with external linkage is defined". Functions p2m_mem_access_check > and p2m_mem_access_check_and_get_page when CONFIG_MEM_ACCESS is not > defined in ARM builds don't have visible declarations in the file > containing their definitions. > > Signed-off-by: Alessandro Zucchelli > --- > xen/include/xen/mem_access.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/xen/include/xen/mem_access.h b/xen/include/xen/mem_access.h > index 87d93b31f6..ec0630677d 100644 > --- a/xen/include/xen/mem_access.h > +++ b/xen/include/xen/mem_access.h > @@ -33,7 +33,7 @@ > */ > struct vm_event_st; > > -#ifdef CONFIG_MEM_ACCESS > +#if defined(CONFIG_MEM_ACCESS) || defined(CONFIG_ARM) > #include > #endif This doesn't look quite right. If Arm supports mem-access, why would it not set MEM_ACCESS=y? Whereas if it's only stubs that Arm supplies, then those would better move here, thus eliminating the need for a per-arch stub header (see what was e.g. done for numa.h). This way RISC-V and PPC (and whatever is to come) would then be taken care of as well. Jan
Re: [XEN PATCH v1 7/7] x86/MCE: optional build of AMD/Intel MCE code
On 27.04.2024 01:16, Stefano Stabellini wrote: > On Tue, 23 Apr 2024, Sergiy Kibrik wrote: >> --- a/xen/arch/x86/cpu/mcheck/Makefile >> +++ b/xen/arch/x86/cpu/mcheck/Makefile >> @@ -1,12 +1,10 @@ >> -obj-y += amd_nonfatal.o >> -obj-y += mce_amd.o >> obj-y += mcaction.o >> obj-y += barrier.o >> -obj-y += intel-nonfatal.o >> obj-y += mctelem.o >> obj-y += mce.o >> obj-y += mce-apei.o >> -obj-y += mce_intel.o >> +obj-$(CONFIG_AMD) += mce_amd.o amd_nonfatal.o >> +obj-$(CONFIG_INTEL) += mce_intel.o intel-nonfatal.o >> obj-y += non-fatal.o >> obj-y += util.o >> obj-y += vmce.o > > Awesome! Almost. I'd appreciate if the ordering of files would be retained. It's not quite alphabetic, but still. Moving mce_amd.o and mcaction.o to their designated slots may or may not be done right here. >> --- a/xen/arch/x86/cpu/mcheck/non-fatal.c >> +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c >> @@ -24,14 +24,20 @@ static int __init cf_check >> init_nonfatal_mce_checker(void) >> * Check for non-fatal errors every MCE_RATE s >> */ >> switch (c->x86_vendor) { >> +#ifdef CONFIG_AMD >> case X86_VENDOR_AMD: >> case X86_VENDOR_HYGON: >> /* Assume we are on K8 or newer AMD or Hygon CPU here */ >> amd_nonfatal_mcheck_init(c); >> break; >> +#endif >> +#ifdef CONFIG_INTEL >> case X86_VENDOR_INTEL: >> intel_nonfatal_mcheck_init(c); >> break; >> +#endif >> +default: >> +return -ENODEV; This, while perhaps desirable, doesn't fit ... >> } >> printk(KERN_INFO "mcheck_poll: Machine check polling timer started.\n"); >> return 0; ... earlier behavior, and hence is somewhat unexpected in a change which, by its description, looks like a "no functional change" one. > For consistency in all other cases this patch series uses IS_ENABLED > checks. They could be used here as well. Hmm, I think for switch() statements like this (see also comments elsewhere on this series) using #ifdef is overall better. Jan
[PATCH] xen/x86: add extra pages to unpopulated-alloc if available
Commit 262fc47ac174 ('xen/balloon: don't use PV mode extra memory for zone device allocations') removed the addition of the extra memory ranges to the unpopulated range allocator, using those only for the balloon driver. This forces the unpopulated allocator to attach hotplug ranges even when spare memory (as part of the extra memory ranges) is available. Furthermore, on PVH domains it defeats the purpose of commit 38620fc4e893 ('x86/xen: attempt to inflate the memory balloon on PVH'), as extra memory ranges would only be used to map foreign memory if the kernel is built without XEN_UNPOPULATED_ALLOC support. Fix this by adding a helpers that adds the extra memory ranges to the list of unpopulated pages, and zeroes the ranges so they are not also consumed by the balloon driver. This should have been part of 38620fc4e893, hence the fixes tag. Note the current logic relies on unpopulated_init() (and hence arch_xen_unpopulated_init()) always being called ahead of balloon_init(), so that the extra memory regions are consumed by arch_xen_unpopulated_init(). Fixes: 38620fc4e893 ('x86/xen: attempt to inflate the memory balloon on PVH') Signed-off-by: Roger Pau Monné --- There's a lot of duplication between the unpopulated allocator and the balloon driver. I feel like the balloon driver should request any extra memory it needs to the unpopulated allocator, so that the current helpers provided by the XEN_BALLOON_MEMORY_HOTPLUG option could be replaced with wrappers around the unpopulated handlers. However this is much more work than strictly required here, and won't be suitable for backport IMO. Hence the more contained fix presented in this patch. 
--- arch/x86/xen/enlighten.c | 33 + 1 file changed, 33 insertions(+) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a01ca255b0c6..b88722dfc4f8 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -382,3 +382,36 @@ void __init xen_add_extra_mem(unsigned long start_pfn, unsigned long n_pfns) memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); } + +#ifdef CONFIG_XEN_UNPOPULATED_ALLOC +int __init arch_xen_unpopulated_init(struct resource **res) +{ + unsigned int i; + + if (!xen_domain()) + return -ENODEV; + + /* Must be set strictly before calling xen_free_unpopulated_pages(). */ + *res = _resource; + + /* +* Initialize with pages from the extra memory regions (see +* arch/x86/xen/setup.c). +*/ + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + unsigned int j; + + for (j = 0; j < xen_extra_mem[i].n_pfns; j++) { + struct page *pg = + pfn_to_page(xen_extra_mem[i].start_pfn + j); + + xen_free_unpopulated_pages(1, ); + } + + /* Zero so region is not also added to the balloon driver. */ + xen_extra_mem[i].n_pfns = 0; + } + + return 0; +} +#endif -- 2.44.0
Re: [XEN PATCH v1 6/7] x86/MCE: guard call to Intel-specific intel_get_extended_msrs()
On 27.04.2024 01:14, Stefano Stabellini wrote: > On Tue, 23 Apr 2024, Sergiy Kibrik wrote: >> Add check for CONFIG_INTEL build option to conditional call of this routine, >> so that if Intel support is disabled the call would be eliminated. >> >> No functional change intended. >> >> Signed-off-by: Sergiy Kibrik > > Reviewed-by: Stefano Stabellini Acked-by: Jan Beulich Aiui this doesn't depend on earlier patches and hence could go in right away. Jan
[XEN PATCH] xen/mem_access: address violations of MISRA C: 2012 Rule 8.4
Change #ifdef CONFIG_MEM_ACCESS by OR-ing defined(CONFIG_ARM), allowing asm/mem_access.h to be included in all ARM build configurations. This is to address the violation of MISRA C: 2012 Rule 8.4 which states: "A compatible declaration shall be visible when an object or function with external linkage is defined". Functions p2m_mem_access_check and p2m_mem_access_check_and_get_page when CONFIG_MEM_ACCESS is not defined in ARM builds don't have visible declarations in the file containing their definitions. Signed-off-by: Alessandro Zucchelli --- xen/include/xen/mem_access.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xen/include/xen/mem_access.h b/xen/include/xen/mem_access.h index 87d93b31f6..ec0630677d 100644 --- a/xen/include/xen/mem_access.h +++ b/xen/include/xen/mem_access.h @@ -33,7 +33,7 @@ */ struct vm_event_st; -#ifdef CONFIG_MEM_ACCESS +#if defined(CONFIG_MEM_ACCESS) || defined(CONFIG_ARM) #include #endif -- 2.25.1
Re: [XEN PATCH v1 5/7] x86/MCE: guard {intel/amd}_mcheck_init() calls
On 23.04.2024 10:56, Sergiy Kibrik wrote: > --- a/xen/arch/x86/cpu/mcheck/mce.c > +++ b/xen/arch/x86/cpu/mcheck/mce.c > @@ -761,7 +761,8 @@ void mcheck_init(struct cpuinfo_x86 *c, bool bsp) > { > case X86_VENDOR_AMD: > case X86_VENDOR_HYGON: > -inited = amd_mcheck_init(c, bsp); > +inited = IS_ENABLED(CONFIG_AMD) ? amd_mcheck_init(c, bsp) : > + mcheck_unset; > break; > > case X86_VENDOR_INTEL: > @@ -769,7 +770,8 @@ void mcheck_init(struct cpuinfo_x86 *c, bool bsp) > { > case 6: > case 15: > -inited = intel_mcheck_init(c, bsp); > +inited = IS_ENABLED(CONFIG_INTEL) ? intel_mcheck_init(c, bsp) : > +mcheck_unset; > break; > } > break; Same question as on an earlier patch: Why set a value different from what "default:" below here does (really: keeps)? And why not arrange to have that "default:" take care of what's build-disabled? Jan
Re: [XEN PATCH v1 4/7] x86/MCE: guard lmce_support/cmci_support
On 23.04.2024 10:54, Sergiy Kibrik wrote: > Guard access to Intel-specific lmce_support & cmci_support variables in > common MCE/VMCE code. These are set in Intel-specific parts of mcheck code > and can potentially be skipped if building for non-intel platform by > disabling CONFIG_INTEL option. > > Signed-off-by: Sergiy Kibrik See comments given for patch 2. Jan
Re: [XEN PATCH v1 3/7] x86/MCE: guard access to Intel/AMD-specific MCA MSRs
On 23.04.2024 10:52, Sergiy Kibrik wrote: > --- a/xen/arch/x86/cpu/mcheck/vmce.c > +++ b/xen/arch/x86/cpu/mcheck/vmce.c > @@ -141,12 +141,14 @@ static int bank_mce_rdmsr(const struct vcpu *v, > uint32_t msr, uint64_t *val) > case X86_VENDOR_CENTAUR: > case X86_VENDOR_SHANGHAI: > case X86_VENDOR_INTEL: > -ret = vmce_intel_rdmsr(v, msr, val); > +ret = IS_ENABLED(CONFIG_INTEL) ? > + vmce_intel_rdmsr(v, msr, val) : -ENODEV; > break; > > case X86_VENDOR_AMD: > case X86_VENDOR_HYGON: > -ret = vmce_amd_rdmsr(v, msr, val); > +ret = IS_ENABLED(CONFIG_AMD) ? > + vmce_amd_rdmsr(v, msr, val) : -ENODEV; > break; Why -ENODEV when ... > default: ... below here 0 is put into "ret"? And why not have the default case take care of unsupported/unrecognized vendors uniformly? Jan
Re: [XEN PATCH v1 2/7] x86/intel: guard vmce_has_lmce() with INTEL option
On 23.04.2024 10:50, Sergiy Kibrik wrote: > Since MCG_LMCE_P bit is specific to Intel CPUs That's the case now. It could change going forward, and an underlying hypervisor might also have (obscure?) reasons to surface it elsewhere. > the code to check it can > possibly be excluded from build if !CONFIG_INTEL. With these guards > calls to vmce_has_lmce() are eliminated and mce_intel.c can end up > not being built. > > Also replace boilerplate code that checks for MCG_LMCE_P flag with > vmce_has_lmce(), which might contribute to readability a bit. Alternatively, have you considered making that function an inline one in a suitable header? Besides addressing your build issue (I think), ... > --- a/xen/arch/x86/cpu/mcheck/vmce.c > +++ b/xen/arch/x86/cpu/mcheck/vmce.c > @@ -199,7 +199,7 @@ int vmce_rdmsr(uint32_t msr, uint64_t *val) > * bits are always set in guest MSR_IA32_FEATURE_CONTROL by Xen, so > it > * does not need to check them here. > */ > -if ( cur->arch.vmce.mcg_cap & MCG_LMCE_P ) > +if ( IS_ENABLED(CONFIG_INTEL) && vmce_has_lmce(cur) ) ... doing so would alternatively also permit integrating the IS_ENABLED() into the function, rather than repeating the same ... > @@ -324,7 +324,7 @@ int vmce_wrmsr(uint32_t msr, uint64_t val) > break; > > case MSR_IA32_MCG_EXT_CTL: > -if ( (cur->arch.vmce.mcg_cap & MCG_LMCE_P) && > +if ( IS_ENABLED(CONFIG_INTEL) && vmce_has_lmce(cur) && > !(val & ~MCG_EXT_CTL_LMCE_EN) ) > cur->arch.vmce.mcg_ext_ctl = val; > else > --- a/xen/arch/x86/msr.c > +++ b/xen/arch/x86/msr.c > @@ -86,7 +86,7 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) > goto gp_fault; > > *val = IA32_FEATURE_CONTROL_LOCK; > -if ( vmce_has_lmce(v) ) > +if ( IS_ENABLED(CONFIG_INTEL) && vmce_has_lmce(v) ) > *val |= IA32_FEATURE_CONTROL_LMCE_ON; > if ( cp->basic.vmx ) > *val |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX; ... three times. Jan
Re: [XEN PATCH v1 1/7] x86/vpmu: separate amd/intel vPMU code
On 23.04.2024 10:48, Sergiy Kibrik wrote: > --- a/xen/arch/x86/include/asm/vpmu.h > +++ b/xen/arch/x86/include/asm/vpmu.h > @@ -11,6 +11,7 @@ > #define __ASM_X86_HVM_VPMU_H_ > > #include > +#include > > #define vcpu_vpmu(vcpu) (&(vcpu)->arch.vpmu) > #define vpmu_vcpu(vpmu) container_of((vpmu), struct vcpu, arch.vpmu) > @@ -42,9 +43,27 @@ struct arch_vpmu_ops { > #endif > }; > > +#ifdef CONFIG_INTEL > const struct arch_vpmu_ops *core2_vpmu_init(void); > +#else > +static inline const struct arch_vpmu_ops *core2_vpmu_init(void) > +{ > +return ERR_PTR(-ENODEV); > +} > +#endif > +#ifdef CONFIG_AMD > const struct arch_vpmu_ops *amd_vpmu_init(void); > const struct arch_vpmu_ops *hygon_vpmu_init(void); > +#else > +static inline const struct arch_vpmu_ops *amd_vpmu_init(void) > +{ > +return ERR_PTR(-ENODEV); > +} > +static inline const struct arch_vpmu_ops *hygon_vpmu_init(void) > +{ > +return ERR_PTR(-ENODEV); > +} > +#endif Any reason you don't follow the approach used in patch 7, putting simple #ifdef in the switch() in vpmu_init()? That would avoid the need for the three almost identical stubs. Jan
Re: [PATCH 2/5] x86/cpu-policy: Add SVM features already used by Xen
On 29/04/2024 4:16 pm, Andrew Cooper wrote: > diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py > index bf3f9ec01e6e..f07b1f4cf905 100755 > --- a/xen/tools/gen-cpuid.py > +++ b/xen/tools/gen-cpuid.py > @@ -280,6 +280,9 @@ def crunch_numbers(state): > # standard 3DNow in the earlier K6 processors. > _3DNOW: [_3DNOWEXT], > > +# The SVM bit enumerates the whole SVM leave. > +SVM: list(range(NPT, NPT + 32)), This should say leaf. Fixed locally. ~Andrew
[XEN PATCH 2/3] automation: do not allow failure for triggered analyses
Do not allow_failure for triggered analyses: introducing regressions of clean guidelines will cause a CI failure. Signed-off-by: Federico Serafini --- automation/gitlab-ci/analyze.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/automation/gitlab-ci/analyze.yaml b/automation/gitlab-ci/analyze.yaml index 46c9d8e2e5..32bf570149 100644 --- a/automation/gitlab-ci/analyze.yaml +++ b/automation/gitlab-ci/analyze.yaml @@ -26,7 +26,6 @@ .eclair-analysis:triggered: extends: .eclair-analysis - allow_failure: true rules: - if: $CI_PIPELINE_SOURCE == "schedule" when: never -- 2.34.1
[XEN PATCH 0/3] automation/eclair: do not allow failure for triggered analyses
Patch 1/3 does some preparation work. Patch 2/3, as the title says, removes allow_failure = true for triggered analyses. Patch 3/3 makes explicit that initally no files are tagged as adopted, this is needed by the scheduled analysis. Federico Serafini (3): automation/eclair: tag Rule 7.2 as clean and temporarily remove Rules 1.1 and 8.2 automation: do not allow failure for triggered analyses automation/eclair: make explicit there are no adopted files by default automation/eclair_analysis/ECLAIR/analysis.ecl | 4 automation/eclair_analysis/ECLAIR/tagging.ecl | 2 +- automation/gitlab-ci/analyze.yaml | 1 - 3 files changed, 5 insertions(+), 2 deletions(-) -- 2.34.1
[XEN PATCH 3/3] automation/eclair: make explicit there are no adopted files by default
Update ECLAIR configuration to consider no adopted files by default. Signed-off-by: Federico Serafini --- automation/eclair_analysis/ECLAIR/analysis.ecl | 4 1 file changed, 4 insertions(+) diff --git a/automation/eclair_analysis/ECLAIR/analysis.ecl b/automation/eclair_analysis/ECLAIR/analysis.ecl index a604582da3..66ed7f952c 100644 --- a/automation/eclair_analysis/ECLAIR/analysis.ecl +++ b/automation/eclair_analysis/ECLAIR/analysis.ecl @@ -21,6 +21,10 @@ map_strings("scheduled-analysis",analysis_kind) -eval_file=toolchain.ecl -eval_file=public_APIs.ecl + +-doc="Initially, there are no files tagged as adopted." +-file_tag+={adopted,"none()"} + if(not(scheduled_analysis), eval_file("adopted.ecl") ) -- 2.34.1
[XEN PATCH 1/3] automation/eclair: tag Rule 7.2 as clean and temporarily remove Rules 1.1 and 8.2
Update ECLAIR configuration to consider Rule 7.2 as clean. Temporarily remove the clean tag from Rules 1.1 and 8.2: when violations of such rules will be addressed, the clean tag will be reintroduced. Signed-off-by: Federico Serafini --- automation/eclair_analysis/ECLAIR/tagging.ecl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/eclair_analysis/ECLAIR/tagging.ecl b/automation/eclair_analysis/ECLAIR/tagging.ecl index d609b470eb..bdf94ed996 100644 --- a/automation/eclair_analysis/ECLAIR/tagging.ecl +++ b/automation/eclair_analysis/ECLAIR/tagging.ecl @@ -19,7 +19,7 @@ -doc_begin="Clean guidelines: new violations for these guidelines are not accepted." --service_selector={clean_guidelines_common,"MC3R1.D1.1||MC3R1.D2.1||MC3R1.D4.1||MC3R1.D4.11||MC3R1.D4.14||MC3R1.R1.1||MC3R1.R11.7||MC3R1.R11.9||MC3R1.R12.5||MC3R1.R1.3||MC3R1.R1.4||MC3R1.R14.1||MC3R1.R16.7||MC3R1.R17.1||MC3R1.R17.3||MC3R1.R17.4||MC3R1.R17.5||MC3R1.R17.6||MC3R1.R20.13||MC3R1.R20.14||MC3R1.R20.4||MC3R1.R20.9||MC3R1.R21.13||MC3R1.R21.19||MC3R1.R21.21||MC3R1.R2.2||MC3R1.R22.2||MC3R1.R22.4||MC3R1.R22.5||MC3R1.R22.6||MC3R1.R2.6||MC3R1.R3.1||MC3R1.R3.2||MC3R1.R4.1||MC3R1.R4.2||MC3R1.R5.1||MC3R1.R5.2||MC3R1.R5.4||MC3R1.R5.6||MC3R1.R6.1||MC3R1.R6.2||MC3R1.R7.1||MC3R1.R7.4||MC3R1.R8.1||MC3R1.R8.10||MC3R1.R8.12||MC3R1.R8.14||MC3R1.R8.2||MC3R1.R8.5||MC3R1.R8.6||MC3R1.R8.8||MC3R1.R9.2||MC3R1.R9.3||MC3R1.R9.4||MC3R1.R9.5" 
+-service_selector={clean_guidelines_common,"MC3R1.D1.1||MC3R1.D2.1||MC3R1.D4.1||MC3R1.D4.11||MC3R1.D4.14||MC3R1.R11.7||MC3R1.R11.9||MC3R1.R12.5||MC3R1.R1.3||MC3R1.R1.4||MC3R1.R14.1||MC3R1.R16.7||MC3R1.R17.1||MC3R1.R17.3||MC3R1.R17.4||MC3R1.R17.5||MC3R1.R17.6||MC3R1.R20.13||MC3R1.R20.14||MC3R1.R20.4||MC3R1.R20.9||MC3R1.R21.13||MC3R1.R21.19||MC3R1.R21.21||MC3R1.R2.2||MC3R1.R22.2||MC3R1.R22.4||MC3R1.R22.5||MC3R1.R22.6||MC3R1.R2.6||MC3R1.R3.1||MC3R1.R3.2||MC3R1.R4.1||MC3R1.R4.2||MC3R1.R5.1||MC3R1.R5.2||MC3R1.R5.4||MC3R1.R5.6||MC3R1.R6.1||MC3R1.R6.2||MC3R1.R7.1||MC3R1.R7.2||MC3R1.R7.4||MC3R1.R8.1||MC3R1.R8.10||MC3R1.R8.12||MC3R1.R8.14||MC3R1.R8.5||MC3R1.R8.6||MC3R1.R8.8||MC3R1.R9.2||MC3R1.R9.3||MC3R1.R9.4||MC3R1.R9.5" } -setq=target,getenv("XEN_TARGET_ARCH") -- 2.34.1
Re: [PATCH v3 8/8] gzip: move crc state into gunzip state
On 24.04.2024 18:34, Daniel P. Smith wrote: > Move the crc and its state into struct gunzip_state. In the process, expand > the > only use of CRC_VALUE as it is hides what is being compared. Andrew mentioned uint32_t only for the description, but I think you want (maybe even need) to go further and actually use that type also, e.g. > --- a/xen/common/gzip/gunzip.c > +++ b/xen/common/gzip/gunzip.c > @@ -20,6 +20,9 @@ struct gunzip_state { > > unsigned long bb; /* bit buffer */ > unsigned int bk; /* bits in bit buffer */ > + > +uint32_t crc_32_tab[256]; > +uint32_t crc; > }; ... not just here, but also ... > @@ -72,7 +75,7 @@ static __init void flush_window(struct gunzip_state *s) > * The window is equal to the output buffer therefore only need to > * compute the crc. > */ > -unsigned long c = crc; > +unsigned int c = s->crc; ... here. > --- a/xen/common/gzip/inflate.c > +++ b/xen/common/gzip/inflate.c > @@ -1032,16 +1032,12 @@ static int __init inflate(struct gunzip_state *s) > * > **/ > > -static ulg __initdata crc_32_tab[256]; > -static ulg __initdata crc; /* initialized in makecrc() so it'll reside in > bss */ > -#define CRC_VALUE (crc ^ 0xUL) Note how this is _not_ same as ~0u, also ... > @@ -1069,11 +1065,11 @@ static void __init makecrc(void) > if (k & 1) > c ^= e; > } > -crc_32_tab[i] = c; > +s->crc_32_tab[i] = c; > } > > /* this is initialized here so this code could reside in ROM */ > -crc = (ulg)0xUL; /* shift register contents */ > +s->crc = (ulg)~0u; /* shift register contents */ ... applicable here then: sizeof(int) >= 4, not ==. As Andrew indicates, the cast previously wasn't needed here. Keeping it is at best misleading, when s->crc isn't of that type anymore. Finally please stick to upper-case number suffixes; see all the related Misra changes, which (iirc) put in place only upper-case ones. Jan
[PATCH 1/5] x86/cpu-policy: Infrastructure for the AMD SVM and SEV leaves
Allocate two new feature leaves, and extend cpu_policy with the non-feature fields too. The CPUID dependency between the SVM bit on the whole SVM leaf is intentionally deferred, to avoid transiently breaking nested virt. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Xenia Ragiadakou CC: Sergiy Kibrik CC: George Dunlap CC: Andrei Semenov CC: Vaishali Thakkar --- tools/libs/light/libxl_cpuid.c | 2 ++ tools/misc/xen-cpuid.c | 10 + xen/arch/x86/cpu/common.c | 4 xen/include/public/arch-x86/cpufeatureset.h | 4 xen/include/xen/lib/x86/cpu-policy.h| 24 +++-- xen/lib/x86/cpuid.c | 4 6 files changed, 46 insertions(+), 2 deletions(-) diff --git a/tools/libs/light/libxl_cpuid.c b/tools/libs/light/libxl_cpuid.c index ce4f3c7095ba..c7a8b76f541d 100644 --- a/tools/libs/light/libxl_cpuid.c +++ b/tools/libs/light/libxl_cpuid.c @@ -342,6 +342,8 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *policy, const char* str) CPUID_ENTRY(0x0007, 1, CPUID_REG_EDX), MSR_ENTRY(0x10a, CPUID_REG_EAX), MSR_ENTRY(0x10a, CPUID_REG_EDX), +CPUID_ENTRY(0x800a, NA, CPUID_REG_EDX), +CPUID_ENTRY(0x801f, NA, CPUID_REG_EAX), #undef MSR_ENTRY #undef CPUID_ENTRY }; diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c index 8893547bebce..ab09410a05d6 100644 --- a/tools/misc/xen-cpuid.c +++ b/tools/misc/xen-cpuid.c @@ -264,6 +264,14 @@ static const char *const str_m10Ah[32] = { }; +static const char *const str_eAd[32] = +{ +}; + +static const char *const str_e1Fa[32] = +{ +}; + static const struct { const char *name; const char *abbr; @@ -288,6 +296,8 @@ static const struct { { "CPUID 0x0007:1.edx", "7d1", str_7d1 }, { "MSR_ARCH_CAPS.lo", "m10Al", str_m10Al }, { "MSR_ARCH_CAPS.hi", "m10Ah", str_m10Ah }, +{ "CPUID 0x800a.edx", "eAd", str_eAd }, +{ "CPUID 0x801f.eax", "e1Fa", str_e1Fa }, }; #define COL_ALIGN "24" diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c index 28d7f34c4dbe..25b11e6472b8 100644 --- 
a/xen/arch/x86/cpu/common.c +++ b/xen/arch/x86/cpu/common.c @@ -477,6 +477,10 @@ static void generic_identify(struct cpuinfo_x86 *c) c->x86_capability[FEATURESET_e7d] = cpuid_edx(0x8007); if (c->extended_cpuid_level >= 0x8008) c->x86_capability[FEATURESET_e8b] = cpuid_ebx(0x8008); + if (c->extended_cpuid_level >= 0x800a) + c->x86_capability[FEATURESET_eAd] = cpuid_edx(0x800a); + if (c->extended_cpuid_level >= 0x801f) + c->x86_capability[FEATURESET_e1Fa] = cpuid_eax(0x801f); if (c->extended_cpuid_level >= 0x8021) c->x86_capability[FEATURESET_e21a] = cpuid_eax(0x8021); diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h index 53f13dec31f7..0f869214811e 100644 --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -357,6 +357,10 @@ XEN_CPUFEATURE(RFDS_CLEAR, 16*32+28) /*!A Register File(s) cleared by VE /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ +/* AMD-defined CPU features, CPUID level 0x800a.edx, word 18 */ + +/* AMD-defined CPU features, CPUID level 0x801f.eax, word 19 */ + #endif /* XEN_CPUFEATURE */ /* Clean up from a default include. Close the enum (for C). */ diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h index d5e447e9dc06..936e00e4da73 100644 --- a/xen/include/xen/lib/x86/cpu-policy.h +++ b/xen/include/xen/lib/x86/cpu-policy.h @@ -22,6 +22,8 @@ #define FEATURESET_7d1 15 /* 0x0007:1.edx*/ #define FEATURESET_m10Al 16 /* 0x010a.eax */ #define FEATURESET_m10Ah 17 /* 0x010a.edx */ +#define FEATURESET_eAd 18 /* 0x800a.edx */ +#define FEATURESET_e1Fa 19 /* 0x801f.eax */ struct cpuid_leaf { @@ -296,7 +298,16 @@ struct cpu_policy uint32_t /* d */:32; uint64_t :64, :64; /* Leaf 0x8009. */ -uint64_t :64, :64; /* Leaf 0x800a - SVM rev and features. */ + +/* Leaf 0x800a - SVM rev and features. 
*/ +uint8_t svm_rev, :8, :8, :8; +uint32_t /* b */ :32; +uint32_t nr_asids; +union { +uint32_t eAd; +struct { DECL_BITFIELD(eAd); }; +}; + uint64_t :64, :64; /* Leaf 0x800b. */ uint64_t :64, :64; /* Leaf 0x800c. */ uint64_t :64, :64; /* Leaf 0x800d. */ @@ -317,7 +328,16 @@ struct cpu_policy uint64_t :64, :64; /* Leaf 0x801c. */ uint64_t :64, :64; /* Leaf
[PATCH 4/5] x86/svm: Switch SVM features over normal cpu_has_*
Delete the boot time rendering of advanced features. It's entirely ad-hoc and not even everything printed here is used by Xen. It is available in `xen-cpuid` now. With (only) svm_load_segs_{,prefetch}() declared now in svm.h, only svm.c and domain.c which need the header. Clean up all others. No functional change. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Xenia Ragiadakou CC: Sergiy Kibrik CC: George Dunlap CC: Andrei Semenov CC: Vaishali Thakkar --- xen/arch/x86/hvm/svm/asid.c| 5 ++- xen/arch/x86/hvm/svm/emulate.c | 3 +- xen/arch/x86/hvm/svm/intr.c| 1 - xen/arch/x86/hvm/svm/nestedsvm.c | 14 xen/arch/x86/hvm/svm/svm.c | 50 +++--- xen/arch/x86/hvm/svm/vmcb.c| 1 - xen/arch/x86/include/asm/cpufeature.h | 10 ++ xen/arch/x86/include/asm/hvm/svm/svm.h | 36 --- 8 files changed, 31 insertions(+), 89 deletions(-) diff --git a/xen/arch/x86/hvm/svm/asid.c b/xen/arch/x86/hvm/svm/asid.c index 7977a8e86b53..6117a362d310 100644 --- a/xen/arch/x86/hvm/svm/asid.c +++ b/xen/arch/x86/hvm/svm/asid.c @@ -6,7 +6,6 @@ #include #include -#include #include "svm.h" @@ -39,7 +38,7 @@ void svm_asid_handle_vmrun(void) { vmcb_set_asid(vmcb, true); vmcb->tlb_control = -cpu_has_svm_flushbyasid ? TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; +cpu_has_flush_by_asid ? TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; return; } @@ -48,7 +47,7 @@ void svm_asid_handle_vmrun(void) vmcb->tlb_control = !need_flush ? TLB_CTRL_NO_FLUSH : -cpu_has_svm_flushbyasid ? TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; +cpu_has_flush_by_asid ? 
TLB_CTRL_FLUSH_ASID : TLB_CTRL_FLUSH_ALL; } /* diff --git a/xen/arch/x86/hvm/svm/emulate.c b/xen/arch/x86/hvm/svm/emulate.c index 93ac1d3435f9..da6e21b2e270 100644 --- a/xen/arch/x86/hvm/svm/emulate.c +++ b/xen/arch/x86/hvm/svm/emulate.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include "svm.h" @@ -20,7 +19,7 @@ static unsigned long svm_nextrip_insn_length(struct vcpu *v) { struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb; -if ( !cpu_has_svm_nrips ) +if ( !cpu_has_nrips ) return 0; #ifndef NDEBUG diff --git a/xen/arch/x86/hvm/svm/intr.c b/xen/arch/x86/hvm/svm/intr.c index 4805c5567213..facd2894a2c6 100644 --- a/xen/arch/x86/hvm/svm/intr.c +++ b/xen/arch/x86/hvm/svm/intr.c @@ -17,7 +17,6 @@ #include #include #include -#include #include /* for nestedhvm_vcpu_in_guestmode */ #include #include diff --git a/xen/arch/x86/hvm/svm/nestedsvm.c b/xen/arch/x86/hvm/svm/nestedsvm.c index 35a2cbfd7d13..255af112661f 100644 --- a/xen/arch/x86/hvm/svm/nestedsvm.c +++ b/xen/arch/x86/hvm/svm/nestedsvm.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -1620,7 +1619,7 @@ void svm_nested_features_on_efer_update(struct vcpu *v) { if ( !vmcb->virt_ext.fields.vloadsave_enable && paging_mode_hap(v->domain) && - cpu_has_svm_vloadsave ) + cpu_has_v_loadsave ) { vmcb->virt_ext.fields.vloadsave_enable = 1; general2_intercepts = vmcb_get_general2_intercepts(vmcb); @@ -1629,8 +1628,7 @@ void svm_nested_features_on_efer_update(struct vcpu *v) vmcb_set_general2_intercepts(vmcb, general2_intercepts); } -if ( !vmcb->_vintr.fields.vgif_enable && - cpu_has_svm_vgif ) +if ( !vmcb->_vintr.fields.vgif_enable && cpu_has_v_gif ) { vintr = vmcb_get_vintr(vmcb); vintr.fields.vgif = svm->ns_gif; @@ -1675,8 +1673,8 @@ void __init start_nested_svm(struct hvm_function_table *hvm_function_table) */ hvm_function_table->caps.nested_virt = hvm_function_table->caps.hap && -cpu_has_svm_lbrv && -cpu_has_svm_nrips && -cpu_has_svm_flushbyasid && -cpu_has_svm_decode; +cpu_has_v_lbr 
&& +cpu_has_nrips && +cpu_has_flush_by_asid && +cpu_has_decode_assist; } diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c index 4719fffae589..16eb875aab94 100644 --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -1287,7 +1287,7 @@ static void cf_check svm_inject_event(const struct x86_event *event) * that hardware doesn't perform DPL checking on injection. */ if ( event->type == X86_EVENTTYPE_PRI_SW_EXCEPTION || - (!cpu_has_svm_nrips && (event->type >= X86_EVENTTYPE_SW_INTERRUPT)) ) + (!cpu_has_nrips && (event->type >= X86_EVENTTYPE_SW_INTERRUPT)) ) svm_emul_swint_injection(&_event); switch ( _event.vector | -(_event.type == X86_EVENTTYPE_SW_INTERRUPT) ) @@ -1341,7 +1341,7 @@ static void cf_check svm_inject_event(const struct x86_event *event) switch (
[PATCH 0/5] x86: AMD CPUID handling improvements
This is (half) the series I've promised various people, to untangle some AMD CPUID handling. It moves the SVM feature leaf into the standard x86_capabilities[] infrastructure. On a random Milan system, with this in place, xen-cpuid reports: Dynamic sets: Raw 178bfbff:7eda320b:2fd3fbff:75c237ff:000f:219c95a9:0040069c:6799:91bef75f:::204d:::::::119b9cff:0101fd3f [18] CPUID 0x800a.edx npt v-lbr svm-lock nrips v-tsc-rate vmcb-cleanbits flush-by-asid decode-assist pause-filter <11> pause-thresh v-loadsave v-gif <17> npt-sss v-spec-ctrl <23> <24> <28> [19] CPUID 0x801f.eax sme sev <2> sev-es sev-snp <5> <8> <10> <11> <12> <13> <14> <15> <16> <24> Host 178bf3ff:76da320b:2fd3fbff:644037ff:000f:219c95a9:0040068c:0780:319ed205:::1845:::::::001994ff: [18] CPUID 0x800a.edx npt v-lbr svm-lock nrips v-tsc-rate vmcb-cleanbits flush-by-asid decode-assist pause-filter pause-thresh v-loadsave v-gif npt-sss v-spec-ctrl [19] CPUID 0x801f.eax HVM Max 178bfbff:f6fa3203:2e500800:440001f7:000f:219c05a9:0040060c:0100:331ed005:::1845:::::::04ab: [18] CPUID 0x800a.edx npt v-lbr nrips vmcb-cleanbits decode-assist pause-filter [19] CPUID 0x801f.eax Unfortunately, I haven't managed to do the second half to make the host_policy usable earlier on boot. Untangling __setup_xen() is proving stubborn due to (ab)uses of the MB1 module list. 
Andrew Cooper (5): x86/cpu-policy: Infrastructure for the AMD SVM and SEV leaves x86/cpu-policy: Add SVM features already used by Xen x86/spec-ctrl: Remove open-coded check of SVM_FEATURE_SPEC_CTRL x86/svm: Switch SVM features over normal cpu_has_* x86/cpu-policy: Introduce some SEV features tools/libs/light/libxl_cpuid.c | 2 + tools/misc/xen-cpuid.c | 24 ++ xen/arch/x86/cpu-policy.c | 17 +++ xen/arch/x86/cpu/common.c | 4 ++ xen/arch/x86/hvm/svm/asid.c | 5 +-- xen/arch/x86/hvm/svm/emulate.c | 3 +- xen/arch/x86/hvm/svm/intr.c | 1 - xen/arch/x86/hvm/svm/nestedsvm.c| 14 +++--- xen/arch/x86/hvm/svm/svm.c | 50 + xen/arch/x86/hvm/svm/vmcb.c | 1 - xen/arch/x86/include/asm/cpufeature.h | 16 +++ xen/arch/x86/include/asm/hvm/svm/svm.h | 36 --- xen/arch/x86/spec_ctrl.c| 7 +-- xen/include/public/arch-x86/cpufeatureset.h | 22 + xen/include/xen/lib/x86/cpu-policy.h| 24 +- xen/lib/x86/cpuid.c | 4 ++ xen/tools/gen-cpuid.py | 7 +++ 17 files changed, 128 insertions(+), 109 deletions(-) -- 2.30.2
[PATCH 5/5] x86/cpu-policy: Introduce some SEV features
For display purposes only right now. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Xenia Ragiadakou CC: Sergiy Kibrik CC: George Dunlap CC: Andrei Semenov CC: Vaishali Thakkar This is only half the work to get SEV working nicely. The other half (rearranging __start_xen() so we can move the host policy collection earlier) is still a work-in-progress. --- tools/misc/xen-cpuid.c | 3 +++ xen/arch/x86/include/asm/cpufeature.h | 3 +++ xen/include/public/arch-x86/cpufeatureset.h | 4 xen/tools/gen-cpuid.py | 6 +- 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c index 0d01b0e797f1..1463e0429ba1 100644 --- a/tools/misc/xen-cpuid.c +++ b/tools/misc/xen-cpuid.c @@ -281,6 +281,9 @@ static const char *const str_eAd[32] = static const char *const str_e1Fa[32] = { +[ 0] = "sme", [ 1] = "sev", +/* 2 */ [ 3] = "sev-es", +[ 4] = "sev-snp", }; static const struct { diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h index b6fb8c24423c..732f0d2bf758 100644 --- a/xen/arch/x86/include/asm/cpufeature.h +++ b/xen/arch/x86/include/asm/cpufeature.h @@ -230,6 +230,9 @@ static inline bool boot_cpu_has(unsigned int feat) #define cpu_has_v_gif boot_cpu_has(X86_FEATURE_V_GIF) #define cpu_has_v_spec_ctrl boot_cpu_has(X86_FEATURE_V_SPEC_CTRL) +/* CPUID level 0x801f.eax */ +#define cpu_has_sev boot_cpu_has(X86_FEATURE_SEV) + /* Synthesized. 
*/ #define cpu_has_arch_perfmonboot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING) diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h index 80d252a38c2d..7ee0f2329151 100644 --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -374,6 +374,10 @@ XEN_CPUFEATURE(NPT_SSS,18*32+19) /* NPT Supervisor Shadow Stacks * XEN_CPUFEATURE(V_SPEC_CTRL,18*32+20) /* Virtualised MSR_SPEC_CTRL */ /* AMD-defined CPU features, CPUID level 0x801f.eax, word 19 */ +XEN_CPUFEATURE(SME,19*32+ 0) /* Secure Memory Encryption */ +XEN_CPUFEATURE(SEV,19*32+ 1) /* Secure Encryped VM */ +XEN_CPUFEATURE(SEV_ES, 19*32+ 3) /* SEV Encrypted State */ +XEN_CPUFEATURE(SEV_SNP,19*32+ 4) /* SEV Secure Nested Paging */ #endif /* XEN_CPUFEATURE */ diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py index f07b1f4cf905..bff4d9389ff6 100755 --- a/xen/tools/gen-cpuid.py +++ b/xen/tools/gen-cpuid.py @@ -281,7 +281,7 @@ def crunch_numbers(state): _3DNOW: [_3DNOWEXT], # The SVM bit enumerates the whole SVM leave. -SVM: list(range(NPT, NPT + 32)), +SVM: list(range(NPT, NPT + 32)) + [SEV], # This is just the dependency between AVX512 and AVX2 of XSTATE # feature flags. If want to use AVX512, AVX2 must be supported and @@ -341,6 +341,10 @@ def crunch_numbers(state): # The behaviour described by RRSBA depend on eIBRS being active. EIBRS: [RRSBA], + +SEV: [SEV_ES], + +SEV_ES: [SEV_SNP], } deep_features = tuple(sorted(deps.keys())) -- 2.30.2
[PATCH 3/5] x86/spec-ctrl: Remove open-coded check of SVM_FEATURE_SPEC_CTRL
Now that the SVM feature leaf has been included in normal feature handling, it is available early enough for init_speculation_mitigations() to use. No functional change. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Xenia Ragiadakou CC: Sergiy Kibrik CC: George Dunlap CC: Andrei Semenov CC: Vaishali Thakkar --- xen/arch/x86/include/asm/cpufeature.h | 3 +++ xen/arch/x86/spec_ctrl.c | 7 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h index 9bc553681f4a..77cfd900cb56 100644 --- a/xen/arch/x86/include/asm/cpufeature.h +++ b/xen/arch/x86/include/asm/cpufeature.h @@ -217,6 +217,9 @@ static inline bool boot_cpu_has(unsigned int feat) #define cpu_has_rfds_no boot_cpu_has(X86_FEATURE_RFDS_NO) #define cpu_has_rfds_clear boot_cpu_has(X86_FEATURE_RFDS_CLEAR) +/* CPUID level 0x800a.edx */ +#define cpu_has_v_spec_ctrl boot_cpu_has(X86_FEATURE_V_SPEC_CTRL) + /* Synthesized. */ #define cpu_has_arch_perfmonboot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING) diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c index 40f6ae017010..0bda9d01def5 100644 --- a/xen/arch/x86/spec_ctrl.c +++ b/xen/arch/x86/spec_ctrl.c @@ -11,7 +11,6 @@ #include #include -#include #include #include #include @@ -1896,12 +1895,8 @@ void __init init_speculation_mitigations(void) * * No need for SCF_ist_sc_msr because Xen's value is restored * atomically WRT NMIs in the VMExit path. - * - * TODO: Adjust cpu_has_svm_spec_ctrl to be usable earlier on boot. */ -if ( opt_msr_sc_hvm && - (boot_cpu_data.extended_cpuid_level >= 0x800aU) && - (cpuid_edx(0x800aU) & (1u << SVM_FEATURE_SPEC_CTRL)) ) +if ( opt_msr_sc_hvm && cpu_has_v_spec_ctrl ) setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); } -- 2.30.2
[PATCH 2/5] x86/cpu-policy: Add SVM features already used by Xen
These will replace svm_feature_flags and the SVM_FEATURE_* constants over the next few changes. Take the opportunity to rationalise some names. Drop the opencoded "inherit from host" logic in calculate_hvm_max_policy() and use 'h'/'!' annotations. The logic needs to operate on fs, not the policy object, given its position within the function. Drop some trailing whitespace introduced when this block of code was last moved. Signed-off-by: Andrew Cooper --- CC: Jan Beulich CC: Roger Pau Monné CC: Stefano Stabellini CC: Xenia Ragiadakou CC: Sergiy Kibrik CC: George Dunlap CC: Andrei Semenov CC: Vaishali Thakkar --- tools/misc/xen-cpuid.c | 11 +++ xen/arch/x86/cpu-policy.c | 17 + xen/include/public/arch-x86/cpufeatureset.h | 14 ++ xen/tools/gen-cpuid.py | 3 +++ 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c index ab09410a05d6..0d01b0e797f1 100644 --- a/tools/misc/xen-cpuid.c +++ b/tools/misc/xen-cpuid.c @@ -266,6 +266,17 @@ static const char *const str_m10Ah[32] = static const char *const str_eAd[32] = { +[ 0] = "npt", [ 1] = "v-lbr", +[ 2] = "svm-lock",[ 3] = "nrips", +[ 4] = "v-tsc-rate", [ 5] = "vmcb-cleanbits", +[ 6] = "flush-by-asid", [ 7] = "decode-assist", + +[10] = "pause-filter", +[12] = "pause-thresh", +/* 14 */ [15] = "v-loadsave", +[16] = "v-gif", +/* 18 */ [19] = "npt-sss", +[20] = "v-spec-ctrl", }; static const char *const str_e1Fa[32] = diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c index 4b6d96276399..da4401047e89 100644 --- a/xen/arch/x86/cpu-policy.c +++ b/xen/arch/x86/cpu-policy.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -748,22 +747,16 @@ static void __init calculate_hvm_max_policy(void) if ( !cpu_has_vmx ) __clear_bit(X86_FEATURE_PKS, fs); -/* +/* * Make adjustments to possible (nested) virtualization features exposed * to the guest */ -if ( p->extd.svm ) +if ( test_bit(X86_FEATURE_SVM, fs) ) { -/* Clamp to implemented 
features which require hardware support. */ -p->extd.raw[0xa].d &= ((1u << SVM_FEATURE_NPT) | - (1u << SVM_FEATURE_LBRV) | - (1u << SVM_FEATURE_NRIPS) | - (1u << SVM_FEATURE_PAUSEFILTER) | - (1u << SVM_FEATURE_DECODEASSISTS)); -/* Enable features which are always emulated. */ -p->extd.raw[0xa].d |= (1u << SVM_FEATURE_VMCBCLEAN); +/* Xen always emulates cleanbits. */ +__set_bit(X86_FEATURE_VMCB_CLEANBITS, fs); } - + guest_common_max_feature_adjustments(fs); guest_common_feature_adjustments(fs); diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h index 0f869214811e..80d252a38c2d 100644 --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -358,6 +358,20 @@ XEN_CPUFEATURE(RFDS_CLEAR, 16*32+28) /*!A Register File(s) cleared by VE /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ /* AMD-defined CPU features, CPUID level 0x800a.edx, word 18 */ +XEN_CPUFEATURE(NPT,18*32+ 0) /*h Nested PageTables */ +XEN_CPUFEATURE(V_LBR, 18*32+ 1) /*h Virtualised LBR */ +XEN_CPUFEATURE(SVM_LOCK, 18*32+ 2) /* SVM locking MSR */ +XEN_CPUFEATURE(NRIPS, 18*32+ 3) /*h Next-RIP saved on VMExit */ +XEN_CPUFEATURE(V_TSC_RATE, 18*32+ 4) /* Virtualised TSC Ratio */ +XEN_CPUFEATURE(VMCB_CLEANBITS, 18*32+ 5) /*! 
VMCB Clean-bits */ +XEN_CPUFEATURE(FLUSH_BY_ASID, 18*32+ 6) /* TLB Flush by ASID */ +XEN_CPUFEATURE(DECODE_ASSIST, 18*32+ 7) /*h Decode assists */ +XEN_CPUFEATURE(PAUSE_FILTER, 18*32+10) /*h Pause filter */ +XEN_CPUFEATURE(PAUSE_THRESH, 18*32+12) /* Pause filter threshold */ +XEN_CPUFEATURE(V_LOADSAVE, 18*32+15) /* Virtualised VMLOAD/SAVE */ +XEN_CPUFEATURE(V_GIF, 18*32+16) /* Virtualised GIF */ +XEN_CPUFEATURE(NPT_SSS,18*32+19) /* NPT Supervisor Shadow Stacks */ +XEN_CPUFEATURE(V_SPEC_CTRL,18*32+20) /* Virtualised MSR_SPEC_CTRL */ /* AMD-defined CPU features, CPUID level 0x801f.eax, word 19 */ diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py index bf3f9ec01e6e..f07b1f4cf905 100755 --- a/xen/tools/gen-cpuid.py +++ b/xen/tools/gen-cpuid.py @@ -280,6 +280,9 @@ def crunch_numbers(state): # standard 3DNow in the earlier K6 processors. _3DNOW: [_3DNOWEXT], +# The SVM bit enumerates the whole SVM leave. +SVM: list(range(NPT, NPT + 32)), + #
Re: [PATCH v3 6/8] gzip: move output buffer into gunzip state
On 24.04.2024 18:34, Daniel P. Smith wrote: > --- a/xen/common/gzip/gunzip.c > +++ b/xen/common/gzip/gunzip.c > @@ -15,6 +15,8 @@ struct gunzip_state { > size_t insize; > /* Index of next byte to be processed in inbuf: */ > unsigned int inptr; > + > +unsigned long bytes_out; > }; The conversion to unsigned long from ... > @@ -42,7 +44,6 @@ typedef unsigned long ulg; > # define Tracecv(c, x) > #endif > > -static long __initdata bytes_out; ... this originally wants justifying in the (then no longer empty) description. It's not a lot that needs saying, but such a type change cannot go entirely silently. Jan
Re: [PATCH v3 5/8] gzip: move input buffer handling into gunzip state
On 24.04.2024 18:34, Daniel P. Smith wrote: > --- a/xen/common/gzip/gunzip.c > +++ b/xen/common/gzip/gunzip.c > @@ -10,13 +10,12 @@ struct gunzip_state { > unsigned char *window; > /* window pointer: */ > unsigned int wp; > -}; > - > -static unsigned char *__initdata inbuf; > -static unsigned int __initdata insize; > > -/* Index of next byte to be processed in inbuf: */ > -static unsigned int __initdata inptr; > +unsigned char *inbuf; > +size_t insize; > +/* Index of next byte to be processed in inbuf: */ > +unsigned int inptr; I'm puzzled by the (suddenly) different types, seeing that ... > +}; > > #define malloc(a) xmalloc_bytes(a) > #define free(a) xfree(a) > @@ -51,14 +50,14 @@ static __init void error(const char *x) > panic("%s\n", x); > } > > -static __init uch get_byte(void) { > -if ( inptr >= insize ) > +static __init uch get_byte(struct gunzip_state *s) { > +if ( s->inptr >= s->insize ) ... both are compared with one another. > @@ -1129,29 +1129,29 @@ static int __init gunzip(struct gunzip_state *s) > error("Input has invalid flags"); > return -1; > } > -NEXTBYTE(); /* Get timestamp */ > -NEXTBYTE(); > -NEXTBYTE(); > -NEXTBYTE(); > +NEXTBYTE(s); /* Get timestamp */ > +NEXTBYTE(s); > +NEXTBYTE(s); > +NEXTBYTE(s); > > -NEXTBYTE(); /* Ignore extra flags for the moment */ > -NEXTBYTE(); /* Ignore OS type for the moment */ > +NEXTBYTE(s); /* Ignore extra flags for the moment */ > +NEXTBYTE(s); /* Ignore OS type for the moment */ > > if ((flags & EXTRA_FIELD) != 0) { > -unsigned len = (unsigned)NEXTBYTE(); > -len |= ((unsigned)NEXTBYTE())<<8; > -while (len--) (void)NEXTBYTE(); > +unsigned len = (unsigned)NEXTBYTE(s); > +len |= ((unsigned)NEXTBYTE(s))<<8; > +while (len--) (void)NEXTBYTE(s); Would you mind moving the body of this while() to its own line, as you touch this anyway? 
> } > > /* Get original file name if it was truncated */ > if ((flags & ORIG_NAME) != 0) { > /* Discard the old name */ > -while (NEXTBYTE() != 0) /* null */ ; > +while (NEXTBYTE(s) != 0) /* null */ ; > } > > /* Discard file comment if any */ > if ((flags & COMMENT) != 0) { > -while (NEXTBYTE() != 0) /* null */ ; > +while (NEXTBYTE(s) != 0) /* null */ ; > } For these two doing the same may help, too. Jan
[PATCH 5/9] create-diff-object: move kpatch_include_symbol()
So it can be used by kpatch_process_special_sections() in further changes. Non functional change. Signed-off-by: Roger Pau Monné --- create-diff-object.c | 74 ++-- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/create-diff-object.c b/create-diff-object.c index 6a751bf3b789..7674d972e301 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1198,6 +1198,43 @@ void kpatch_update_ex_table_addend(struct kpatch_elf *kelf, } } +#define inc_printf(fmt, ...) \ + log_debug("%*s" fmt, recurselevel, "", ##__VA_ARGS__); + +static void kpatch_include_symbol(struct symbol *sym, int recurselevel) +{ + struct rela *rela; + struct section *sec; + + inc_printf("start include_symbol(%s)\n", sym->name); + sym->include = 1; + inc_printf("symbol %s is included\n", sym->name); + /* +* Check if sym is a non-local symbol (sym->sec is NULL) or +* if an unchanged local symbol. This a base case for the +* inclusion recursion. +*/ + if (!sym->sec || sym->sec->include || + (sym->type != STT_SECTION && sym->status == SAME)) + goto out; + sec = sym->sec; + sec->include = 1; + inc_printf("section %s is included\n", sec->name); + if (sec->secsym && sec->secsym != sym) { + sec->secsym->include = 1; + inc_printf("section symbol %s is included\n", sec->secsym->name); + } + if (!sec->rela) + goto out; + sec->rela->include = 1; + inc_printf("section %s is included\n", sec->rela->name); + list_for_each_entry(rela, >rela->relas, list) + kpatch_include_symbol(rela->sym, recurselevel+1); +out: + inc_printf("end include_symbol(%s)\n", sym->name); + return; +} + static void kpatch_regenerate_special_section(struct kpatch_elf *kelf, struct special_section *special, struct section *sec) @@ -1455,43 +1492,6 @@ static void kpatch_include_standard_string_elements(struct kpatch_elf *kelf) } } -#define inc_printf(fmt, ...) 
\ - log_debug("%*s" fmt, recurselevel, "", ##__VA_ARGS__); - -static void kpatch_include_symbol(struct symbol *sym, int recurselevel) -{ - struct rela *rela; - struct section *sec; - - inc_printf("start include_symbol(%s)\n", sym->name); - sym->include = 1; - inc_printf("symbol %s is included\n", sym->name); - /* -* Check if sym is a non-local symbol (sym->sec is NULL) or -* if an unchanged local symbol. This a base case for the -* inclusion recursion. -*/ - if (!sym->sec || sym->sec->include || - (sym->type != STT_SECTION && sym->status == SAME)) - goto out; - sec = sym->sec; - sec->include = 1; - inc_printf("section %s is included\n", sec->name); - if (sec->secsym && sec->secsym != sym) { - sec->secsym->include = 1; - inc_printf("section symbol %s is included\n", sec->secsym->name); - } - if (!sec->rela) - goto out; - sec->rela->include = 1; - inc_printf("section %s is included\n", sec->rela->name); - list_for_each_entry(rela, >rela->relas, list) - kpatch_include_symbol(rela->sym, recurselevel+1); -out: - inc_printf("end include_symbol(%s)\n", sym->name); - return; -} - static int kpatch_include_changed_functions(struct kpatch_elf *kelf) { struct symbol *sym; -- 2.44.0
[PATCH 9/9] create-diff-object: account for special section changes
When deciding whether there's content to generate a payload also take into account whether special sections have changed. Initially account for changes to alternative related section to cause the generation of a livepatch. Note that accounting for hook sections is already done by kpatch_include_hook_elements() when deciding whether there's changed content in the object file. .bugframe sections are also not accounted for since any section in a bugframe section will be accompanied by a change in the function the bugframe references (the placement of the BUG_INSTR will change in the caller function). Signed-off-by: Roger Pau Monné --- create-diff-object.c | 24 1 file changed, 24 insertions(+) diff --git a/create-diff-object.c b/create-diff-object.c index 8bfb6bf92a1b..957631097b8a 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1663,6 +1663,28 @@ static int kpatch_include_hook_elements(struct kpatch_elf *kelf) return num_new_functions; } +static unsigned int include_special_sections(const struct kpatch_elf *kelf) +{ + const struct section *sec; + unsigned int nr = 0; + + /* +* Only account for changes in alternatives and exception table related +* sections. Hooks are already taken care of separately, and changes +* in bug_frame sections always go along with changes in the caller +* functions. 
+*/ + list_for_each_entry(sec, >sections, list) + if (sec->status != SAME && + (!strcmp(sec->name, ".altinstructions") || +!strcmp(sec->name, ".altinstr_replacement") || +!strcmp(sec->name, ".ex_table") || +!strcmp(sec->name, ".fixup"))) + nr++; + + return nr; +} + static int kpatch_include_new_globals(struct kpatch_elf *kelf) { struct symbol *sym; @@ -2469,6 +2491,8 @@ int main(int argc, char *argv[]) kpatch_include_debug_sections(kelf_patched); log_debug("Include hook elements\n"); num_changed += kpatch_include_hook_elements(kelf_patched); + log_debug("Include special sections\n"); + num_changed += include_special_sections(kelf_patched); log_debug("num_changed = %d\n", num_changed); log_debug("Include new globals\n"); new_globals_exist = kpatch_include_new_globals(kelf_patched); -- 2.44.0
[PATCH 7/9] create-diff-object: don't account for changes to .bug_frame.? sections
bug_frame related sections exclusively contain addresses that reference back to the address where the BUG_INSTR is executed. As such, any change to the contents of bug_frame sections (or its relocations) will necessarily require a change in the caller function, as the placement of the BUG_INSTR must have changed. Take advantage of this relocation, and unconditionally mark the bug_frame sections themselves as not having changed, the logic in kpatch_regenerate_special_section() will already take care of including any bug_frame element group that references a function that has changed. This should be a non functional change in the payload generated by create-diff-object, but needs doing so that we can take into account changes to .altinstructions and .ex_table sections themselves without unnecessarily also pulling .bug_frame sections. Signed-off-by: Roger Pau Monné --- create-diff-object.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/create-diff-object.c b/create-diff-object.c index f4e4da063d0a..d1b1477be1cd 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1284,6 +1284,17 @@ static void kpatch_regenerate_special_section(struct kpatch_elf *kelf, } } + /* +* For bug_frame sections don't care if the section itself or the +* relocations have changed, as any change in the bug_frames will be +* accompanied by a change in the caller function text, as the +* BUG_INSTR will have a different placement in the caller. +*/ + if (!strncmp(special->name, ".bug_frames.", strlen(".bug_frames."))) { + sec->status = SAME; + sec->base->status = SAME; + } + for ( ; src_offset < sec->base->sh.sh_size; src_offset += group_size) { group_size = special->group_size(kelf, src_offset); -- 2.44.0
[PATCH 3/9] livepatch-build: fix detection of structure sizes
The current runes assume that in the list of DWARF tags DW_AT_byte_size will come after DW_AT_name, but that's not always the case. On one of my builds I've seen: DW_AT_name: (indirect string, offset: 0x3c45): exception_table_entry DW_AT_declaration : 1 <1>: Abbrev Number: 5 (DW_TAG_const_type) DW_AT_type: <0xb617> <1>: Abbrev Number: 14 (DW_TAG_pointer_type) DW_AT_byte_size : 8 Instead of assuming such order, explicitly search for the DW_AT_byte_size tag when a match in the DW_AT_name one is found. Signed-off-by: Roger Pau Monné --- All this ad hoc parsing of DWARF data seems very fragile. This is an improvement over the current logic, but I would still prefer if we could find a more reliable way to obtain the struct sizes we need. --- livepatch-build | 7 +++ 1 file changed, 7 insertions(+) diff --git a/livepatch-build b/livepatch-build index f3ca9399d149..aad9849f2ba9 100755 --- a/livepatch-build +++ b/livepatch-build @@ -427,9 +427,16 @@ if [ "${SKIP}" != "build" ]; then SPECIAL_VARS=$(readelf -wi "$OUTPUT/xen-syms" | awk ' BEGIN { a = b = e = 0 } + # Ensure no fall through to the next tag without getting the size + (a == 1 || b == 1 || e == 1) && /DW_AT_name/ \ + {print "can not get special structure size" > "/dev/stderr"; exit 1} a == 0 && /DW_AT_name.* alt_instr/ {a = 1; next} b == 0 && /DW_AT_name.* bug_frame/ {b = 1; next} e == 0 && /DW_AT_name.* exception_table_entry/ {e = 1; next} + # Seek the line that contains the size + a == 1 && !/DW_AT_byte_size/ {next} + b == 1 && !/DW_AT_byte_size/ {next} + e == 1 && !/DW_AT_byte_size/ {next} # Use shell printf to (possibly) convert from hex to decimal a == 1 {printf("export ALT_STRUCT_SIZE=`printf \"%%d\" \"%s\"`\n", $4); a = 2} b == 1 {printf("export BUG_STRUCT_SIZE=`printf \"%%d\" \"%s\"`\n", $4); b = 2} -- 2.44.0
[PATCH 1/9] livepatch-build-tools: allow patch file name sizes up to 127 characters
XenServer uses quite long Xen version names, and encode such in the livepatch filename, and it's currently running out of space in the file name. Bump max filename size to 127, so it also matches the patch name length in the hypervisor interface. Note the size of the buffer is 128 character, and the last one is reserved for the null terminator. Signed-off-by: Roger Pau Monné --- Changes since v1: - Take into account the null terminator. --- livepatch-build | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/livepatch-build b/livepatch-build index 948b2acfc2f6..f3ca9399d149 100755 --- a/livepatch-build +++ b/livepatch-build @@ -72,8 +72,9 @@ function make_patch_name() fi # Only allow alphanumerics and '_' and '-' in the patch name. Everything -# else is replaced with '-'. Truncate to 48 chars. -echo ${PATCHNAME//[^a-zA-Z0-9_-]/-} |cut -c 1-48 +# else is replaced with '-'. Truncate to 127 chars +# (XEN_LIVEPATCH_NAME_SIZE - 1). +echo ${PATCHNAME//[^a-zA-Z0-9_-]/-} |cut -c -127 } # Do a full normal build -- 2.44.0
[PATCH 6/9] create-diff-object: detect changes in .altinstr_replacement and .fixup sections
The current logic to detect changes in special section elements will only include group elements that reference function symbols that need including (either because they have changed or are new). This works fine in order to detect when a special section element needs including because the function it points to has changed or is newly introduced, but doesn't detect changes to the replacement code in .altinstr_replacement or .fixup sections, as the relocations in that case are against STT_SECTION symbols. Fix this by also including the special section group if the symbol the relocation points to is of type section. Signed-off-by: Roger Pau Monné --- create-diff-object.c | 29 +++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/create-diff-object.c b/create-diff-object.c index 7674d972e301..f4e4da063d0a 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1158,11 +1158,21 @@ static int should_keep_rela_group(struct section *sec, int start, int size) struct rela *rela; int found = 0; - /* check if any relas in the group reference any changed functions */ + /* +* Check if any relas in the group reference any changed functions. +* +* Relocations in the .altinstructions and .ex_table special sections +* can also reference changed section symbols, since the replacement +* text in both cases resides on a section that has no function symbols +* (.altinstr_replacement and .fixup respectively). Account for that +* and also check whether the referenced symbols are section ones in +* order to decide whether the relocation group needs including. 
+*/ list_for_each_entry(rela, >relas, list) { if (rela->offset >= start && rela->offset < start + size && - rela->sym->type == STT_FUNC && + (rela->sym->type == STT_FUNC || +rela->sym->type == STT_SECTION) && rela->sym->sec->include) { found = 1; log_debug("new/changed symbol %s found in special section %s\n", @@ -1338,6 +1348,21 @@ static void kpatch_regenerate_special_section(struct kpatch_elf *kelf, rela->sym->include = 1; + /* +* Changes that cause only the +* .altinstr_replacement or .fixup sections to +* be modified need the target function of the +* replacement to also be marked as CHANGED, +* otherwise livepatch won't replace the +* function, and the new replacement code won't +* take effect. +*/ + if (rela->sym->type == STT_FUNC && + rela->sym->status == SAME) { + rela->sym->status = CHANGED; + kpatch_include_symbol(rela->sym, 0); + } + if (!strcmp(special->name, ".fixup")) kpatch_update_ex_table_addend(kelf, special, src_offset, -- 2.44.0
[PATCH 8/9] create-diff-object: account for changes in the special section itself
Changes to special sections are not accounted for right now. For example a change that only affects .altinstructions or .ex_table won't be included in the livepatch payload, if none of the symbols referenced by those sections have also changed. Since it's not possible to know which elements in the special group have changed if we detect a change in the special section the whole group (minus elements that have references to init section symbols) must be included. Signed-off-by: Roger Pau Monné --- create-diff-object.c | 33 ++--- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/create-diff-object.c b/create-diff-object.c index d1b1477be1cd..8bfb6bf92a1b 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1170,13 +1170,32 @@ static int should_keep_rela_group(struct section *sec, int start, int size) */ list_for_each_entry(rela, >relas, list) { if (rela->offset >= start && - rela->offset < start + size && - (rela->sym->type == STT_FUNC || -rela->sym->type == STT_SECTION) && - rela->sym->sec->include) { - found = 1; - log_debug("new/changed symbol %s found in special section %s\n", - rela->sym->name, sec->name); + rela->offset < start + size) + { + /* +* Avoid including groups with relocations against +* symbols living in init sections, those are never +* included in livepatches. +*/ + if (is_init_section(rela->sym->sec)) + return 0; + + /* +* If the base section has changed, always include all +* groups (except those pointing to .init section +* symbols), as we have no way to differentiate which +* groups have changed. +*/ + if (sec->status != SAME || sec->base->status != SAME) + found = 1; + else if ((rela->sym->type == STT_FUNC || + rela->sym->type == STT_SECTION) && +rela->sym->sec->include) { + found = 1; + log_debug( + "new/changed symbol %s found in special section %s\n", + rela->sym->name, sec->name); + } } } -- 2.44.0
[PATCH 4/9] create-diff-object: document kpatch_regenerate_special_section() behavior
The purpose of kpatch_regenerate_special_section() is fairly opaque without spending a good amount of time and having quite a lot of knowledge about what the special sections contains. Introduce some comments in order to give context and describe the expected functionality. Signed-off-by: Roger Pau Monné --- create-diff-object.c | 33 + 1 file changed, 33 insertions(+) diff --git a/create-diff-object.c b/create-diff-object.c index d8a2afbf2774..6a751bf3b789 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1210,6 +1210,12 @@ static void kpatch_regenerate_special_section(struct kpatch_elf *kelf, src_offset = 0; dest_offset = 0; + + /* +* Special sections processed here are array objects, hence in order to +* detect whether a special section needs processing attempt to get the +* element size. Returning a size of 0 means no processing required. +*/ group_size = special->group_size(kelf, src_offset); if (group_size == 0) { log_normal("Skipping regeneration of a special section: %s\n", @@ -1246,6 +1252,33 @@ static void kpatch_regenerate_special_section(struct kpatch_elf *kelf, if (src_offset + group_size > sec->base->sh.sh_size) group_size = sec->base->sh.sh_size - src_offset; + /* +* Special sections handled perform a bunch of different tasks, +* but they all have something in common: they are array like +* sections that reference functions in the object file being +* processed. +* +* .bug_frames.* relocations reference the symbol (plus offset) +* where the exception is triggered from. +* +* .altinstructions relocations contain references to +* coordinates where the alternatives are to be applied, plus +* coordinates that point to the replacement code in +* .altinstr_replacement. +* +* .ex_table relocations contain references to the coordinates +* where the fixup code should be executed, plus relocation +* coordinates that point to the text code to execte living in +* the .fixup section. 
+* +* .livepatch.hooks.* relocations point to the hook +* functions. +* +* Such dependencies allow to make a decision on whether an +* element in the array needs including in the livepatch: if +* the symbol pointed by the relocation is new or has changed +* the array element needs including. +*/ include = should_keep_rela_group(sec, src_offset, group_size); if (!include) -- 2.44.0
[PATCH 0/9] livepatch-build-tools: some bug fixes and improvements
Hello, The first 3 patches in the series attempt to fix some bugs, I don't think they will be especially controversial or difficult to review (patch 1 was already posted standalone). The rest of the patches are more convoluted, as they attempt to solve some shortcomings when attempting to create livepatches that change alternatives or exceptions. For example a patch that only changes content in alternative blocks (the code that ends up in the .altinstr_replacement section) won't work, as create-diff-object will report that there are no changes. I've attempted to test as many corner cases as I could come up with, but it's hard to figure all the possible changes, plus it's also fairly easy to inadvertently regress existing functionality. Thanks, Roger. Roger Pau Monne (9): livepatch-build-tools: allow patch file name sizes up to 127 characters create-diff-object: update default alt_instr size livepatch-build: fix detection of structure sizes create-diff-object: document kpatch_regenerate_special_section() behavior create-diff-object: move kpatch_include_symbol() create-diff-object: detect changes in .altinstr_replacement and .fixup sections create-diff-object: don't account for changes to .bug_frame.? sections create-diff-object: account for changes in the special section itself create-diff-object: account for special section changes create-diff-object.c | 196 +-- livepatch-build | 12 ++- 2 files changed, 161 insertions(+), 47 deletions(-) -- 2.44.0
[PATCH 2/9] create-diff-object: update default alt_instr size
The size of the alt_instr structure in Xen is 14 instead of 12 bytes, adjust it. Signed-off-by: Roger Pau Monné --- create-diff-object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/create-diff-object.c b/create-diff-object.c index fed360a9aa68..d8a2afbf2774 100644 --- a/create-diff-object.c +++ b/create-diff-object.c @@ -1000,7 +1000,7 @@ static int altinstructions_group_size(struct kpatch_elf *kelf, int offset) char *str; if (!size) { str = getenv("ALT_STRUCT_SIZE"); - size = str ? atoi(str) : 12; + size = str ? atoi(str) : 14; } log_debug("altinstr_size=%d\n", size); -- 2.44.0
Re: [PATCH] revert "x86/mm: re-implement get_page_light() using an atomic increment"
On Mon, Apr 29, 2024 at 03:01:00PM +0200, Jan Beulich wrote: > revert "x86/mm: re-implement get_page_light() using an atomic increment" > > This reverts commit c40bc0576dcc5acd4d7e22ef628eb4642f568533. > > That change aimed at eliminating an open-coded lock-like construct, > which really isn't all that similar to, in particular, get_page(). The > function always succeeds. Any remaining concern would want taking care > of by placing block_lock_speculation() at the end of the function. > Since the function is called only during page (de)validation, any > possible performance concerns over such extra serialization could > likely be addressed by pre-validating (e.g. via pinning) page tables. > > The fundamental issue with the change being reverted is that it detects > bad state only after already having caused possible corruption. While > the system is going to be halted in such an event, there is a time > window during which the resulting incorrect state could be leveraged by > a clever (in particular: fast enough) attacker. > > Signed-off-by: Jan Beulich Acked-by: Roger Pau Monné Thanks.
Re: [PATCH v3 4/8] gzip: move window pointer into gunzip state
On 24.04.2024 18:34, Daniel P. Smith wrote: > Move the window pointer, outcnt/wp, into struct gunzip_data. It was > erroneously > labeled as outcnt and then define aliased to wp, this eliminates the aliasing > and only refers to as wp, the window pointer. > > Signed-off-by: Daniel P. Smith Acked-by: Jan Beulich
Re: [PATCH v3 2/8] gzip: refactor and expand macros
On 24.04.2024 18:34, Daniel P. Smith wrote: > This commit refactors macros into proper static functions. It in-place expands > the `flush_output` macro, allowing the clear removal of the dead code > underneath the `underrun` label. But it's NEXTBYTE() which uses the label, not flush_output(). I'm actually unconvinced of its expanding / removal. > --- a/xen/common/gzip/gunzip.c > +++ b/xen/common/gzip/gunzip.c > @@ -25,8 +25,6 @@ typedef unsigned char uch; > typedef unsigned short ush; > typedef unsigned long ulg; > > -#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) > - > /* Diagnostic functions */ > #ifdef DEBUG > # define Assert(cond, msg) do { if (!(cond)) error(msg); } while (0) > @@ -52,10 +50,14 @@ static __init void error(const char *x) > panic("%s\n", x); > } > > -static __init int fill_inbuf(void) > -{ > -error("ran out of input data"); > -return 0; I'm not convinced of the removal of this as a separate function. It only calling error() right now could change going forward, so I'd at least expect a little bit of a justification. > +static __init uch get_byte(void) { Nit: Brace goes on its own line. Also for (effectively) new code it would be nice if __init (and alike) would be placed "canonically", i.e. between type and identifier. > --- a/xen/common/gzip/inflate.c > +++ b/xen/common/gzip/inflate.c > @@ -119,6 +119,18 @@ static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 > 13:27:04 jloup Exp #"; > > #endif /* !__XEN__ */ > > +/* > + * The inflate algorithm uses a sliding 32 K byte window on the uncompressed > + * stream to find repeated byte strings. This is implemented here as a > + * circular buffer. The index is updated simply by incrementing and then > + * ANDing with 0x7fff (32K-1). > + * > + * It is left to other modules to supply the 32 K area. It is assumed > + * to be usable as if it were declared "uch slide[32768];" or as just > + * "uch *slide;" and then malloc'ed in the latter case. 
The definition > + * must be in unzip.h, included above. Nit: s/definition/declaration/ ? > + */ > +#define wp outcnt > #define slide window > > /* > @@ -150,21 +162,6 @@ static int inflate_dynamic(void); > static int inflate_block(int *); > static int inflate(void); > > -/* > - * The inflate algorithm uses a sliding 32 K byte window on the uncompressed > - * stream to find repeated byte strings. This is implemented here as a > - * circular buffer. The index is updated simply by incrementing and then > - * ANDing with 0x7fff (32K-1). > - * > - * It is left to other modules to supply the 32 K area. It is assumed > - * to be usable as if it were declared "uch slide[32768];" or as just > - * "uch *slide;" and then malloc'ed in the latter case. The definition > - * must be in unzip.h, included above. Oh, an earlier comment just moves up. Is there really a need for this extra churn? > @@ -224,7 +221,7 @@ static const ush mask_bits[] = { > 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0x > }; > > -#define NEXTBYTE() ({ int v = get_byte(); if (v < 0) goto underrun; (uch)v; > }) > +#define NEXTBYTE() (get_byte()) /* get_byte will panic on failure */ Nit: No need for the outer parentheses. > @@ -1148,8 +1135,8 @@ static int __init gunzip(void) > NEXTBYTE(); > NEXTBYTE(); > > -(void)NEXTBYTE(); /* Ignore extra flags for the moment */ > -(void)NEXTBYTE(); /* Ignore OS type for the moment */ > +NEXTBYTE(); /* Ignore extra flags for the moment */ > +NEXTBYTE(); /* Ignore OS type for the moment */ In Misra discussions there were indications that such casts may need (re-) introducing. Perhaps better leave this alone, the more when it's not really fitting the patch's purpose? Jan
Re: [PATCH v8 08/17] xen/riscv: introduce atomic.h
On 17.04.2024 12:04, Oleksii Kurochko wrote: > --- /dev/null > +++ b/xen/arch/riscv/include/asm/atomic.h > @@ -0,0 +1,281 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +/* > + * Taken and modified from Linux. > + * > + * The following changes were done: > + * - * atomic##prefix##_*xchg_*(atomic##prefix##_t *v, c_t n) were updated > + * to use__*xchg_generic() > + * - drop casts in write_atomic() as they are unnecessary > + * - drop introduction of WRITE_ONCE() and READ_ONCE(). > + * Xen provides ACCESS_ONCE() > + * - remove zero-length array access in read_atomic() > + * - drop defines similar to pattern > + * #define atomic_add_return_relaxed atomic_add_return_relaxed > + * - move not RISC-V specific functions to asm-generic/atomics-ops.h > + * > + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. > + * Copyright (C) 2012 Regents of the University of California > + * Copyright (C) 2017 SiFive > + * Copyright (C) 2024 Vates SAS > + */ > + > +#ifndef _ASM_RISCV_ATOMIC_H > +#define _ASM_RISCV_ATOMIC_H > + > +#include > + > +#include > +#include > +#include > +#include > + > +void __bad_atomic_size(void); > + > +/* > + * Legacy from Linux kernel. For some reason they wanted to have ordered > + * read/write access. 
Thereby read* is used instead of read*_cpu() > + */ > +static always_inline void read_atomic_size(const volatile void *p, > + void *res, > + unsigned int size) > +{ > +switch ( size ) > +{ > +case 1: *(uint8_t *)res = readb(p); break; > +case 2: *(uint16_t *)res = readw(p); break; > +case 4: *(uint32_t *)res = readl(p); break; > +#ifndef CONFIG_RISCV_32 > +case 8: *(uint32_t *)res = readq(p); break; > +#endif > +default: __bad_atomic_size(); break; > +} > +} > + > +#define read_atomic(p) ({ \ > +union { typeof(*(p)) val; char c[sizeof(*(p))]; } x_; \ > +read_atomic_size(p, x_.c, sizeof(*(p)));\ > +x_.val; \ > +}) > + > +static always_inline void _write_atomic(volatile void *p, > + unsigned long x, unsigned int size) > +{ > +switch ( size ) > +{ > +case 1: writeb(x, p); break; > +case 2: writew(x, p); break; > +case 4: writel(x, p); break; > +#ifndef CONFIG_RISCV_32 > +case 8: writeq(x, p); break; > +#endif > +default: __bad_atomic_size(); break; > +} > +} > + > +#define write_atomic(p, x) \ > +({ \ > +typeof(*(p)) x_ = (x); \ > +_write_atomic((p), x_, sizeof(*(p))); \ Nit: There are still excess parentheses here. > +x_; \ Why is this? The macro isn't supposed to "return" a value, is it? > +}) > + > +static always_inline void _add_sized(volatile void *p, > + unsigned long x, unsigned int size) > +{ > +switch ( size ) > +{ > +case 1: > +{ > +volatile uint8_t *ptr = (volatile uint8_t *)p; Here and below: Why the casts? Jan
[XEN PATCH v2 1/2] evtchn: Add error status indicators for evtchn_status hypercall
When the evtchn_status hypercall fails, it is not possible to determine the cause of the error, since the library call returns -1 to the tool and not the errno. Because of this, lsevtchn is unable to determine whether to continue event channel enumeration upon an evtchn_status hypercall error. Add error status indicators for the evtchn_status hypercall for lsevtchn to terminate its loop on, and keep other errors as failed hypercalls. Signed-off-by: Matthew Barnes --- xen/common/event_channel.c | 12 +++- xen/include/public/event_channel.h | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index aceee0695f9f..0f11e71c3e6f 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -1030,7 +1030,17 @@ int evtchn_status(evtchn_status_t *status) d = rcu_lock_domain_by_any_id(dom); if ( d == NULL ) -return -ESRCH; +{ +status->status = EVTCHNSTAT_dom_invalid; +return 0; +} + +if ( !port_is_valid(d, port) ) +{ +status->status = EVTCHNSTAT_port_invalid; +rcu_unlock_domain(d); +return 0; +} chn = _evtchn_from_port(d, port); if ( !chn ) diff --git a/xen/include/public/event_channel.h b/xen/include/public/event_channel.h index 0d91a1c4afab..29cbf43945b3 100644 --- a/xen/include/public/event_channel.h +++ b/xen/include/public/event_channel.h @@ -200,6 +200,8 @@ struct evtchn_status { #define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ #define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ #define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ +#define EVTCHNSTAT_dom_invalid 6 /* Given domain ID is not a valid domain */ +#define EVTCHNSTAT_port_invalid 7 /* Given port is not within valid range */ uint32_t status; uint32_t vcpu; /* VCPU to which this channel is bound. */ union { -- 2.34.1
[XEN PATCH v2 0/2] Enumerate all allocated evtchns in lsevtchn
Currently, lsevtchn aborts its event channel enumeration when it hits its first hypercall error, namely: * When an event channel doesn't exist at the specified port * When the event channel is owned by Xen This results in lsevtchn missing potential relevant event channels with higher port numbers, since lsevtchn cannot determine the cause of hypercall errors. This patch adds error status indicators for the evtchn_status hypercall for when no further event channels will be yielded for higher port numbers, allowing lsevtchn to terminate when all event channels have been enumerated over. Matthew Barnes (2): evtchn: Add error status indicators for evtchn_status hypercall tools/lsevtchn: Use new status identifiers in loop tools/xcutils/lsevtchn.c | 11 ++- xen/common/event_channel.c | 12 +++- xen/include/public/event_channel.h | 2 ++ 3 files changed, 23 insertions(+), 2 deletions(-) -- 2.34.1
[XEN PATCH v2 2/2] tools/lsevtchn: Use new status identifiers in loop
lsevtchn terminates the loop when the hypercall returns an error, even if there are still event channels with higher port numbers to be enumerated over. Continue the loop even on hypercall errors, and use the new status identifiers for the evtchn_status hypercall, namely "port invalid" and "domain invalid", to determine when to break the loop. Signed-off-by: Matthew Barnes --- tools/xcutils/lsevtchn.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/xcutils/lsevtchn.c b/tools/xcutils/lsevtchn.c index d1710613ddc5..4a48620cd72a 100644 --- a/tools/xcutils/lsevtchn.c +++ b/tools/xcutils/lsevtchn.c @@ -24,11 +24,20 @@ int main(int argc, char **argv) status.port = port; rc = xc_evtchn_status(xch, ); if ( rc < 0 ) -break; +continue; if ( status.status == EVTCHNSTAT_closed ) continue; +if ( status.status == EVTCHNSTAT_dom_invalid ) +{ +printf("Domain ID '%d' does not correspond to valid domain.\n", domid); +break; +} + +if ( status.status == EVTCHNSTAT_port_invalid ) +break; + printf("%4d: VCPU %u: ", port, status.vcpu); switch ( status.status ) -- 2.34.1
Re: [PATCH v8 06/17] xen/riscv: introduce cmpxchg.h
On 17.04.2024 12:04, Oleksii Kurochko wrote: > +/* > + * This function doesn't exist, so you'll get a linker error > + * if something tries to do an invalid xchg(). > + */ > +extern void __bad_xchg(volatile void *ptr, int size); > + > +static always_inline unsigned long __xchg(volatile void *ptr, unsigned long > new, int size) > +{ > +unsigned long ret; > + > +switch ( size ) > +{ > +case 1: > +ret = emulate_xchg_1_2((volatile uint8_t *)ptr, new, ".aq", ".aqrl"); > +break; > +case 2: > +ret = emulate_xchg_1_2((volatile uint16_t *)ptr, new, ".aq", > ".aqrl"); > +break; > +case 4: > +_amoswap_generic((volatile uint32_t *)ptr, new, ret, ".w.aqrl"); > +break; > +#ifndef CONFIG_RISCV_32 > +case 8: > +_amoswap_generic((volatile uint64_t *)ptr, new, ret, ".d.aqrl"); > +break; > +#endif > +default: > +__bad_xchg(ptr, size), ret = 0; > +} I see no real reason to use a comma expression here, the more that "break" needs adding anyway. I wonder why here you don't use ... > +/* This function doesn't exist, so you'll get a linker error > + if something tries to do an invalid cmpxchg(). */ > +extern unsigned long __bad_cmpxchg(volatile void *ptr, int size); > + > +/* > + * Atomic compare and exchange. Compare OLD with MEM, if identical, > + * store NEW in MEM. Return the initial value in MEM. Success is > + * indicated by comparing RETURN with OLD. 
> + */ > +static always_inline unsigned long __cmpxchg(volatile void *ptr, > + unsigned long old, > + unsigned long new, > + int size) > +{ > +unsigned long ret; > + > +switch ( size ) > +{ > +case 1: > +ret = emulate_cmpxchg_1_2((volatile uint8_t *)ptr, old, new, > + ".aq", ".aqrl"); > +break; > +case 2: > +ret = emulate_cmpxchg_1_2((volatile uint16_t *)ptr, old, new, > + ".aq", ".aqrl"); > +break; > +case 4: > +ret = _generic_cmpxchg((volatile uint32_t *)ptr, old, new, > + ".w.aq", ".w.aqrl"); > +break; > +#ifndef CONFIG_32BIT > +case 8: > +ret = _generic_cmpxchg((volatile uint64_t *)ptr, old, new, > + ".d.aq", ".d.aqrl"); > +break; > +#endif > +default: > +return __bad_cmpxchg(ptr, size); ... the approach used here. Also (style nit) the comment on __bad_cmpxchg() is malformed, unlike that ahead of __bad_xchg(). Jan
Re: [PATCH v1] xen/riscv: improve check-extension() macro
On 26.04.2024 17:23, Oleksii Kurochko wrote: > Now, the check-extension() macro has 1 argument instead of 2. > This change helps to reduce redundancy around usage of extensions > name (in the case of the zbb extension, the name was used 3 times). > > To implement this, a new variable was introduced: > -insn > which represents the instruction support that is being checked. > > Additionally, zbb-insn is updated to use $(comma) instead of ",". Which is merely just-in-case, I suppose, but not strictly necessary anymore? > Signed-off-by: Oleksii Kurochko > Suggested-by: Jan Beulich Reviewed-by: Jan Beulich Just as a remark: Tags want to be put in chronological order. Jan
Re: [PATCH] revert "x86/mm: re-implement get_page_light() using an atomic increment"
On 29/04/2024 2:01 pm, Jan Beulich wrote: > revert "x86/mm: re-implement get_page_light() using an atomic increment" > > This reverts commit c40bc0576dcc5acd4d7e22ef628eb4642f568533. > > That change aimed at eliminating an open-coded lock-like construct, > which really isn't all that similar to, in particular, get_page(). The > function always succeeds. Any remaining concern would want taking care > of by placing block_lock_speculation() at the end of the function. > Since the function is called only during page (de)validation, any > possible performance concerns over such extra serialization could > likely be addressed by pre-validating (e.g. via pinning) page tables. > > The fundamental issue with the change being reverted is that it detects > bad state only after already having caused possible corruption. While > the system is going to be halted in such an event, there is a time > window during which the resulting incorrect state could be leveraged by > a clever (in particular: fast enough) attacker. > > Signed-off-by: Jan Beulich Acked-by: Andrew Cooper
[PATCH] revert "x86/mm: re-implement get_page_light() using an atomic increment"
revert "x86/mm: re-implement get_page_light() using an atomic increment" This reverts commit c40bc0576dcc5acd4d7e22ef628eb4642f568533. That change aimed at eliminating an open-coded lock-like construct, which really isn't all that similar to, in particular, get_page(). The function always succeeds. Any remaining concern would want taking care of by placing block_lock_speculation() at the end of the function. Since the function is called only during page (de)validation, any possible performance concerns over such extra serialization could likely be addressed by pre-validating (e.g. via pinning) page tables. The fundamental issue with the change being reverted is that it detects bad state only after already having caused possible corruption. While the system is going to be halted in such an event, there is a time window during which the resulting incorrect state could be leveraged by a clever (in particular: fast enough) attacker. Signed-off-by: Jan Beulich --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -2582,10 +2582,16 @@ bool get_page(struct page_info *page, const struct domain *domain) */ static void get_page_light(struct page_info *page) { -unsigned long old_pgc = arch_fetch_and_add(>count_info, 1); +unsigned long x, nx, y = page->count_info; -BUG_ON(!(old_pgc & PGC_count_mask)); /* Not allocated? */ -BUG_ON(!((old_pgc + 1) & PGC_count_mask)); /* Overflow? */ +do { +x = y; +nx = x + 1; +BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */ +BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */ +y = cmpxchg(>count_info, x, nx); +} +while ( unlikely(y != x) ); } static int validate_page(struct page_info *page, unsigned long type,
Re: [XEN PATCH] automation/eclair_enalysis: amend configuration for some MISRA rules
On 2024-04-29 14:44, Alessandro Zucchelli wrote: Adjust ECLAIR configuration for rules: R21.14, R21.15, R21.16 by taking into account mem* macros defined in the Xen sources as if they were equivalent to the ones in Standard Library. Signed-off-by: Alessandro Zucchelli --- automation/eclair_analysis/ECLAIR/analysis.ecl | 17 + 1 file changed, 17 insertions(+) diff --git a/automation/eclair_analysis/ECLAIR/analysis.ecl b/automation/eclair_analysis/ECLAIR/analysis.ecl index a604582da3..f3b634baba 100644 --- a/automation/eclair_analysis/ECLAIR/analysis.ecl +++ b/automation/eclair_analysis/ECLAIR/analysis.ecl @@ -19,6 +19,23 @@ map_strings("scheduled-analysis",analysis_kind) -enable=B.EXPLAIN +-doc_begin="These configurations serve the purpose of recognizing the 'mem*' macros as +their Standard Library equivalents." + +-config=MC3R1.R21.14,call_select+= +{"macro(^memcmp$)&_arg(1..2, skip(__non_syntactic_paren_cast_stmts, node(string_literal)))", + "any()", violation, "%{__callslct_any_base_fmt()}", {{arg, "%{__callslct_arg_fmt()}"}}} + +-config=MC3R1.R21.15,call_args+= +{"macro(^mem(cmp|move|cpy)$)", {1, 2}, "unqual_pointee_compatible", + "%{__argscmpr_culprit_fmt()}", "%{__argscmpr_evidence_fmt()}"} + +-config=MC3R1.R21.16,call_select+= +{"macro(^memcmp$)&_arg(1..2, skip(__non_syntactic_paren_stmts, type(canonical(__memcmp_pte_types", + "any()", violation, "%{__callslct_any_base_fmt()}", {{arg,"%{__callslct_arg_type_fmt()}"}}} + +-doc_end + -eval_file=toolchain.ecl -eval_file=public_APIs.ecl if(not(scheduled_analysis), Typo in the title: should be automation/eclair_analysis. Sorry. -- Alessandro Zucchelli, B.Sc. Software Engineer, BUGSENG (https://bugseng.com)
[XEN PATCH] automation/eclair_enalysis: amend configuration for some MISRA rules
Adjust ECLAIR configuration for rules: R21.14, R21.15, R21.16 by taking into account mem* macros defined in the Xen sources as if they were equivalent to the ones in Standard Library. Signed-off-by: Alessandro Zucchelli --- automation/eclair_analysis/ECLAIR/analysis.ecl | 17 + 1 file changed, 17 insertions(+) diff --git a/automation/eclair_analysis/ECLAIR/analysis.ecl b/automation/eclair_analysis/ECLAIR/analysis.ecl index a604582da3..f3b634baba 100644 --- a/automation/eclair_analysis/ECLAIR/analysis.ecl +++ b/automation/eclair_analysis/ECLAIR/analysis.ecl @@ -19,6 +19,23 @@ map_strings("scheduled-analysis",analysis_kind) -enable=B.EXPLAIN +-doc_begin="These configurations serve the purpose of recognizing the 'mem*' macros as +their Standard Library equivalents." + +-config=MC3R1.R21.14,call_select+= +{"macro(^memcmp$)&_arg(1..2, skip(__non_syntactic_paren_cast_stmts, node(string_literal)))", + "any()", violation, "%{__callslct_any_base_fmt()}", {{arg, "%{__callslct_arg_fmt()}"}}} + +-config=MC3R1.R21.15,call_args+= +{"macro(^mem(cmp|move|cpy)$)", {1, 2}, "unqual_pointee_compatible", + "%{__argscmpr_culprit_fmt()}", "%{__argscmpr_evidence_fmt()}"} + +-config=MC3R1.R21.16,call_select+= +{"macro(^memcmp$)&_arg(1..2, skip(__non_syntactic_paren_stmts, type(canonical(__memcmp_pte_types", + "any()", violation, "%{__callslct_any_base_fmt()}", {{arg,"%{__callslct_arg_type_fmt()}"}}} + +-doc_end + -eval_file=toolchain.ecl -eval_file=public_APIs.ecl if(not(scheduled_analysis), -- 2.25.1
Re: [XEN PATCH 03/10] automation/eclair_analysis: deviate macro count_args_ for MISRA Rule 20.7
On 2024-04-25 02:28, Stefano Stabellini wrote: On Tue, 23 Apr 2024, Nicola Vetrini wrote: The count_args_ macro violates Rule 20.7, but it can't be made compliant with Rule 20.7 without breaking its functionality. Since it's very unlikely for this macro to be misused, it is deviated. That is OK but can't we use the SAF- framework to do it, given that it is just one macro? If not, this is also OK. It would be more fragile, for no substantial gain No functional change. Signed-off-by: Nicola Vetrini --- automation/eclair_analysis/ECLAIR/deviations.ecl | 6 ++ docs/misra/deviations.rst| 6 ++ 2 files changed, 12 insertions(+) diff --git a/automation/eclair_analysis/ECLAIR/deviations.ecl b/automation/eclair_analysis/ECLAIR/deviations.ecl index d21f112a9b94..c17e2f5a0886 100644 --- a/automation/eclair_analysis/ECLAIR/deviations.ecl +++ b/automation/eclair_analysis/ECLAIR/deviations.ecl @@ -445,6 +445,12 @@ not to parenthesize their arguments." -config=MC3R1.R20.7,reports+={safe, "any_area(any_loc(any_exp(macro(^alternative_(v)?call[0-9]$"} -doc_end +-doc_begin="The argument 'x' of the count_args_ macro can't be parenthesized as +the rule would require, without breaking the functionality of the macro. The uses +of this macro do not lead to developer confusion, and can thus be deviated." +-config=MC3R1.R20.7,reports+={safe, "any_area(any_loc(any_exp(macro(^count_args_$"} +-doc_end + -doc_begin="Uses of variadic macros that have one of their arguments defined as a macro and used within the body for both ordinary parameter expansion and as an operand to the # or ## operators have a behavior that is well-understood and diff --git a/docs/misra/deviations.rst b/docs/misra/deviations.rst index ed0c1e8ed0bf..e228ae2e715f 100644 --- a/docs/misra/deviations.rst +++ b/docs/misra/deviations.rst @@ -371,6 +371,12 @@ Deviations related to MISRA C:2012 Rules: sanity checks in place. - Tagged as `safe` for ECLAIR. 
+ * - R20.7 + - The macro `count_args_` is not compliant with the rule, but is not likely + to incur in the risk of being misused or lead to developer confusion, and + refactoring it to add parentheses breaks its functionality. + - Tagged as `safe` for ECLAIR. + * - R20.12 - Variadic macros that use token pasting often employ the gcc extension `ext_paste_comma`, as detailed in `C-language-toolchain.rst`, which is -- 2.34.1 -- Nicola Vetrini, BSc Software Engineer, BUGSENG srl (https://bugseng.com)
Re: CVE-2024-26908: x86/xen: Add some null pointer checking to smp.c
Hi, I'd like to dispute CVE-2024-26908: the issue fixed by upstream commit 3693bb4465e6e32a204a5b86d3ec7e6b9f7e67c2 can in no way be triggered by an unprivileged user or by a remote attack of the system, as it requires hotplug of (virtual) cpus to the running system. This can be done only by either a host admin or by an admin of the guest which might suffer the out-of-memory situation. Please revoke this CVE. Juergen OpenPGP_0xB0DE9DD628BF132F.asc Description: OpenPGP public key OpenPGP_signature.asc Description: OpenPGP digital signature
Re: [PATCH 3/3] x86/boot: Refactor pvh_load_kernel() to have an initrd_len local
On 26.04.2024 16:01, Andrew Cooper wrote: > The expression get more complicated when ->mod_end isn't being abused as a > size field. Introduce and use a initrd_len local variable. > > No functional change. > > Signed-off-by: Andrew Cooper Reviewed-by: Jan Beulich
Re: [PATCH 2/3] x86/boot: Explain discard_initial_images() and untangle PV initrd handling
On 26.04.2024 16:01, Andrew Cooper wrote: > discard_initial_images() frees the memory backing the boot modules. It is > called by dom0_construct_pv{,h}(), but obtains the module list by global > pointer which is why there is no apparent link with the initrd pointer. > > Generally, module contents are copied into dom0 as it's being built, but the > initrd for PV dom0 might be directly mapped instead of copied. > > dom0_construct_pv() does it's own ad-hoc freeing of the module in the copy > case, and points the global reference at the new copy, then sets the size to > 0. This only functions because init_domheap_pages(x, x) happens to be a nop. > > Delete the ad-hoc freeing, and leave it to discard_initial_images(). This > requires (not) adjusting initd->mod_start in the copy case, and only setting > the size to 0 in the mapping case. > > Alter discard_initial_images() to explicitly check for an ignored module, and > explain what's going on. This is more robust and will allow for fixing other > problems with module handling. > > The later logic in dom0_construct_pv() now cannot use initrd->mod_start, but > that's fine because initrd_mfn is already a duplicate of the information > wanted, and is more consistent with initrd_{pfn,len} used elsewhere. > > Invalidate the initrd pointer with LIST_POISON1 to make it clearer that it > shouldn't be used. > > No practical change in behaviour, but a substantial reduction in the > complexity of how this works. > > Signed-off-by: Andrew Cooper > --- > CC: Jan Beulich > CC: Roger Pau Monné > CC: Stefano Stabellini > CC: Daniel Smith > CC: Christopher Clark > > In other akward questions, why does initial_images_nrpages() account for all > modules when only 1 or 2 are relevant for how we construct dom0 ? 
> --- > xen/arch/x86/pv/dom0_build.c | 22 +++--- > xen/arch/x86/setup.c | 9 - > 2 files changed, 19 insertions(+), 12 deletions(-) > > diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c > index d8043fa58a27..64d9984b8308 100644 > --- a/xen/arch/x86/pv/dom0_build.c > +++ b/xen/arch/x86/pv/dom0_build.c > @@ -630,18 +630,20 @@ int __init dom0_construct_pv(struct domain *d, > } > memcpy(page_to_virt(page), mfn_to_virt(initrd->mod_start), > initrd_len); > -mpt_alloc = (paddr_t)initrd->mod_start << PAGE_SHIFT; > -init_domheap_pages(mpt_alloc, > - mpt_alloc + PAGE_ALIGN(initrd_len)); > -initrd->mod_start = initrd_mfn = mfn_x(page_to_mfn(page)); > +initrd_mfn = mfn_x(page_to_mfn(page)); > } > else > { > while ( count-- ) > if ( assign_pages(mfn_to_page(_mfn(mfn++)), 1, d, 0) ) > BUG(); > +/* > + * Mapped rather than copied. Tell discard_initial_images() to > + * ignore it. > + */ > +initrd->mod_end = 0; > } > -initrd->mod_end = 0; > +initrd = LIST_POISON1; /* No longer valid to use. */ > > iommu_memory_setup(d, "initrd", mfn_to_page(_mfn(initrd_mfn)), > PFN_UP(initrd_len), _flags); > @@ -653,12 +655,10 @@ int __init dom0_construct_pv(struct domain *d, > if ( domain_tot_pages(d) < nr_pages ) > printk(" (%lu pages to be allocated)", > nr_pages - domain_tot_pages(d)); > -if ( initrd ) > -{ > -mpt_alloc = (paddr_t)initrd->mod_start << PAGE_SHIFT; > +if ( initrd_len ) > printk("\n Init. ramdisk: %"PRIpaddr"->%"PRIpaddr, > - mpt_alloc, mpt_alloc + initrd_len); > -} > + pfn_to_paddr(initrd_mfn), > + pfn_to_paddr(initrd_mfn) + initrd_len); > > printk("\nVIRTUAL MEMORY ARRANGEMENT:\n"); > printk(" Loaded kernel: %p->%p\n", _p(vkern_start), _p(vkern_end)); Between what this and the following hunk touch there is if ( count < initrd_pfn || count >= initrd_pfn + PFN_UP(initrd_len) ) mfn = pfn++; else mfn = initrd_mfn++; I can't help thinking that this invalidates ... 
> @@ -881,7 +881,7 @@ int __init dom0_construct_pv(struct domain *d, > if ( pfn >= initrd_pfn ) > { > if ( pfn < initrd_pfn + PFN_UP(initrd_len) ) > -mfn = initrd->mod_start + (pfn - initrd_pfn); > +mfn = initrd_mfn + (pfn - initrd_pfn); > else > mfn -= PFN_UP(initrd_len); > } ... the use of the variable here. > --- a/xen/arch/x86/setup.c > +++ b/xen/arch/x86/setup.c > @@ -294,7 +294,7 @@ unsigned long __init initial_images_nrpages(nodeid_t node) > return nr; > } > > -void __init discard_initial_images(void) > +void __init discard_initial_images(void) /* a.k.a. free multiboot modules */ > { > unsigned int i; > > @@ -302,6 +302,13 @@ void __init
Re: [PATCH 1/3] x86/boot: Explain how moving mod[0] works
On 26.04.2024 16:01, Andrew Cooper wrote: > modules_headroom is a misleading name as it applies strictly to mod[0] only, > and the movement loop is deeply unintuitive and completely undocumented. > > Provide help to whomever needs to look at this code next. > > Signed-off-by: Andrew Cooper Acked-by: Jan Beulich
Re: [PATCH v6 8/8] xen: allow up to 16383 cpus
On 29.04.24 13:04, Julien Grall wrote: Hi Juergen, Sorry for the late reply. On 29/04/2024 11:33, Juergen Gross wrote: On 08.04.24 09:10, Jan Beulich wrote: On 27.03.2024 16:22, Juergen Gross wrote: With lock handling now allowing up to 16384 cpus (spinlocks can handle 65535 cpus, rwlocks can handle 16384 cpus), raise the allowed limit for the number of cpus to be configured to 16383. The new limit is imposed by IOMMU_CMD_BUFFER_MAX_ENTRIES and QINVAL_MAX_ENTRY_NR required to be larger than 2 * CONFIG_NR_CPUS. Signed-off-by: Juergen Gross Acked-by: Jan Beulich I'd prefer this to also gain an Arm ack, though. Any comment from Arm side? Can you clarify what the new limits mean in term of (security) support? Are we now claiming that Xen will work perfectly fine on platforms with up to 16383? If so, I can't comment for x86, but for Arm, I am doubtful that it would work without any (at least performance) issues. AFAIK, this is also an untested configuration. In fact I would be surprised if Xen on Arm was tested with more than a couple of hundreds cores (AFAICT the Ampere CPUs has 192 CPUs). I think we should add a security support limit for the number of physical cpus similar to the memory support limit we already have in place. For x86 I'd suggest 4096 cpus for security support (basically the limit we have with this patch), but I'm open for other suggestions, too. I have no idea about any sensible limits for Arm32/Arm64. Juergen
Re: [PATCH v6 8/8] xen: allow up to 16383 cpus
Hi Juergen, Sorry for the late reply. On 29/04/2024 11:33, Juergen Gross wrote: On 08.04.24 09:10, Jan Beulich wrote: On 27.03.2024 16:22, Juergen Gross wrote: With lock handling now allowing up to 16384 cpus (spinlocks can handle 65535 cpus, rwlocks can handle 16384 cpus), raise the allowed limit for the number of cpus to be configured to 16383. The new limit is imposed by IOMMU_CMD_BUFFER_MAX_ENTRIES and QINVAL_MAX_ENTRY_NR required to be larger than 2 * CONFIG_NR_CPUS. Signed-off-by: Juergen Gross Acked-by: Jan Beulich I'd prefer this to also gain an Arm ack, though. Any comment from Arm side? Can you clarify what the new limits mean in term of (security) support? Are we now claiming that Xen will work perfectly fine on platforms with up to 16383? If so, I can't comment for x86, but for Arm, I am doubtful that it would work without any (at least performance) issues. AFAIK, this is also an untested configuration. In fact I would be surprised if Xen on Arm was tested with more than a couple of hundreds cores (AFAICT the Ampere CPUs has 192 CPUs). Cheers, -- Julien Grall
Re: [PATCH 1/2] tools/{c,o}xenstored: Don't link against libsystemd
On 25.04.24 19:32, Andrew Cooper wrote: libsystemd is a giant dependency for one single function, but in the wake of the xz backdoor, it turns out that even systemd leadership recommend against linking against libsystemd for sd_notify(). Since commit 7b61011e1450 ("tools: make xenstore domain easy configurable") in Xen 4.8, the launch-xenstore script invokes systemd-notify directly, so its not even necessary for the xenstored's to call sd_notify() themselves. You are aware that in the daemon case the call of systemd-notify does not signal readyness of xenstored? It is just called with the "--booted" parameter in order to detect whether systemd is active or not. So in order to just drop the sd_notify() call from xenstored you need to modify the launch-xenstore script, too. Juergen Therefore, just drop the calls to sd_notify() and stop linking against libsystemd. No functional change. Signed-off-by: Andrew Cooper --- CC: Anthony PERARD CC: Juergen Gross CC: Christian Lindig CC: Edwin Török CC: Stefano Stabellini --- tools/ocaml/xenstored/Makefile| 12 +-- tools/ocaml/xenstored/systemd.ml | 15 - tools/ocaml/xenstored/systemd.mli | 16 - tools/ocaml/xenstored/systemd_stubs.c | 47 --- tools/ocaml/xenstored/xenstored.ml| 1 - tools/xenstored/Makefile | 5 --- tools/xenstored/posix.c | 9 - 7 files changed, 1 insertion(+), 104 deletions(-) delete mode 100644 tools/ocaml/xenstored/systemd.ml delete mode 100644 tools/ocaml/xenstored/systemd.mli delete mode 100644 tools/ocaml/xenstored/systemd_stubs.c diff --git a/tools/ocaml/xenstored/Makefile b/tools/ocaml/xenstored/Makefile index e8aaecf2e630..1e4b51cc5432 100644 --- a/tools/ocaml/xenstored/Makefile +++ b/tools/ocaml/xenstored/Makefile @@ -4,8 +4,6 @@ include $(OCAML_TOPLEVEL)/common.make # Include configure output (config.h) CFLAGS += -include $(XEN_ROOT)/tools/config.h -CFLAGS-$(CONFIG_SYSTEMD) += $(SYSTEMD_CFLAGS) -LDFLAGS-$(CONFIG_SYSTEMD) += $(SYSTEMD_LIBS) CFLAGS += $(CFLAGS-y) CFLAGS += $(APPEND_CFLAGS) @@ -25,13 +23,6 
@@ poll_OBJS = poll poll_C_OBJS = select_stubs OCAML_LIBRARY = syslog poll -LIBS += systemd.cma systemd.cmxa -systemd_OBJS = systemd -systemd_C_OBJS = systemd_stubs -OCAML_LIBRARY += systemd - -LIBS_systemd += $(LDFLAGS-y) - OBJS = paths \ define \ stdext \ @@ -56,12 +47,11 @@ OBJS = paths \ process \ xenstored -INTF = symbol.cmi trie.cmi syslog.cmi systemd.cmi poll.cmi +INTF = symbol.cmi trie.cmi syslog.cmi poll.cmi XENSTOREDLIBS = \ unix.cmxa \ -ccopt -L -ccopt . syslog.cmxa \ - -ccopt -L -ccopt . systemd.cmxa \ -ccopt -L -ccopt . poll.cmxa \ -ccopt -L -ccopt $(OCAML_TOPLEVEL)/libs/mmap $(OCAML_TOPLEVEL)/libs/mmap/xenmmap.cmxa \ -ccopt -L -ccopt $(OCAML_TOPLEVEL)/libs/eventchn $(OCAML_TOPLEVEL)/libs/eventchn/xeneventchn.cmxa \ diff --git a/tools/ocaml/xenstored/systemd.ml b/tools/ocaml/xenstored/systemd.ml deleted file mode 100644 index 39127f712d72.. --- a/tools/ocaml/xenstored/systemd.ml +++ /dev/null @@ -1,15 +0,0 @@ -(* - * Copyright (C) 2014 Luis R. Rodriguez - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -external sd_notify_ready: unit -> unit = "ocaml_sd_notify_ready" diff --git a/tools/ocaml/xenstored/systemd.mli b/tools/ocaml/xenstored/systemd.mli deleted file mode 100644 index 18b9331031f9.. --- a/tools/ocaml/xenstored/systemd.mli +++ /dev/null @@ -1,16 +0,0 @@ -(* - * Copyright (C) 2014 Luis R. 
Rodriguez - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -(** Tells systemd we're ready *) -external sd_notify_ready: unit -> unit = "ocaml_sd_notify_ready" diff --git a/tools/ocaml/xenstored/systemd_stubs.c b/tools/ocaml/xenstored/systemd_stubs.c deleted file mode 100644 index f4c875075abe.. --- a/tools/ocaml/xenstored/systemd_stubs.c +++ /dev/null @@ -1,47 +0,0 @@ -/*
[xen-unstable test] 185860: tolerable FAIL
flight 185860 xen-unstable real [real] http://logs.test-lab.xenproject.org/osstest/logs/185860/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 185839 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 185839 test-armhf-armhf-libvirt 16 saverestore-support-checkfail like 185839 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 185839 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 185839 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 185839 test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass 
test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-armhf-armhf-xl-qcow214 migrate-support-checkfail never pass test-armhf-armhf-xl-qcow215 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl-raw 15 saverestore-support-checkfail never pass version targeted for testing: xen be5b08dd6ea6ef0f01caf537bdae125fa66a2230 baseline version: xen be5b08dd6ea6ef0f01caf537bdae125fa66a2230 Last test of basis 185860 2024-04-29 01:53:41 Z0 days Testing same since (not found) 0 attempts jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm pass build-amd64-xtf pass build-amd64 pass build-arm64 pass build-armhf pass build-i386 pass build-amd64-libvirt pass build-arm64-libvirt pass
Re: [PATCH v6 8/8] xen: allow up to 16383 cpus
On 08.04.24 09:10, Jan Beulich wrote: On 27.03.2024 16:22, Juergen Gross wrote: With lock handling now allowing up to 16384 cpus (spinlocks can handle 65535 cpus, rwlocks can handle 16384 cpus), raise the allowed limit for the number of cpus to be configured to 16383. The new limit is imposed by IOMMU_CMD_BUFFER_MAX_ENTRIES and QINVAL_MAX_ENTRY_NR required to be larger than 2 * CONFIG_NR_CPUS. Signed-off-by: Juergen Gross Acked-by: Jan Beulich I'd prefer this to also gain an Arm ack, though. Any comment from Arm side? Juergen Jan --- a/xen/arch/Kconfig +++ b/xen/arch/Kconfig @@ -6,7 +6,7 @@ config PHYS_ADDR_T_32 config NR_CPUS int "Maximum number of CPUs" - range 1 4095 + range 1 16383 default "256" if X86 default "8" if ARM && RCAR3 default "4" if ARM && QEMU OpenPGP_0xB0DE9DD628BF132F.asc Description: OpenPGP public key OpenPGP_signature.asc Description: OpenPGP digital signature
Re: [PATCH v2 1/7] x86/p2m: Add braces for better code clarity
On 29.04.2024 12:26, Petr Beneš wrote: > On Mon, Apr 29, 2024 at 9:07 AM Jan Beulich wrote: >> >> On 28.04.2024 18:52, Petr Beneš wrote: >>> From: Petr Beneš >>> >>> No functional change. >>> >>> Signed-off-by: Petr Beneš >> >> Where did Stefano's R-b go? > > Oh no, I missed that one. Should I do v3? Not just for this, I'd say. Just don't forget it again, if the patch needs re-posting. Jan
Re: [PATCH v2 1/7] x86/p2m: Add braces for better code clarity
On Mon, Apr 29, 2024 at 9:07 AM Jan Beulich wrote: > > On 28.04.2024 18:52, Petr Beneš wrote: > > From: Petr Beneš > > > > No functional change. > > > > Signed-off-by: Petr Beneš > > Where did Stefano's R-b go? > > Jan Oh no, I missed that one. Should I do v3?
Re: [XEN PATCH v3 5/5] xen/arm: ffa: support notification
Hi Julien, On Fri, Apr 26, 2024 at 7:58 PM Julien Grall wrote: > > Hi Jens, > > On 26/04/2024 09:47, Jens Wiklander wrote: > > +static void notif_irq_enable(void *info) > > +{ > > +struct notif_irq_info *irq_info = info; > > + > > +irq_info->ret = setup_irq(irq_info->irq, 0, irq_info->action); > In v2, you were using request_irq(). But now you seem to be open-coding > it. Can you explain why? It's because request_irq() does a memory allocation that can't be done in interrupt context. > > > +if ( irq_info->ret ) > > +printk(XENLOG_ERR "ffa: request_irq irq %u failed: error %d\n", > > + irq_info->irq, irq_info->ret); > > +} > > + > > +void ffa_notif_init(void) > > +{ > > +const struct arm_smccc_1_2_regs arg = { > > +.a0 = FFA_FEATURES, > > +.a1 = FFA_FEATURE_SCHEDULE_RECV_INTR, > > +}; > > +struct notif_irq_info irq_info = { }; > > +struct arm_smccc_1_2_regs resp; > > +unsigned int cpu; > > + > > +arm_smccc_1_2_smc(, ); > > +if ( resp.a0 != FFA_SUCCESS_32 ) > > +return; > > + > > +irq_info.irq = resp.a2; > > +if ( irq_info.irq < GIC_SGI_STATIC_MAX || irq_info.irq >= NR_GIC_SGI ) > > +{ > > +printk(XENLOG_ERR "ffa: notification initialization failed: > > conflicting SGI %u\n", > > + irq_info.irq); > > +return; > > +} > > + > > +/* > > + * SGIs are per-CPU so we must enable the IRQ on each CPU. We use an > > + * IPI to call notif_irq_enable() on each CPU including the current > > + * CPU. The struct irqaction is preallocated since we can't allocate > > + * memory while in interrupt context. > > + */ > > +for_each_online_cpu(cpu) > Even though we currently don't support CPU hotplug, you want to add a > CPU Notifier to also register the IRQ when a CPU is onlined > ffa_notif_init(). > > For an example, see time.c. We may also want to consider to enable TEE > in presmp_initcalls() so we don't need to have a for_each_online_cpu(). I was considering that too. I'll update the code. Thanks, Jens
Re: [XEN PATCH v3 5/5] xen/arm: ffa: support notification
Hi Bertrand, On Mon, Apr 29, 2024 at 9:20 AM Bertrand Marquis wrote: [...] > >> +static void notif_irq_handler(int irq, void *data) > >> +{ > >> +const struct arm_smccc_1_2_regs arg = { > >> +.a0 = FFA_NOTIFICATION_INFO_GET_64, > >> +}; > >> +struct arm_smccc_1_2_regs resp; > >> +unsigned int id_pos; > >> +unsigned int list_count; > >> +uint64_t ids_count; > >> +unsigned int n; > >> +int32_t res; > >> + > >> +do { > >> +arm_smccc_1_2_smc(, ); > >> +res = ffa_get_ret_code(); > >> +if ( res ) > >> +{ > >> +if ( res != FFA_RET_NO_DATA ) > >> +printk(XENLOG_ERR "ffa: notification info get failed: > >> error %d\n", > >> + res); > >> +return; > >> +} > >> + > >> +ids_count = resp.a2 >> FFA_NOTIF_INFO_GET_ID_LIST_SHIFT; > >> +list_count = ( resp.a2 >> FFA_NOTIF_INFO_GET_ID_COUNT_SHIFT ) & > >> + FFA_NOTIF_INFO_GET_ID_COUNT_MASK; > >> + > >> +id_pos = 0; > >> +for ( n = 0; n < list_count; n++ ) > >> +{ > >> +unsigned int count = ((ids_count >> 2 * n) & 0x3) + 1; > >> +struct domain *d; > >> + > >> +d = ffa_get_domain_by_vm_id(get_id_from_resp(, id_pos)); > > > > Thinking a bit more about the question from Bertrand about get_domain_id() > > vs rcu_lock_domain_by_id(). I am actually not sure whether either are ok > > here. > > > > If I am not mistaken, d->arch.tee will be freed as part of the domain > > teardown process. This means you can have the following scenario: > > > > CPU0: ffa_get_domain_by_vm_id() (return the domain as it is alive) > > > > CPU1: call domain_kill() > > CPU1: teardown is called, free d->arch.tee (the pointer is not set to NULL) > > > > d->arch.tee is now a dangling pointer > > > > CPU0: access d->arch.tee > > > > This implies you may need to gain a global lock (I don't have a better idea > > so far) to protect the IRQ handler against domains teardown. > > > > Did I miss anything? > > I think you are right which is why I was thinking to use > rcu_lock_live_remote_domain_by_id to only get a reference > to the domain if it is not dying. 
> > From the comment in sched.h: > /* > * rcu_lock_domain_by_id() is more efficient than get_domain_by_id(). > * This is the preferred function if the returned domain reference > * is short lived, but it cannot be used if the domain reference needs > * to be kept beyond the current scope (e.g., across a softirq). > * The returned domain reference must be discarded using rcu_unlock_domain(). > */ > > Now the question of short lived should be challenged but I do not think we can > consider the current code as "long lived". > > It would be a good idea to start a mailing list thread discussion with other > maintainers on the subject to confirm. > @Jens: as i will be off for some time, would you mind doing it ? Sure, first I'll send out a new patch set with the current comments addressed. I'll update to use rcu_lock_live_remote_domain_by_id() both because it makes more sense than the current code, and also to have a good base for the discussion. Thanks, Jens
Re: [PATCH] x86/cpu-policy: Annotate the accumulated features
On 29/04/2024 8:49 am, Jan Beulich wrote: > On 26.04.2024 18:08, Andrew Cooper wrote: >> Some features need accumulating rather than intersecting to make migration >> safe. Introduce the new '|' attribute for this purpose. >> >> Right now, it's only used by the Xapi toolstack, but it will be used by >> xl/libxl when the full policy-object work is complete, and until then it's >> still a useful hint for hand-crafted cpuid= lines in vm.cfg files. >> >> Signed-off-by: Andrew Cooper > Reviewed-by: Jan Beulich Thanks. > The one feature you don't annotate such that I'm not entirely sure about is > NO_FPU_SEL: On one hand it tells code not to rely on / use the selector > fields in FPU state. It's sadly far more complicated than this. This feature, and its AMD partner RSTR_FP_ERR_PTRS, were introduced to stop windows BSOD-ing under virt, and came with an accidental breakage of x86emul/DoSBox/etc which Intel and AMD have declined to fix. If you recall, prior to these features, the hypervisor needs to divine the operand size of Window's {F,}X{SAVE,RESTORE} instructions, as it blindly does a memcmp() across the region to confirm that the interrupt handler didn't corrupt any state. Both features are discontinuities across which it is not safe to migrate. Advertising "reliably store as 0" on older systems will still cause windows to BSOD on occasion, whereas not advertising it on newer systems will suggest that legacy emulators will work, when they don't. I don't have any good idea of how to make this work nicely, other than having some admin booleans in the xl.cfg file saying "I certify I'm not using a legacy emulator in my VM" to override the safety check. Other discontinuities I'm aware of are the introduction of DAZ (changing MXCSR_MASK), and any migration which changes LBR_FORMAT. ~Andrew
Re: [XEN PATCH v3 5/5] xen/arm: ffa: support notification
Hi Julien, On Fri, Apr 26, 2024 at 9:07 PM Julien Grall wrote: > > Hi Jens, > > On 26/04/2024 09:47, Jens Wiklander wrote: > > diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c > > index d7306aa64d50..5224898265a5 100644 > > --- a/xen/arch/arm/irq.c > > +++ b/xen/arch/arm/irq.c > > @@ -155,7 +155,7 @@ void __init init_IRQ(void) > > { > > /* SGIs are always edge-triggered */ > > if ( irq < NR_GIC_SGI ) > > -local_irqs_type[irq] = DT_IRQ_TYPE_EDGE_RISING; > > +local_irqs_type[irq] = IRQ_TYPE_EDGE_RISING; > > This changes seems unrelated to this patch. This wants to be separate > (if you actually intended to change it). I'm sorry, my bad. I meant for this to go into "xen/arm: allow dynamically assigned SGI handlers". > > > else > > local_irqs_type[irq] = IRQ_TYPE_INVALID; > > } > > diff --git a/xen/arch/arm/tee/Makefile b/xen/arch/arm/tee/Makefile > > index f0112a2f922d..7c0f46f7f446 100644 > > --- a/xen/arch/arm/tee/Makefile > > +++ b/xen/arch/arm/tee/Makefile > > @@ -2,5 +2,6 @@ obj-$(CONFIG_FFA) += ffa.o > > obj-$(CONFIG_FFA) += ffa_shm.o > > obj-$(CONFIG_FFA) += ffa_partinfo.o > > obj-$(CONFIG_FFA) += ffa_rxtx.o > > +obj-$(CONFIG_FFA) += ffa_notif.o > > obj-y += tee.o > > obj-$(CONFIG_OPTEE) += optee.o > > diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c > > index 5209612963e1..912e905a27bd 100644 > > --- a/xen/arch/arm/tee/ffa.c > > +++ b/xen/arch/arm/tee/ffa.c > > @@ -39,6 +39,9 @@ > >* - at most 32 shared memory regions per guest > >* o FFA_MSG_SEND_DIRECT_REQ: > >* - only supported from a VM to an SP > > + * o FFA_NOTIFICATION_*: > > + * - only supports global notifications, that is, per vCPU notifications > > + * are not supported > >* > >* There are some large locked sections with ffa_tx_buffer_lock and > >* ffa_rx_buffer_lock. 
Especially the ffa_tx_buffer_lock spinlock used > > @@ -194,6 +197,8 @@ out: > > > > static void handle_features(struct cpu_user_regs *regs) > > { > > +struct domain *d = current->domain; > > +struct ffa_ctx *ctx = d->arch.tee; > > uint32_t a1 = get_user_reg(regs, 1); > > unsigned int n; > > > > @@ -240,6 +245,30 @@ static void handle_features(struct cpu_user_regs *regs) > > BUILD_BUG_ON(PAGE_SIZE != FFA_PAGE_SIZE); > > ffa_set_regs_success(regs, 0, 0); > > break; > > +case FFA_FEATURE_NOTIF_PEND_INTR: > > +if ( ctx->notif.enabled ) > > +ffa_set_regs_success(regs, GUEST_FFA_NOTIF_PEND_INTR_ID, 0); > > +else > > +ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); > > +break; > > +case FFA_FEATURE_SCHEDULE_RECV_INTR: > > +if ( ctx->notif.enabled ) > > +ffa_set_regs_success(regs, GUEST_FFA_SCHEDULE_RECV_INTR_ID, 0); > > +else > > +ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); > > +break; > > + > > +case FFA_NOTIFICATION_BIND: > > +case FFA_NOTIFICATION_UNBIND: > > +case FFA_NOTIFICATION_GET: > > +case FFA_NOTIFICATION_SET: > > +case FFA_NOTIFICATION_INFO_GET_32: > > +case FFA_NOTIFICATION_INFO_GET_64: > > +if ( ctx->notif.enabled ) > > +ffa_set_regs_success(regs, 0, 0); > > +else > > +ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); > > +break; > > default: > > ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); > > break; > > @@ -305,6 +334,22 @@ static bool ffa_handle_call(struct cpu_user_regs *regs) > >get_user_reg(regs, > > 1)), > > get_user_reg(regs, 3)); > > break; > > +case FFA_NOTIFICATION_BIND: > > +e = ffa_handle_notification_bind(regs); > > +break; > > +case FFA_NOTIFICATION_UNBIND: > > +e = ffa_handle_notification_unbind(regs); > > +break; > > +case FFA_NOTIFICATION_INFO_GET_32: > > +case FFA_NOTIFICATION_INFO_GET_64: > > +ffa_handle_notification_info_get(regs); > > +return true; > > +case FFA_NOTIFICATION_GET: > > +ffa_handle_notification_get(regs); > > +return true; > > +case FFA_NOTIFICATION_SET: > > +e = ffa_handle_notification_set(regs); > > +break; > > 
> > default: > > gprintk(XENLOG_ERR, "ffa: unhandled fid 0x%x\n", fid); > > @@ -322,6 +367,7 @@ static bool ffa_handle_call(struct cpu_user_regs *regs) > > static int ffa_domain_init(struct domain *d) > > { > > struct ffa_ctx *ctx; > > +int ret; > > > > if ( !ffa_version ) > > return -ENODEV; > > @@ -345,10 +391,11 @@ static int ffa_domain_init(struct domain *d) > >* error, so no need for cleanup in this function. > >*/ > > > > -if ( !ffa_partinfo_domain_init(d) ) > > -return -EIO; > > +ret = ffa_partinfo_domain_init(d); > > +if ( ret ) >
Re: [PATCH v5 1/7] x86/msi: passthrough all MSI-X vector ctrl writes to device model
On Wed, Mar 13, 2024 at 04:16:06PM +0100, Marek Marczykowski-Górecki wrote: > QEMU needs to know whether clearing maskbit of a vector is really > clearing, or was already cleared before. Currently Xen sends only > clearing that bit to the device model, but not setting it, so QEMU > cannot detect it. Because of that, QEMU is working this around by > checking via /dev/mem, but that isn't the proper approach. > > Give all necessary information to QEMU by passing all ctrl writes, > including masking a vector. Advertise the new behavior via > XENVER_get_features, so QEMU can know it doesn't need to access /dev/mem > anymore. > > While this commit doesn't move the whole maskbit handling to QEMU (as > discussed on xen-devel as one of the possibilities), it is a necessary > first step anyway. Including telling QEMU it will get all the required > information to do so. The actual implementation would need to include: > - a hypercall for QEMU to control just maskbit (without (re)binding the >interrupt again > - a methor for QEMU to tell Xen it will actually do the work ^ method > Those are not part of this series. > > Signed-off-by: Marek Marczykowski-Górecki > --- > I did not added any control to enable/disable this new behavior (as > Roger have suggested for possible non-QEMU ioreqs). I don't see how the > new behavior could be problematic for some existing ioreq server (they > already received writes to those addresses, just not all of them), > but if that's really necessary, I can probably add a command line option > to restore previous behavior system-wide. That's fine I guess, as you say such ioreq servers should already know how to handle the ranges, and if anything the current behavior of device models not receiving all accesses is likely the bogus (or unexpected at least). Acked-by: Roger Pau Monné Thanks, Roger.
Re: [PATCH] x86/cpu-policy: Annotate the accumulated features
On 26.04.2024 18:08, Andrew Cooper wrote: > Some features need accumulating rather than intersecting to make migration > safe. Introduce the new '|' attribute for this purpose. > > Right now, it's only used by the Xapi toolstack, but it will be used by > xl/libxl when the full policy-object work is complete, and until then it's > still a useful hint for hand-crafted cpuid= lines in vm.cfg files. > > Signed-off-by: Andrew Cooper Reviewed-by: Jan Beulich The one feature you don't annotate such that I'm not entirely sure about is NO_FPU_SEL: On one hand it tells code not to rely on / use the selector fields in FPU state. That would be a reason to mark it here. Otoh the specific behavior of storing zero of course won't occur on older hardware, being an argument for having things the way you do. Perhaps a sentence or two could be added to the description? Jan
Re: [XEN PATCH v3 5/5] xen/arm: ffa: support notification
Hi Julien, > On 26 Apr 2024, at 21:07, Julien Grall wrote: > > Hi Jens, > > On 26/04/2024 09:47, Jens Wiklander wrote: >> diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c >> index d7306aa64d50..5224898265a5 100644 >> --- a/xen/arch/arm/irq.c >> +++ b/xen/arch/arm/irq.c >> @@ -155,7 +155,7 @@ void __init init_IRQ(void) >> { >> /* SGIs are always edge-triggered */ >> if ( irq < NR_GIC_SGI ) >> -local_irqs_type[irq] = DT_IRQ_TYPE_EDGE_RISING; >> +local_irqs_type[irq] = IRQ_TYPE_EDGE_RISING; > > This changes seems unrelated to this patch. This wants to be separate (if you > actually intended to change it). > >> else >> local_irqs_type[irq] = IRQ_TYPE_INVALID; >> } >> diff --git a/xen/arch/arm/tee/Makefile b/xen/arch/arm/tee/Makefile >> index f0112a2f922d..7c0f46f7f446 100644 >> --- a/xen/arch/arm/tee/Makefile >> +++ b/xen/arch/arm/tee/Makefile >> @@ -2,5 +2,6 @@ obj-$(CONFIG_FFA) += ffa.o >> obj-$(CONFIG_FFA) += ffa_shm.o >> obj-$(CONFIG_FFA) += ffa_partinfo.o >> obj-$(CONFIG_FFA) += ffa_rxtx.o >> +obj-$(CONFIG_FFA) += ffa_notif.o >> obj-y += tee.o >> obj-$(CONFIG_OPTEE) += optee.o >> diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c >> index 5209612963e1..912e905a27bd 100644 >> --- a/xen/arch/arm/tee/ffa.c >> +++ b/xen/arch/arm/tee/ffa.c >> @@ -39,6 +39,9 @@ >> * - at most 32 shared memory regions per guest >> * o FFA_MSG_SEND_DIRECT_REQ: >> * - only supported from a VM to an SP >> + * o FFA_NOTIFICATION_*: >> + * - only supports global notifications, that is, per vCPU notifications >> + * are not supported >> * >> * There are some large locked sections with ffa_tx_buffer_lock and >> * ffa_rx_buffer_lock. 
Especially the ffa_tx_buffer_lock spinlock used >> @@ -194,6 +197,8 @@ out: >>static void handle_features(struct cpu_user_regs *regs) >> { >> +struct domain *d = current->domain; >> +struct ffa_ctx *ctx = d->arch.tee; >> uint32_t a1 = get_user_reg(regs, 1); >> unsigned int n; >> @@ -240,6 +245,30 @@ static void handle_features(struct cpu_user_regs *regs) >> BUILD_BUG_ON(PAGE_SIZE != FFA_PAGE_SIZE); >> ffa_set_regs_success(regs, 0, 0); >> break; >> +case FFA_FEATURE_NOTIF_PEND_INTR: >> +if ( ctx->notif.enabled ) >> +ffa_set_regs_success(regs, GUEST_FFA_NOTIF_PEND_INTR_ID, 0); >> +else >> +ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); >> +break; >> +case FFA_FEATURE_SCHEDULE_RECV_INTR: >> +if ( ctx->notif.enabled ) >> +ffa_set_regs_success(regs, GUEST_FFA_SCHEDULE_RECV_INTR_ID, 0); >> +else >> +ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); >> +break; >> + >> +case FFA_NOTIFICATION_BIND: >> +case FFA_NOTIFICATION_UNBIND: >> +case FFA_NOTIFICATION_GET: >> +case FFA_NOTIFICATION_SET: >> +case FFA_NOTIFICATION_INFO_GET_32: >> +case FFA_NOTIFICATION_INFO_GET_64: >> +if ( ctx->notif.enabled ) >> +ffa_set_regs_success(regs, 0, 0); >> +else >> +ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); >> +break; >> default: >> ffa_set_regs_error(regs, FFA_RET_NOT_SUPPORTED); >> break; >> @@ -305,6 +334,22 @@ static bool ffa_handle_call(struct cpu_user_regs *regs) >> get_user_reg(regs, 1)), >> get_user_reg(regs, 3)); >> break; >> +case FFA_NOTIFICATION_BIND: >> +e = ffa_handle_notification_bind(regs); >> +break; >> +case FFA_NOTIFICATION_UNBIND: >> +e = ffa_handle_notification_unbind(regs); >> +break; >> +case FFA_NOTIFICATION_INFO_GET_32: >> +case FFA_NOTIFICATION_INFO_GET_64: >> +ffa_handle_notification_info_get(regs); >> +return true; >> +case FFA_NOTIFICATION_GET: >> +ffa_handle_notification_get(regs); >> +return true; >> +case FFA_NOTIFICATION_SET: >> +e = ffa_handle_notification_set(regs); >> +break; >>default: >> gprintk(XENLOG_ERR, "ffa: unhandled fid 0x%x\n", fid); >> 
@@ -322,6 +367,7 @@ static bool ffa_handle_call(struct cpu_user_regs *regs) >> static int ffa_domain_init(struct domain *d) >> { >> struct ffa_ctx *ctx; >> +int ret; >>if ( !ffa_version ) >> return -ENODEV; >> @@ -345,10 +391,11 @@ static int ffa_domain_init(struct domain *d) >> * error, so no need for cleanup in this function. >> */ >> -if ( !ffa_partinfo_domain_init(d) ) >> -return -EIO; >> +ret = ffa_partinfo_domain_init(d); >> +if ( ret ) >> +return ret; >> -return 0; >> +return ffa_notif_domain_init(d); >> } >>static void ffa_domain_teardown_continue(struct ffa_ctx *ctx, bool >> first_time) >> @@ -423,6 +470,7 @@ static int ffa_domain_teardown(struct domain *d) >> return
Re: MISRA and -Wextra-semi
On 26.04.2024 13:33, Andrew Cooper wrote: > Hi, > > Based on a call a long while back, I experimented with -Wextra-semi. > This is what led to 8e36c668ca107 "xen: Drop superfluous semi-colons". > > However, there are a number of problems with getting this working > fully. First, we need workarounds like this: > > diff --git a/xen/include/xen/config.h b/xen/include/xen/config.h > index d888b2314daf..12e99c6dded4 100644 > --- a/xen/include/xen/config.h > +++ b/xen/include/xen/config.h > @@ -26,7 +26,7 @@ > > #include > > -#define EXPORT_SYMBOL(var) > +#define EXPORT_SYMBOL(var) typedef int var##_ignore_t For this specifically, could we perhaps finally get rid of most (all?) EXPORT_SYMBOL()? If not all, then at least as far as permitting the stub #define to be moved to linux-compat.h? Jan
Re: [PATCH v2 1/7] x86/p2m: Add braces for better code clarity
On 28.04.2024 18:52, Petr Beneš wrote: > From: Petr Beneš > > No functional change. > > Signed-off-by: Petr Beneš Where did Stefano's R-b go? Jan
Re: [PATCH 07/15] xen/overlay: Enable device tree overlay assignment to running domains
On 29.04.2024 05:36, Henry Wang wrote: > Hi Jan, Julien, Stefano, > > On 4/24/2024 2:05 PM, Jan Beulich wrote: >> On 24.04.2024 05:34, Henry Wang wrote: >>> --- a/xen/include/public/sysctl.h >>> +++ b/xen/include/public/sysctl.h >>> @@ -1197,7 +1197,9 @@ struct xen_sysctl_dt_overlay { >>> #define XEN_SYSCTL_DT_OVERLAY_ADD 1 >>> #define XEN_SYSCTL_DT_OVERLAY_REMOVE2 >>> uint8_t overlay_op; /* IN: Add or remove. */ >>> -uint8_t pad[3]; /* IN: Must be zero. */ >>> +bool domain_mapping;/* IN: True of False. */ >>> +uint8_t pad[2]; /* IN: Must be zero. */ >>> +uint32_t domain_id; >>> }; >> If you merely re-purposed padding fields, all would be fine without >> bumping the interface version. Yet you don't, albeit for an unclear >> reason: Why uint32_t rather than domid_t? And on top of that - why a >> separate boolean when you could use e.g. DOMID_INVALID to indicate >> "no domain mapping"? > > I think both of your suggestion make great sense. I will follow the > suggestion in v2. > >> That said - anything taking a domain ID is certainly suspicious in a >> sysctl. Judging from the description you really mean this to be a >> domctl. Anything else will require extra justification. > > I also think a domctl is better. I had a look at the history of the > already merged series, it looks like in the first version of merged part > 1 [1], the hypercall was implemented as the domctl in the beginning but > later in v2 changed to sysctl. I think this makes sense as the scope of > that time is just to make Xen aware of the device tree node via Xen > device tree. > > However this is now a problem for the current part where the scope (and > the end goal) is extended to assign the added device to Linux Dom0/DomU > via device tree overlays. I am not sure which way is better, should we > repurposing the sysctl to domctl or maybe add another domctl (I am > worrying about the duplication because basically we need the same sysctl > functionality but now with a domid in it)? 
What do you think? I'm taking it that what is a sysctl right now legitimately is. Therefore folding both into domctl would at least be bending the rules of what should be sysctl and what domctl. It would need clarifying what (fake) domain such a (folded) domctl ought to operate on for the case that's currently a sysctl. That then may (or may not) be justification for such folding. Jan > @Stefano: Since I am not 100% sure if I understand the whole story behind > this feature, would you mind checking if I am providing correct > information above and sharing your opinions on this? Thank you very much! > > [1] > https://lore.kernel.org/xen-devel/13240b69-f7bb-6a64-b89c-b7c2cbb7e...@xen.org/ > > Kind regards, > Henry > >> Jan >
Re: [PATCH v5 3/7] x86/hvm: Allow access to registers on the same page as MSI-X table
On 26.04.2024 17:26, Marek Marczykowski-Górecki wrote: > On Thu, Apr 25, 2024 at 01:15:34PM +0200, Jan Beulich wrote: >> On 13.03.2024 16:16, Marek Marczykowski-Górecki wrote: >>> +hwaddr = fix_to_virt(fixmap_idx) + PAGE_OFFSET(address); >>> + >>> +switch ( len ) >>> +{ >>> +case 1: >>> +*pval = readb(hwaddr); >>> +break; >>> + >>> +case 2: >>> +*pval = readw(hwaddr); >>> +break; >>> + >>> +case 4: >>> +*pval = readl(hwaddr); >>> +break; >>> + >>> +case 8: >>> +*pval = readq(hwaddr); >>> +break; >>> + >>> +default: >>> +ASSERT_UNREACHABLE(); >> >> Misra demands "break;" to be here for release builds. In fact I wonder >> why "*pval = ~0UL;" isn't put here, too. Question of course is whether >> in such a case a true error indicator wouldn't be yet better. > > I don't think it possible for the msixtbl_read() (that calls > adjacent_read()) to be called with other sizes. I agree, but scanners won't know. Jan > The default label is here exactly to make it obvious for the reader. >
Re: [PATCH] xen/spinlock: use correct pointer
On 26.04.2024 16:33, Stewart Hildebrand wrote: > On 4/26/24 02:31, Jan Beulich wrote: >> On 25.04.2024 22:45, Stewart Hildebrand wrote: >>> The ->profile member is at different offsets in struct rspinlock and >>> struct spinlock. When initializing the profiling bits of an rspinlock, >>> an unrelated member in struct rspinlock was being overwritten, leading >>> to mild havoc. Use the correct pointer. >>> >>> Fixes: b053075d1a7b ("xen/spinlock: make struct lock_profile rspinlock_t >>> aware") >>> Signed-off-by: Stewart Hildebrand >> >> Reviewed-by: Jan Beulich > > Thanks! > >> >>> --- a/xen/common/spinlock.c >>> +++ b/xen/common/spinlock.c >>> @@ -789,7 +789,11 @@ static int __init cf_check lock_prof_init(void) >>> { >>> (*q)->next = lock_profile_glb_q.elem_q; >>> lock_profile_glb_q.elem_q = *q; >>> -(*q)->ptr.lock->profile = *q; >>> + >>> +if ( (*q)->is_rlock ) >>> +(*q)->ptr.rlock->profile = *q; >>> +else >>> +(*q)->ptr.lock->profile = *q; >>> } >>> >>> _lock_profile_register_struct(LOCKPROF_TYPE_GLOBAL, >> >> Just to mention it: Strictly speaking spinlock_profile_print_elem()'s >> >> printk("%s: addr=%p, lockval=%08x, ", data->name, data->ptr.lock, >> lockval); >> >> isn't quite right either (and I would be surprised if Misra didn't have >> to say something about it). > > I'd be happy to send a patch for that instance, too. Would you like a > Reported-by: tag? I'm inclined to say no, not worth it, but it's really up to you. In fact I'm not sure we need to change that; it all depends on whether ... 
> That patch would look something like: > > --- a/xen/common/spinlock.c > +++ b/xen/common/spinlock.c > @@ -637,22 +637,25 @@ static void cf_check spinlock_profile_print_elem(struct > lock_profile *data, > { > unsigned int cpu; > unsigned int lockval; > +void *lockaddr; > > if ( data->is_rlock ) > { > cpu = data->ptr.rlock->debug.cpu; > lockval = data->ptr.rlock->tickets.head_tail; > +lockaddr = data->ptr.rlock; > } > else > { > cpu = data->ptr.lock->debug.cpu; > lockval = data->ptr.lock->tickets.head_tail; > +lockaddr = data->ptr.lock; > } > > printk("%s ", lock_profile_ancs[type].name); > if ( type != LOCKPROF_TYPE_GLOBAL ) > printk("%d ", idx); > -printk("%s: addr=%p, lockval=%08x, ", data->name, data->ptr.lock, > lockval); > +printk("%s: addr=%p, lockval=%08x, ", data->name, lockaddr, lockval); > if ( cpu == SPINLOCK_NO_CPU ) > printk("not locked\n"); > else > > > That case is benign since the pointer is not dereferenced. So the > rationale would primarily be for consistency (and possibly satisfying > Misra). ... Misra takes issue with the "wrong" member of a union being used, which iirc is UB, but which I'm afraid elsewhere we do all the time. Jan