[linux-5.4 test] 184334: regressions - FAIL

2024-01-12 Thread osstest service owner
flight 184334 linux-5.4 real [real]
http://logs.test-lab.xenproject.org/osstest/logs/184334/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-armhf-armhf-xl-multivcpu 18 guest-start/debian.repeat fail REGR. vs. 184192

Tests which are failing intermittently (not blocking):
 test-arm64-arm64-libvirt-raw 17 guest-start/debian.repeat fail in 184327 pass in 184334
 test-armhf-armhf-libvirt-raw 17 guest-start/debian.repeat fail in 184327 pass in 184334
 test-armhf-armhf-xl-vhd  13 guest-start  fail in 184327 pass in 184334
 test-amd64-i386-xl-qemuu-ws16-amd64 12 windows-install fail in 184327 pass in 184334
 test-armhf-armhf-xl  18 guest-start/debian.repeat  fail pass in 184327

Regressions which are regarded as allowable (not blocking):
 test-armhf-armhf-xl-rtds 18 guest-start/debian.repeat fail REGR. vs. 184192

Tests which did not succeed, but are not blocking:
 test-armhf-armhf-xl-credit1 18 guest-start/debian.repeat fail blocked in 184192
 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check fail in 184327 blocked in 184192
 test-armhf-armhf-xl-credit1  14 guest-start fail in 184327 like 184192
 test-armhf-armhf-xl-arndale 18 guest-start/debian.repeat fail in 184327 like 184192
 test-armhf-armhf-libvirt-qcow2 14 migrate-support-check fail in 184327 never pass
 test-armhf-armhf-libvirt 16 saverestore-support-check fail  like 184192
 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stop fail like 184192
 test-amd64-i386-xl-qemut-win7-amd64 19 guest-stop fail like 184192
 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 184192
 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stop fail like 184192
 test-armhf-armhf-xl-credit2  18 guest-start/debian.repeat fail  like 184192
 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stop fail like 184192
 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stop fail like 184192
 test-amd64-i386-xl-qemut-ws16-amd64 19 guest-stop fail like 184192
 test-armhf-armhf-libvirt-raw 15 saverestore-support-check fail  like 184192
 test-armhf-armhf-libvirt-qcow2 13 guest-start fail like 184192
 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 184192
 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 184192
 test-amd64-i386-xl-pvshim    14 guest-start  fail   never pass
 test-amd64-amd64-libvirt-xsm 15 migrate-support-check fail   never pass
 test-amd64-amd64-libvirt 15 migrate-support-check fail   never pass
 test-amd64-i386-libvirt-xsm  15 migrate-support-check fail   never pass
 test-amd64-i386-libvirt  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-xsm  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-xsm  16 saverestore-support-check fail   never pass
 test-arm64-arm64-xl  15 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt-xsm 15 migrate-support-check fail   never pass
 test-arm64-arm64-xl  16 saverestore-support-check fail   never pass
 test-arm64-arm64-libvirt-xsm 16 saverestore-support-check fail   never pass
 test-arm64-arm64-xl-credit1  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-credit1  16 saverestore-support-check fail   never pass
 test-arm64-arm64-xl-credit2  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-credit2  16 saverestore-support-check fail   never pass
 test-arm64-arm64-xl-thunderx 15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-thunderx 16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl-multivcpu 15 migrate-support-check fail  never pass
 test-armhf-armhf-xl-multivcpu 16 saverestore-support-check fail  never pass
 test-armhf-armhf-xl-rtds 15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-rtds 16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl-credit1  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-credit1  16 saverestore-support-check fail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass
 test-armhf-armhf-xl-arndale  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-arndale  16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl-credit2  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-credit2  16 saverestore-support-check fail   never pass
 test-amd64-amd64-libvirt-vhd 14 migrate-support-check fail   never pass
 test-armhf-armhf-xl  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl  16 saverestore-support-check fail   never pass

[linux-linus test] 184332: regressions - FAIL

2024-01-12 Thread osstest service owner
flight 184332 linux-linus real [real]
flight 184336 linux-linus real-retest [real]
http://logs.test-lab.xenproject.org/osstest/logs/184332/
http://logs.test-lab.xenproject.org/osstest/logs/184336/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-arm64-arm64-xl-xsm  12 debian-install   fail REGR. vs. 184270
 test-arm64-arm64-xl-credit1  12 debian-install   fail REGR. vs. 184270
 test-arm64-arm64-xl  12 debian-install   fail REGR. vs. 184270
 test-arm64-arm64-xl-thunderx 12 debian-install   fail REGR. vs. 184270
 test-arm64-arm64-xl-credit2  12 debian-install   fail REGR. vs. 184270
 test-arm64-arm64-libvirt-xsm 12 debian-install   fail REGR. vs. 184270

Tests which are failing intermittently (not blocking):
 test-armhf-armhf-xl-vhd  13 guest-start fail pass in 184336-retest

Tests which did not succeed, but are not blocking:
 test-armhf-armhf-xl-vhd 14 migrate-support-check fail in 184336 never pass
 test-armhf-armhf-xl-vhd 15 saverestore-support-check fail in 184336 never pass
 test-armhf-armhf-libvirt 16 saverestore-support-check fail  like 184270
 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stop fail like 184270
 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stop fail like 184270
 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stop fail like 184270
 test-armhf-armhf-libvirt-raw 15 saverestore-support-check fail  like 184270
 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stop fail like 184270
 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check   fail like 184270
 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 184270
 test-amd64-amd64-libvirt 15 migrate-support-check fail   never pass
 test-amd64-amd64-libvirt-xsm 15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-multivcpu 15 migrate-support-check fail  never pass
 test-armhf-armhf-xl-multivcpu 16 saverestore-support-check fail  never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass
 test-armhf-armhf-libvirt 15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-arndale  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-arndale  16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl  16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl-rtds 15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-rtds 16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl-credit1  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-credit1  16 saverestore-support-check fail   never pass
 test-amd64-amd64-libvirt-qcow2 14 migrate-support-check fail never pass
 test-armhf-armhf-xl-credit2  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-credit2  16 saverestore-support-check fail   never pass
 test-amd64-amd64-libvirt-raw 14 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt-raw 14 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt-raw 15 saverestore-support-check fail   never pass
 test-arm64-arm64-xl-vhd  14 migrate-support-check fail   never pass
 test-arm64-arm64-xl-vhd  15 saverestore-support-check fail   never pass
 test-armhf-armhf-libvirt-raw 14 migrate-support-check fail   never pass
 test-armhf-armhf-libvirt-qcow2 14 migrate-support-check fail never pass

version targeted for testing:
 linux                70d201a40823acba23899342d62bc2644051ad2e
baseline version:
 linux                0dd3ee31125508cd67f7e7172247f05b7fd1753a

Last test of basis   184270  2024-01-07 20:42:19 Z    5 days
Failing since        184283  2024-01-08 20:10:43 Z    4 days    7 attempts
Testing same since   184332  2024-01-12 10:15:58 Z    0 days    1 attempts


1200 people touched revisions under test,
not listing them all

jobs:
 build-amd64-xsm  pass
 build-arm64-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-arm64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-arm64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass 

Re: [PATCH 0/4] x86emul: support further AVX extensions

2024-01-12 Thread Andrew Cooper
On 07/08/2023 4:18 pm, Jan Beulich wrote:
> Covers the smaller recently announced extensions, but not AVX10 (and
> even less so APX). Obviously CPUID aspects are taken care of alongside
> the actual emulator additions.
>
> 1: support AVX-VNNI-INT16
> 2: support SHA512
> 3: support SM3
> 4: support SM4

Reviewed-by: Andrew Cooper 

Sorry this took so long.  I think this is the 3rd time I've decided this
was all fine to go in, and clearly the first time I've written an email
to that effect.

~Andrew



[PATCH v12.1 09/15] vpci/header: program p2m with guest BAR view

2024-01-12 Thread Stewart Hildebrand
From: Oleksandr Andrushchenko 

Take into account guest's BAR view and program its p2m accordingly:
gfn is guest's view of the BAR and mfn is the physical BAR value.
This way hardware domain sees physical BAR values and guest sees
emulated ones.

Hardware domain continues getting the BARs identity mapped, while for
domUs the BARs are mapped at the requested guest address without
modifying the BAR address in the device PCI config space.

Signed-off-by: Oleksandr Andrushchenko 
Signed-off-by: Volodymyr Babchuk 
Signed-off-by: Stewart Hildebrand 
Reviewed-by: Roger Pau Monné 
---
In v12.1:
- ASSERT(rangeset_is_empty()) in modify_bars()
- Fixup print format in modify_bars()
- Make comment single line in bar_write()
- Add Roger's R-b
In v12:
- Update guest_addr in rom_write()
- Use unsigned long for start_mfn and map_mfn to reduce mfn_x() calls
- Use existing vmsix_table_*() functions
- Change vmsix_table_base() to use .guest_addr
In v11:
- Add vmsix_guest_table_addr() and vmsix_guest_table_base() functions
  to access guest's view of the VMSIx tables.
- Use MFN (not GFN) to check access permissions
- Move page offset check to this patch
- Call rangeset_remove_range() with correct parameters
In v10:
- Moved GFN variable definition outside the loop in map_range()
- Updated printk error message in map_range()
- Now BAR address is always stored in bar->guest_addr, even for
  HW dom, this removes bunch of ugly is_hwdom() checks in modify_bars()
- vmsix_table_base() now uses .guest_addr instead of .addr
In v9:
- Extended the commit message
- Use bar->guest_addr in modify_bars
- Extended printk error message in map_range
- Moved map_data initialization so .bar can be initialized during declaration
Since v5:
- remove debug print in map_range callback
- remove "identity" from the debug print
Since v4:
- moved start_{gfn|mfn} calculation into map_range
- pass vpci_bar in the map_data instead of start_{gfn|mfn}
- s/guest_addr/guest_reg
Since v3:
- updated comment (Roger)
- removed gfn_add(map->start_gfn, rc); which is wrong
- use v->domain instead of v->vpci.pdev->domain
- removed odd e.g. in comment
- s/d%d/%pd in altered code
- use gdprintk for map/unmap logs
Since v2:
- improve readability for data.start_gfn and restructure ?: construct
Since v1:
 - s/MSI/MSI-X in comments
---
 xen/drivers/vpci/header.c | 80 ++-
 xen/include/xen/vpci.h|  3 +-
 2 files changed, 65 insertions(+), 18 deletions(-)

diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index feccd070ddd0..f1e366bb5fc6 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -34,6 +34,7 @@
 
 struct map_data {
 struct domain *d;
+const struct vpci_bar *bar;
 bool map;
 };
 
@@ -41,13 +42,24 @@ static int cf_check map_range(
 unsigned long s, unsigned long e, void *data, unsigned long *c)
 {
 const struct map_data *map = data;
+/* Start address of the BAR as seen by the guest. */
+unsigned long start_gfn = PFN_DOWN(map->bar->guest_addr);
+/* Physical start address of the BAR. */
+unsigned long start_mfn = PFN_DOWN(map->bar->addr);
 int rc;
 
 for ( ; ; )
 {
 unsigned long size = e - s + 1;
+/*
+ * Ranges to be mapped don't always start at the BAR start address, as
+ * there can be holes or partially consumed ranges. Account for the
+ * offset of the current address from the BAR start.
+ */
+unsigned long map_mfn = start_mfn + s - start_gfn;
+unsigned long m_end = map_mfn + size - 1;
 
-if ( !iomem_access_permitted(map->d, s, e) )
+if ( !iomem_access_permitted(map->d, map_mfn, m_end) )
 {
 printk(XENLOG_G_WARNING
"%pd denied access to MMIO range [%#lx, %#lx]\n",
@@ -55,7 +67,8 @@ static int cf_check map_range(
 return -EPERM;
 }
 
-rc = xsm_iomem_mapping(XSM_HOOK, map->d, s, e, map->map);
+rc = xsm_iomem_mapping(XSM_HOOK, map->d, map_mfn, m_end,
+   map->map);
 if ( rc )
 {
 printk(XENLOG_G_WARNING
@@ -73,8 +86,8 @@ static int cf_check map_range(
  * - {un}map_mmio_regions doesn't support preemption.
  */
 
-rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(s))
-  : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(s));
+rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(map_mfn))
+  : unmap_mmio_regions(map->d, _gfn(s), size, 
_mfn(map_mfn));
 if ( rc == 0 )
 {
 *c += size;
@@ -83,8 +96,9 @@ static int cf_check map_range(
 if ( rc < 0 )
 {
 printk(XENLOG_G_WARNING
-   "Failed to identity %smap [%lx, %lx] for d%d: %d\n",
-   map->map ? "" : "un", s, e, map->d->domain_id, rc);
+   "Failed to %smap [%lx %lx] -> [%lx %lx] for %pd: %d\n",
+ 

Re: [PATCH v12 09/15] vpci/header: program p2m with guest BAR view

2024-01-12 Thread Stewart Hildebrand
On 1/12/24 10:06, Roger Pau Monné wrote:
> On Tue, Jan 09, 2024 at 04:51:24PM -0500, Stewart Hildebrand wrote:
>> From: Oleksandr Andrushchenko 
>>
>> Take into account guest's BAR view and program its p2m accordingly:
>> gfn is guest's view of the BAR and mfn is the physical BAR value.
>> This way hardware domain sees physical BAR values and guest sees
>> emulated ones.
>>
>> Hardware domain continues getting the BARs identity mapped, while for
>> domUs the BARs are mapped at the requested guest address without
>> modifying the BAR address in the device PCI config space.
>>
>> Signed-off-by: Oleksandr Andrushchenko 
>> Signed-off-by: Volodymyr Babchuk 
>> Signed-off-by: Stewart Hildebrand 
> 
> Some nits and a request to add an extra assert. If you agree:
> 
> Reviewed-by: Roger Pau Monné 

Thanks! I agree. I'll reply to this with a v12.1 patch.

> 
>> ---
>> In v12:
>> - Update guest_addr in rom_write()
>> - Use unsigned long for start_mfn and map_mfn to reduce mfn_x() calls
>> - Use existing vmsix_table_*() functions
>> - Change vmsix_table_base() to use .guest_addr
>> In v11:
>> - Add vmsix_guest_table_addr() and vmsix_guest_table_base() functions
>>   to access guest's view of the VMSIx tables.
>> - Use MFN (not GFN) to check access permissions
>> - Move page offset check to this patch
>> - Call rangeset_remove_range() with correct parameters
>> In v10:
>> - Moved GFN variable definition outside the loop in map_range()
>> - Updated printk error message in map_range()
>> - Now BAR address is always stored in bar->guest_addr, even for
>>   HW dom, this removes bunch of ugly is_hwdom() checks in modify_bars()
>> - vmsix_table_base() now uses .guest_addr instead of .addr
>> In v9:
>> - Extended the commit message
>> - Use bar->guest_addr in modify_bars
>> - Extended printk error message in map_range
>> - Moved map_data initialization so .bar can be initialized during declaration
>> Since v5:
>> - remove debug print in map_range callback
>> - remove "identity" from the debug print
>> Since v4:
>> - moved start_{gfn|mfn} calculation into map_range
>> - pass vpci_bar in the map_data instead of start_{gfn|mfn}
>> - s/guest_addr/guest_reg
>> Since v3:
>> - updated comment (Roger)
>> - removed gfn_add(map->start_gfn, rc); which is wrong
>> - use v->domain instead of v->vpci.pdev->domain
>> - removed odd e.g. in comment
>> - s/d%d/%pd in altered code
>> - use gdprintk for map/unmap logs
>> Since v2:
>> - improve readability for data.start_gfn and restructure ?: construct
>> Since v1:
>>  - s/MSI/MSI-X in comments
>> ---
>>  xen/drivers/vpci/header.c | 81 +++
>>  xen/include/xen/vpci.h|  3 +-
>>  2 files changed, 66 insertions(+), 18 deletions(-)
>>
>> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
>> index feccd070ddd0..f0b0b64b0929 100644
>> --- a/xen/drivers/vpci/header.c
>> +++ b/xen/drivers/vpci/header.c
>> @@ -34,6 +34,7 @@
>>  
>>  struct map_data {
>>  struct domain *d;
>> +const struct vpci_bar *bar;
>>  bool map;
>>  };
>>  
>> @@ -41,13 +42,24 @@ static int cf_check map_range(
>>  unsigned long s, unsigned long e, void *data, unsigned long *c)
>>  {
>>  const struct map_data *map = data;
>> +/* Start address of the BAR as seen by the guest. */
>> +unsigned long start_gfn = PFN_DOWN(map->bar->guest_addr);
>> +/* Physical start address of the BAR. */
>> +unsigned long start_mfn = PFN_DOWN(map->bar->addr);
>>  int rc;
>>  
>>  for ( ; ; )
>>  {
>>  unsigned long size = e - s + 1;
>> +/*
>> + * Ranges to be mapped don't always start at the BAR start address, 
>> as
>> + * there can be holes or partially consumed ranges. Account for the
>> + * offset of the current address from the BAR start.
>> + */
>> +unsigned long map_mfn = start_mfn + s - start_gfn;
>> +unsigned long m_end = map_mfn + size - 1;
>>  
>> -if ( !iomem_access_permitted(map->d, s, e) )
>> +if ( !iomem_access_permitted(map->d, map_mfn, m_end) )
>>  {
>>  printk(XENLOG_G_WARNING
>> "%pd denied access to MMIO range [%#lx, %#lx]\n",
>> @@ -55,7 +67,8 @@ static int cf_check map_range(
>>  return -EPERM;
>>  }
>>  
>> -rc = xsm_iomem_mapping(XSM_HOOK, map->d, s, e, map->map);
>> +rc = xsm_iomem_mapping(XSM_HOOK, map->d, map_mfn, m_end,
>> +   map->map);
>>  if ( rc )
>>  {
>>  printk(XENLOG_G_WARNING
>> @@ -73,8 +86,8 @@ static int cf_check map_range(
>>   * - {un}map_mmio_regions doesn't support preemption.
>>   */
>>  
>> -rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(s))
>> -  : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(s));
>> +rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, 
>> _mfn(map_mfn))
>> +  : unmap_mmio_regions(map->d, _g

[PATCH] xen/xenbus: document will_handle argument for xenbus_watch_path()

2024-01-12 Thread SeongJae Park
Commit 2e85d32b1c86 ("xen/xenbus: Add 'will_handle' callback support in
xenbus_watch_path()") added the will_handle argument to xenbus_watch_path()
and its wrapper, xenbus_watch_pathfmt(), but didn't document it in the
kerneldoc comments of those functions.  This causes warnings reported by
the kernel test robot.  Add the documentation to fix it.
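
For illustration only (not part of this patch), a hedged sketch of how a
frontend driver might register a watch with a will_handle filter after
commit 2e85d32b1c86; the driver-side names (my_will_handle, my_changed,
info, err) are made up, and the callback signatures are assumed to match
the kerneldoc added below:

    /* Hypothetical driver code, shown only to illustrate the API. */
    static bool my_will_handle(struct xenbus_watch *watch,
                               const char *path, const char *token)
    {
            /* Decide whether this event should be queued for my_changed(). */
            return true;
    }

    static void my_changed(struct xenbus_watch *watch,
                           const char *path, const char *token)
    {
            /* React to the xenstore change at @path. */
    }

    /* In the device's connect path: */
    err = xenbus_watch_path(dev, path, &info->watch,
                            my_will_handle, my_changed);
    if (err)
            xenbus_dev_fatal(dev, err, "watching %s", path);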

Fixes: 2e85d32b1c86 ("xen/xenbus: Add 'will_handle' callback support in 
xenbus_watch_path()")
Reported-by: kernel test robot 
Closes: 
https://lore.kernel.org/oe-kbuild-all/202401121154.fi8jdgun-...@intel.com/
Signed-off-by: SeongJae Park 
---
 drivers/xen/xenbus/xenbus_client.c | 26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_client.c 
b/drivers/xen/xenbus/xenbus_client.c
index d4b251925796..d539210b39d8 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -116,14 +116,16 @@ EXPORT_SYMBOL_GPL(xenbus_strstate);
  * @dev: xenbus device
  * @path: path to watch
  * @watch: watch to register
+ * @will_handle: events queuing determine callback
  * @callback: callback to register
  *
  * Register a @watch on the given path, using the given xenbus_watch structure
- * for storage, and the given @callback function as the callback.  Return 0 on
- * success, or -errno on error.  On success, the given @path will be saved as
- * @watch->node, and remains the caller's to free.  On error, @watch->node will
- * be NULL, the device will switch to %XenbusStateClosing, and the error will
- * be saved in the store.
+ * for storage, @will_handle function as the callback to determine if each
+ * event need to be queued, and the given @callback function as the callback.
+ * Return 0 on success, or -errno on error.  On success, the given @path will
+ * be saved as @watch->node, and remains the caller's to free.  On error,
+ * @watch->node will be NULL, the device will switch to %XenbusStateClosing,
+ * and the error will be saved in the store.
  */
 int xenbus_watch_path(struct xenbus_device *dev, const char *path,
  struct xenbus_watch *watch,
@@ -156,16 +158,18 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path);
  * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
  * @dev: xenbus device
  * @watch: watch to register
+ * @will_handle: events queuing determine callback
  * @callback: callback to register
  * @pathfmt: format of path to watch
  *
  * Register a watch on the given @path, using the given xenbus_watch
- * structure for storage, and the given @callback function as the callback.
- * Return 0 on success, or -errno on error.  On success, the watched path
- * (@path/@path2) will be saved as @watch->node, and becomes the caller's to
- * kfree().  On error, watch->node will be NULL, so the caller has nothing to
- * free, the device will switch to %XenbusStateClosing, and the error will be
- * saved in the store.
+ * structure for storage, @will_handle function as the callback to determine if
+ * each event need to be queued, and the given @callback function as the
+ * callback.  Return 0 on success, or -errno on error.  On success, the watched
+ * path (@path/@path2) will be saved as @watch->node, and becomes the caller's
+ * to kfree().  On error, watch->node will be NULL, so the caller has nothing
+ * to free, the device will switch to %XenbusStateClosing, and the error will
+ * be saved in the store.
  */
 int xenbus_watch_pathfmt(struct xenbus_device *dev,
 struct xenbus_watch *watch,
-- 
2.39.2




[PATCH v12.1 01/15] vpci: use per-domain PCI lock to protect vpci structure

2024-01-12 Thread Stewart Hildebrand
From: Oleksandr Andrushchenko 

Use the per-domain PCI read/write lock to protect the presence of the
pci device vpci field. This lock can be used (and in a few cases is used
right away) so that vpci removal can be performed while holding the lock
in write mode. Previously such removal could race with vpci_read for
example.

When taking both d->pci_lock and pdev->vpci->lock, they should be
taken in this exact order: d->pci_lock then pdev->vpci->lock to avoid
possible deadlock situations.

1. Per-domain's pci_lock is used to protect pdev->vpci structure
from being removed.

2. Writing the command register and ROM BAR register may trigger
modify_bars to run, which in turn may access multiple pdevs while
checking for the existing BAR's overlap. The overlapping check, if
done under the read lock, requires vpci->lock to be acquired on both
devices being compared, which may produce a deadlock. It is not
possible to upgrade read lock to write lock in such a case. So, in
order to prevent the deadlock, use d->pci_lock in write mode instead.

All other code, which doesn't lead to pdev->vpci destruction and does
not access multiple pdevs at the same time, can still use a
combination of the read lock and pdev->vpci->lock.

3. Drop const qualifier where the new rwlock is used and this is
appropriate.

4. Do not call process_pending_softirqs with any locks held. For that
unlock prior the call and re-acquire the locks after. After
re-acquiring the lock there is no need to check if pdev->vpci exists:
 - in apply_map because of the context it is called (no race condition
   possible)
 - for MSI/MSI-X debug code because it is called at the end of
   pdev->vpci access and no further access to pdev->vpci is made

5. Use d->pci_lock around for_each_pdev and pci_get_pdev_by_domain
while accessing pdevs in vpci code.
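
For illustration, a minimal sketch (not part of the patch, d and pdev
assumed in scope) of the nesting described above for code that only reads
a single device's vpci state; paths that may remove pdev->vpci or that
touch several pdevs (e.g. modify_bars()) take d->pci_lock in write mode
instead:

    /* Reader side: d->pci_lock (read) keeps pdev->vpci from being freed. */
    read_lock(&d->pci_lock);
    if ( pdev->vpci )
    {
        spin_lock(&pdev->vpci->lock);   /* per-device vpci state */
        /* ... access pdev->vpci contents ... */
        spin_unlock(&pdev->vpci->lock);
    }
    read_unlock(&d->pci_lock);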

Suggested-by: Roger Pau Monné 
Suggested-by: Jan Beulich 
Signed-off-by: Oleksandr Andrushchenko 
Signed-off-by: Volodymyr Babchuk 
Signed-off-by: Stewart Hildebrand 
Reviewed-by: Roger Pau Monné 
---
Changes in v12.1:
 - use read_trylock() in vpci_msix_arch_print()
 - fixup in-code comments (revert double space, use DomXEN) in
   vpci_{read,write}()
 - minor updates in commit message
 - add Roger's R-b

Changes in v12:
 - s/pci_rwlock/pci_lock/ in commit message
 - expand comment about scope of pci_lock in sched.h
 - in vpci_{read,write}, if hwdom is trying to access a device assigned
   to dom_xen, holding hwdom->pci_lock is sufficient (no need to hold
   dom_xen->pci_lock)
 - reintroduce ASSERT in vmx_pi_update_irte()
 - reintroduce ASSERT in __pci_enable_msi{x}()
 - delete note 6. in commit message about removing ASSERTs since we have
   reintroduced them

Changes in v11:
 - Fixed commit message regarding possible spinlocks
 - Removed parameter from allocate_and_map_msi_pirq(), which was added
 in the prev version. Now we are taking pcidevs_lock in
 physdev_map_pirq()
 - Returned ASSERT to pci_enable_msi
 - Fixed case when we took read lock instead of write one
 - Fixed label indentation

Changes in v10:
 - Moved printk past locked area
 - Returned back ASSERTs
 - Added new parameter to allocate_and_map_msi_pirq() so it knows if
 it should take the global pci lock
 - Added comment about possible improvement in vpci_write
 - Changed ASSERT(rw_is_locked()) to rw_is_write_locked() in
   appropriate places
 - Renamed release_domain_locks() to release_domain_write_locks()
 - moved domain_done label in vpci_dump_msi() to correct place
Changes in v9:
 - extended locked region to protect vpci_remove_device and
   vpci_add_handlers() calls
 - vpci_write() takes lock in the write mode to protect
   potential call to modify_bars()
 - renamed lock releasing function
 - removed ASSERT()s from msi code
 - added trylock in vpci_dump_msi

Changes in v8:
 - changed d->vpci_lock to d->pci_lock
 - introducing d->pci_lock in a separate patch
 - extended locked region in vpci_process_pending
 - removed pcidevs_lock in vpci_dump_msi()
 - removed some changes as they are not needed with
   the new locking scheme
 - added handling for hwdom && dom_xen case
---
 xen/arch/x86/hvm/vmsi.c   | 25 +
 xen/arch/x86/hvm/vmx/vmx.c|  2 +-
 xen/arch/x86/irq.c|  8 +++---
 xen/arch/x86/msi.c| 14 +-
 xen/arch/x86/physdev.c|  2 ++
 xen/drivers/passthrough/pci.c |  9 +++---
 xen/drivers/vpci/header.c | 18 
 xen/drivers/vpci/msi.c| 28 +--
 xen/drivers/vpci/msix.c   | 52 ++-
 xen/drivers/vpci/vpci.c   | 24 ++--
 xen/include/xen/sched.h   |  3 +-
 11 files changed, 145 insertions(+), 40 deletions(-)

diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index 128f23636279..4725e3b72d53 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -468,7 +468,7 @@ int msixtbl_pt_register(struct domain *d, struct pirq 
*pirq, uint64_t gtable)
 struct msixtbl_entry *entry, *new_ent

[libvirt test] 184329: tolerable all pass - PUSHED

2024-01-12 Thread osstest service owner
flight 184329 libvirt real [real]
http://logs.test-lab.xenproject.org/osstest/logs/184329/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-armhf-armhf-libvirt-raw 15 saverestore-support-check fail blocked in 184316
 test-armhf-armhf-libvirt 16 saverestore-support-check fail  like 184316
 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check   fail like 184316
 test-amd64-i386-libvirt-xsm  15 migrate-support-check fail   never pass
 test-amd64-amd64-libvirt 15 migrate-support-check fail   never pass
 test-amd64-amd64-libvirt-xsm 15 migrate-support-check fail   never pass
 test-amd64-i386-libvirt  15 migrate-support-check fail   never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass
 test-arm64-arm64-libvirt 15 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt 16 saverestore-support-check fail   never pass
 test-arm64-arm64-libvirt-xsm 15 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt-xsm 16 saverestore-support-check fail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass
 test-armhf-armhf-libvirt 15 migrate-support-check fail   never pass
 test-amd64-i386-libvirt-raw  14 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt-qcow2 14 migrate-support-check fail never pass
 test-arm64-arm64-libvirt-qcow2 15 saverestore-support-check fail never pass
 test-arm64-arm64-libvirt-raw 14 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt-raw 15 saverestore-support-check fail   never pass
 test-amd64-amd64-libvirt-vhd 14 migrate-support-check fail   never pass
 test-armhf-armhf-libvirt-qcow2 14 migrate-support-check fail never pass
 test-armhf-armhf-libvirt-raw 14 migrate-support-check fail   never pass

version targeted for testing:
 libvirt  9ef6fee12909a70e52b37c3e5277e83d70cdf0da
baseline version:
 libvirt  0e120bc43197a2d4099470f8932c37944a2fcc16

Last test of basis   184316  2024-01-11 04:18:50 Z    1 days
Testing same since   184329  2024-01-12 04:20:39 Z    0 days    1 attempts


People who touched revisions under test:
  Sergio Durigan Junior 
  Shaleen Bathla 
  Yalei Li <274268...@qq.com>
  Yalei Li 

jobs:
 build-amd64-xsm  pass
 build-arm64-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-arm64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-arm64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-arm64-pvopspass
 build-armhf-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm   pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsmpass
 test-amd64-amd64-libvirt-xsm pass
 test-arm64-arm64-libvirt-xsm pass
 test-amd64-i386-libvirt-xsm  pass
 test-amd64-amd64-libvirt pass
 test-arm64-arm64-libvirt pass
 test-armhf-armhf-libvirt pass
 test-amd64-i386-libvirt  pass
 test-amd64-amd64-libvirt-pairpass
 test-amd64-i386-libvirt-pair pass
 test-arm64-arm64-libvirt-qcow2   pass
 test-armhf-armhf-libvirt-qcow2   pass
 test-arm64-arm64-libvirt-raw pass
 test-armhf-armhf-libvirt-raw pass
 test-amd64-i386-libvirt-raw  pass
 test-amd64-amd64-libvirt-vhd pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest

[xen-unstable test] 184330: tolerable trouble: fail/pass/starved

2024-01-12 Thread osstest service owner
flight 184330 xen-unstable real [real]
http://logs.test-lab.xenproject.org/osstest/logs/184330/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stop fail like 184311
 test-armhf-armhf-libvirt 16 saverestore-support-check fail  like 184319
 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stop fail like 184319
 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 184319
 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stop fail like 184319
 test-amd64-i386-xl-qemut-ws16-amd64 19 guest-stop fail like 184319
 test-armhf-armhf-libvirt-raw 15 saverestore-support-check fail  like 184319
 test-amd64-i386-xl-qemut-win7-amd64 19 guest-stop fail like 184319
 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stop fail like 184319
 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check   fail like 184319
 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 184319
 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 184319
 test-amd64-i386-libvirt-xsm  15 migrate-support-check fail   never pass
 test-amd64-amd64-libvirt-xsm 15 migrate-support-check fail   never pass
 test-amd64-i386-xl-pvshim    14 guest-start  fail   never pass
 test-amd64-i386-libvirt  15 migrate-support-check fail   never pass
 test-amd64-amd64-libvirt 15 migrate-support-check fail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass
 test-armhf-armhf-xl-multivcpu 15 migrate-support-check fail  never pass
 test-armhf-armhf-xl-multivcpu 16 saverestore-support-check fail  never pass
 test-armhf-armhf-libvirt 15 migrate-support-check fail   never pass
 test-armhf-armhf-xl  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl  16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl-credit2  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-credit2  16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl-credit1  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-credit1  16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl-arndale  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-arndale  16 saverestore-support-check fail   never pass
 test-amd64-i386-libvirt-raw  14 migrate-support-check fail   never pass
 test-amd64-amd64-libvirt-vhd 14 migrate-support-check fail   never pass
 test-armhf-armhf-libvirt-raw 14 migrate-support-check fail   never pass
 test-armhf-armhf-xl-vhd  14 migrate-support-check fail   never pass
 test-armhf-armhf-xl-vhd  15 saverestore-support-check fail   never pass
 test-armhf-armhf-xl-rtds 15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-rtds 16 saverestore-support-check fail   never pass
 test-armhf-armhf-libvirt-qcow2 14 migrate-support-check fail never pass
 test-arm64-arm64-libvirt-raw  3 hosts-allocate   starved  n/a
 test-arm64-arm64-xl-credit2   3 hosts-allocate   starved  n/a
 test-arm64-arm64-xl-vhd   3 hosts-allocate   starved  n/a
 test-arm64-arm64-xl-xsm   3 hosts-allocate   starved  n/a
 test-arm64-arm64-xl   3 hosts-allocate   starved  n/a
 test-arm64-arm64-xl-credit1   3 hosts-allocate   starved  n/a
 test-arm64-arm64-xl-thunderx  3 hosts-allocate   starved  n/a
 test-arm64-arm64-libvirt-xsm  3 hosts-allocate   starved  n/a

version targeted for testing:
 xen  c27c8922f2c6995d688437b0758cec6a27d18320
baseline version:
 xen  c27c8922f2c6995d688437b0758cec6a27d18320

Last test of basis   184330  2024-01-12 06:07:09 Z    0 days
Testing same since  (not found) 0 attempts

jobs:
 build-amd64-xsm  pass
 build-arm64-xsm  pass
 build-i386-xsm   pass
 build-amd64-xtf  pass
 build-amd64  pass
 build-arm64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-arm64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   

Re: [PATCH v12 01/15] vpci: use per-domain PCI lock to protect vpci structure

2024-01-12 Thread Stewart Hildebrand
On 1/12/24 08:48, Roger Pau Monné wrote:
> On Tue, Jan 09, 2024 at 04:51:16PM -0500, Stewart Hildebrand wrote:
>> From: Oleksandr Andrushchenko 
>>
>> Use a previously introduced per-domain read/write lock to check
>> whether vpci is present, so we are sure there are no accesses to the
>> contents of the vpci struct if not.
> 
> Nit: isn't this sentence kind of awkward?  I would word it as:
> 
> "Use the per-domain PCI read/write lock to protect the presence of the
> pci device vpci field."

I'll use your suggested wording.

> 
>> This lock can be used (and in a
>> few cases is used right away) so that vpci removal can be performed
>> while holding the lock in write mode. Previously such removal could
>> race with vpci_read for example.
>>
>> When taking both d->pci_lock and pdev->vpci->lock, they should be
>> taken in this exact order: d->pci_lock then pdev->vpci->lock to avoid
>> possible deadlock situations.
>>
>> 1. Per-domain's pci_lock is used to protect pdev->vpci structure
>> from being removed.
>>
>> 2. Writing the command register and ROM BAR register may trigger
>> modify_bars to run, which in turn may access multiple pdevs while
>> checking for the existing BAR's overlap. The overlapping check, if
>> done under the read lock, requires vpci->lock to be acquired on both
>> devices being compared, which may produce a deadlock. It is not
>> possible to upgrade read lock to write lock in such a case. So, in
>> order to prevent the deadlock, use d->pci_lock instead.
> 
> ... use d->pci_lock in write mode instead.

Will fix

> 
>>
>> All other code, which doesn't lead to pdev->vpci destruction and does
>> not access multiple pdevs at the same time, can still use a
>> combination of the read lock and pdev->vpci->lock.
>>
>> 3. Drop const qualifier where the new rwlock is used and this is
>> appropriate.
>>
>> 4. Do not call process_pending_softirqs with any locks held. For that
>> unlock prior the call and re-acquire the locks after. After
>> re-acquiring the lock there is no need to check if pdev->vpci exists:
>>  - in apply_map because of the context it is called (no race condition
>>possible)
>>  - for MSI/MSI-X debug code because it is called at the end of
>>pdev->vpci access and no further access to pdev->vpci is made
>>
>> 5. Use d->pci_lock around for_each_pdev and pci_get_pdev_by_domain
>> while accessing pdevs in vpci code.
>>
>> Suggested-by: Roger Pau Monné 
>> Suggested-by: Jan Beulich 
>> Signed-off-by: Oleksandr Andrushchenko 
>> Signed-off-by: Volodymyr Babchuk 
>> Signed-off-by: Stewart Hildebrand 
> 
> Just one code fix regarding the usage of read_lock (instead of the
> trylock version) in vpci_msix_arch_print().. With that and the
> comments adjusted:
> 
> Reviewed-by: Roger Pau Monné 

Thanks! I'll reply to this with a v12.1 patch so as to not unnecessarily resend
the whole series.

> 
>> ---
>> Changes in v12:
>>  - s/pci_rwlock/pci_lock/ in commit message
>>  - expand comment about scope of pci_lock in sched.h
>>  - in vpci_{read,write}, if hwdom is trying to access a device assigned
>>to dom_xen, holding hwdom->pci_lock is sufficient (no need to hold
>>dom_xen->pci_lock)
>>  - reintroduce ASSERT in vmx_pi_update_irte()
>>  - reintroduce ASSERT in __pci_enable_msi{x}()
>>  - delete note 6. in commit message about removing ASSERTs since we have
>>reintroduced them
>>
>> Changes in v11:
>>  - Fixed commit message regarding possible spinlocks
>>  - Removed parameter from allocate_and_map_msi_pirq(), which was added
>>  in the prev version. Now we are taking pcidevs_lock in
>>  physdev_map_pirq()
>>  - Returned ASSERT to pci_enable_msi
>>  - Fixed case when we took read lock instead of write one
>>  - Fixed label indentation
>>
>> Changes in v10:
>>  - Moved printk past locked area
>>  - Returned back ASSERTs
>>  - Added new parameter to allocate_and_map_msi_pirq() so it knows if
>>  it should take the global pci lock
>>  - Added comment about possible improvement in vpci_write
>>  - Changed ASSERT(rw_is_locked()) to rw_is_write_locked() in
>>appropriate places
>>  - Renamed release_domain_locks() to release_domain_write_locks()
>>  - moved domain_done label in vpci_dump_msi() to correct place
>> Changes in v9:
>>  - extended locked region to protect vpci_remove_device and
>>vpci_add_handlers() calls
>>  - vpci_write() takes lock in the write mode to protect
>>potential call to modify_bars()
>>  - renamed lock releasing function
>>  - removed ASSERT()s from msi code
>>  - added trylock in vpci_dump_msi
>>
>> Changes in v8:
>>  - changed d->vpci_lock to d->pci_lock
>>  - introducing d->pci_lock in a separate patch
>>  - extended locked region in vpci_process_pending
>>  - removed pcidevs_lock in vpci_dump_msi()
>>  - removed some changes as they are not needed with
>>the new locking scheme
>>  - added handling for hwdom && dom_xen case
>> ---
>>  xen/arch/x86/hvm/vmsi.c   | 22 +++
>>  xen/arch/x86/hvm/vmx/vmx.c|  2 +-
>>  xen/arch/x

Re: [PATCH v3 16/33] tools/libs/light: add backend type for 9pfs PV devices

2024-01-12 Thread Anthony PERARD
On Thu, Jan 04, 2024 at 10:00:38AM +0100, Juergen Gross wrote:
> diff --git a/tools/libs/light/libxl_9pfs.c b/tools/libs/light/libxl_9pfs.c
> index 5ab0d3aa21..486bc4326e 100644
> --- a/tools/libs/light/libxl_9pfs.c
> +++ b/tools/libs/light/libxl_9pfs.c
> @@ -33,20 +33,159 @@ static int libxl__set_xenstore_p9(libxl__gc *gc, 
> uint32_t domid,
>  
>  flexarray_append_pair(front, "tag", p9->tag);
>  
> +if (p9->type == LIBXL_P9_TYPE_XEN_9PFSD) {
> +flexarray_append_pair(back, "max-space",
> +  GCSPRINTF("%u", p9->max_space));
> +flexarray_append_pair(back, "max-files",
> +  GCSPRINTF("%u", p9->max_files));
> +flexarray_append_pair(back, "max-open-files",
> +  GCSPRINTF("%u", p9->max_open_files));
> +flexarray_append_pair(back, "auto-delete",
> +  p9->auto_delete ? "1" : "0");
> +}
> +
> +return 0;
> +}
> +
> +static int libxl__device_from_p9(libxl__gc *gc, uint32_t domid,
> + libxl_device_p9 *type, libxl__device 
> *device)
> +{
> +device->backend_devid   = type->devid;
> +device->backend_domid   = type->backend_domid;
> +device->backend_kind= type->type == LIBXL_P9_TYPE_QEMU
> +  ? LIBXL__DEVICE_KIND_9PFS
> +  : LIBXL__DEVICE_KIND_XEN_9PFS;
> +device->devid   = type->devid;
> +device->domid   = domid;
> +device->kind= LIBXL__DEVICE_KIND_9PFS;
> +
>  return 0;
>  }
>  
> -#define libxl__add_p9s NULL
> +static int libxl_device_p9_dm_needed(void *e, unsigned domid)

Prefix of the function should be "libxl__" as it's only internal to
libxl.

> +{
> +libxl_device_p9 *elem = e;
> +
> +return elem->type == LIBXL_P9_TYPE_QEMU && elem->backend_domid == domid;
> +}
> +
> +typedef struct libxl__aop9_state libxl__aop9_state;
> +
> +struct libxl__aop9_state {
> +libxl__spawn_state spawn;
> +libxl__ao_device *aodev;
> +libxl_device_p9 *p9;
> +uint32_t domid;
> +void (*callback)(libxl__egc *, libxl__aop9_state *, int);
> +};
> +
> +static void xen9pfsd_spawn_outcome(libxl__egc *egc, libxl__aop9_state *aop9,
> +   int rc)
> +{
> +aop9->aodev->rc = rc;
> +if (rc)
> +aop9->aodev->callback(egc, aop9->aodev);
> +else
> +libxl__device_add_async(egc, aop9->domid, &libxl__p9_devtype,
> +aop9->p9, aop9->aodev);
> +}
> +
> +static void xen9pfsd_confirm(libxl__egc *egc, libxl__spawn_state *spawn,
> + const char *xsdata)
> +{
> +STATE_AO_GC(spawn->ao);
> +
> +if (!xsdata)
> +return;
> +
> +if (strcmp(xsdata, "running"))
> +return;
> +
> +libxl__spawn_initiate_detach(gc, spawn);
> +}
> +
> +static void xen9pfsd_failed(libxl__egc *egc, libxl__spawn_state *spawn, int 
> rc)
> +{
> +libxl__aop9_state *aop9 = CONTAINER_OF(spawn, *aop9, spawn);
> +
> +xen9pfsd_spawn_outcome(egc, aop9, rc);
> +}
> +
> +static void xen9pfsd_detached(libxl__egc *egc, libxl__spawn_state *spawn)
> +{
> +libxl__aop9_state *aop9 = CONTAINER_OF(spawn, *aop9, spawn);
> +
> +xen9pfsd_spawn_outcome(egc, aop9, 0);
> +}
> +
> +static int xen9pfsd_spawn(libxl__egc *egc, uint32_t domid, libxl_device_p9 
> *p9,
> + libxl__ao_device *aodev)
> +{
> +STATE_AO_GC(aodev->ao);
> +struct libxl__aop9_state *aop9;
> +int rc;
> +char *args[] = { "xen-9pfsd", NULL };
> +char *path = GCSPRINTF("/local/domain/%u/libxl/xen-9pfs",
> +   p9->backend_domid);
> +
> +if (p9->type != LIBXL_P9_TYPE_XEN_9PFSD ||
> +libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/state", path)))

I feel like this check and this function might not work as expected.
What happens if we try to add more than one 9pfs "device"? libxl, I think,
is going to try to start several xen-9pfsd daemons before the first one
has had time to write the "*/state" path.
What about two different libxl processes trying to spawn that daemon? Is
xen-9pfsd going to behave well and have one give up? But that would
probably mean that libxl is going to have an error due to the process
exiting early, maybe.

> +return 0;
> +
> +GCNEW(aop9);
> +aop9->aodev = aodev;
> +aop9->p9 = p9;
> +aop9->domid = domid;
> +aop9->callback = xen9pfsd_spawn_outcome;
> +
> +aop9->spawn.ao = aodev->ao;
> +aop9->spawn.what = "xen-9pfs daemon";
> +aop9->spawn.xspath = GCSPRINTF("%s/state", path);
> +aop9->spawn.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
> +aop9->spawn.pidpath = GCSPRINTF("%s/pid", path);
> +aop9->spawn.midproc_cb = libxl__spawn_record_pid;
> +aop9->spawn.confirm_cb = xen9pfsd_confirm;
> +aop9->spawn.failure_cb = xen9pfsd_failed;
> +aop9->spawn.detached_cb = xen9pfsd_detached;
> +rc = libxl__spawn_spawn(egc, 

[linux-5.4 test] 184327: regressions - FAIL

2024-01-12 Thread osstest service owner
flight 184327 linux-5.4 real [real]
http://logs.test-lab.xenproject.org/osstest/logs/184327/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-armhf-armhf-xl-multivcpu 18 guest-start/debian.repeat fail REGR. vs. 184192

Tests which are failing intermittently (not blocking):
 test-armhf-armhf-xl-arndale  14 guest-start  fail in 184318 pass in 184327
 test-armhf-armhf-xl-credit2  14 guest-start  fail in 184318 pass in 184327
 test-arm64-arm64-libvirt-raw 17 guest-start/debian.repeat  fail pass in 184318
 test-armhf-armhf-libvirt-raw 17 guest-start/debian.repeat  fail pass in 184318
 test-armhf-armhf-xl-vhd  13 guest-startfail pass in 184318
 test-amd64-i386-xl-qemuu-ws16-amd64 12 windows-install fail pass in 184318

Regressions which are regarded as allowable (not blocking):
 test-armhf-armhf-xl-rtds 18 guest-start/debian.repeat fail REGR. vs. 184192

Tests which did not succeed, but are not blocking:
 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check fail blocked in 184192
 test-armhf-armhf-xl-credit1 18 guest-start/debian.repeat fail in 184318 blocked in 184192
 test-armhf-armhf-libvirt-qcow2 13 guest-start   fail in 184318 like 184192
 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop   fail in 184318 like 184192
 test-armhf-armhf-xl-credit1 15 migrate-support-check fail in 184318 never pass
 test-armhf-armhf-xl-credit1 16 saverestore-support-check fail in 184318 never pass
 test-armhf-armhf-xl-vhd 14 migrate-support-check fail in 184318 never pass
 test-armhf-armhf-xl-vhd 15 saverestore-support-check fail in 184318 never pass
 test-armhf-armhf-xl-credit1  14 guest-start  fail  like 184192
 test-armhf-armhf-libvirt 16 saverestore-support-check fail  like 184192
 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stop fail like 184192
 test-amd64-i386-xl-qemut-win7-amd64 19 guest-stop fail like 184192
 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 184192
 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stop fail like 184192
 test-armhf-armhf-xl-arndale  18 guest-start/debian.repeat fail  like 184192
 test-armhf-armhf-xl-credit2  18 guest-start/debian.repeat fail  like 184192
 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stop fail like 184192
 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stop fail like 184192
 test-amd64-i386-xl-qemut-ws16-amd64 19 guest-stop fail like 184192
 test-armhf-armhf-libvirt-raw 15 saverestore-support-check fail  like 184192
 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 184192
 test-amd64-i386-xl-pvshim    14 guest-start  fail   never pass
 test-amd64-amd64-libvirt-xsm 15 migrate-support-check fail   never pass
 test-amd64-amd64-libvirt 15 migrate-support-check fail   never pass
 test-amd64-i386-libvirt-xsm  15 migrate-support-check fail   never pass
 test-amd64-i386-libvirt  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-xsm  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-xsm  16 saverestore-support-check fail   never pass
 test-arm64-arm64-xl  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl  16 saverestore-support-check fail   never pass
 test-arm64-arm64-libvirt-xsm 15 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt-xsm 16 saverestore-support-check fail   never pass
 test-arm64-arm64-xl-credit1  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-credit1  16 saverestore-support-check fail   never pass
 test-arm64-arm64-xl-credit2  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-credit2  16 saverestore-support-check fail   never pass
 test-arm64-arm64-xl-thunderx 15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-thunderx 16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl-multivcpu 15 migrate-support-check fail  never pass
 test-armhf-armhf-xl-multivcpu 16 saverestore-support-check fail  never pass
 test-armhf-armhf-xl-rtds 15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-rtds 16 saverestore-support-check fail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass
 test-armhf-armhf-xl-arndale  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-arndale  16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl  16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl-credit2  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl-

[xen-unstable-smoke test] 184333: tolerable all pass - PUSHED

2024-01-12 Thread osstest service owner
flight 184333 xen-unstable-smoke real [real]
http://logs.test-lab.xenproject.org/osstest/logs/184333/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-libvirt 15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-xsm  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-xsm  16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl  15 migrate-support-check fail   never pass
 test-armhf-armhf-xl  16 saverestore-support-check fail   never pass

version targeted for testing:
 xen  1ec3fe1f664fa837daf31e9fa8938f6109464f28
baseline version:
 xen  c27c8922f2c6995d688437b0758cec6a27d18320

Last test of basis   184295  2024-01-09 14:00:26 Z    3 days
Testing same since   184333  2024-01-12 12:03:48 Z    0 days    1 attempts


People who touched revisions under test:
  Javi Merino 
  Julien Grall 
  Shawn Anastasio 

jobs:
 build-arm64-xsm  pass
 build-amd64  pass
 build-armhf  pass
 build-amd64-libvirt  pass
 test-armhf-armhf-xl  pass
 test-arm64-arm64-xl-xsm  pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64pass
 test-amd64-amd64-libvirt pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

To xenbits.xen.org:/home/xen/git/xen.git
   c27c8922f2..1ec3fe1f66  1ec3fe1f664fa837daf31e9fa8938f6109464f28 -> smoke



Re: [PATCH net-next 16/24] net: netkit, veth, tun, virt*: Use nested-BH locking for XDP redirect.

2024-01-12 Thread Sebastian Andrzej Siewior
On 2023-12-18 09:52:05 [+0100], Daniel Borkmann wrote:
> Hi Sebastian,
Hi Daniel,

> Please exclude netkit from this set given it does not support XDP, but
> instead only accepts tc BPF typed programs.

okay, thank you.

> Thanks,
> Daniel

Sebastian



Re: [PATCH v2] x86/hvm: don't expose XENFEAT_hvm_pirqs by default

2024-01-12 Thread Roger Pau Monné
On Fri, Jan 12, 2024 at 02:40:35PM +, Anthony PERARD wrote:
> On Wed, Jan 10, 2024 at 01:42:02PM +0100, Roger Pau Monne wrote:
> > The HVM pirq feature allows routing interrupts from both physical and emulated
> > devices over event channels, this was done as a performance improvement.  However
> > its usage is fully undocumented, and the only reference implementation is in
> > Linux.  It defeats the purpose of local APIC hardware virtualization, 
> > because
> > when using it interrupts avoid the usage of the local APIC altogether.
> > 
> > It has also been reported to not work properly with certain devices, at 
> > least
> > when using some AMD GPUs Linux attempts to route interrupts over event
> > channels, but Xen doesn't correctly detect such routing, which leads to the
> > hypervisor complaining with:
> > 
> > (XEN) d15v0: Unsupported MSI delivery mode 7 for Dom15
> > 
> > When MSIs are attempted to be routed over event channels the entry delivery
> > mode is set to ExtINT, but Xen doesn't detect such routing and attempts to
> > inject the interrupt following the native MSI path, and the ExtINT delivery
> > mode is not supported.
> > 
> > Disable HVM PIRQs by default and provide a per-domain option in xl.cfg to
> > enable such feature.  Also for backwards compatibility keep the feature 
> > enabled
> > for any resumed domains that don't have an explicit selection.
> > 
> > Note that the only user of the feature (Linux) is also able to handle native
> > interrupts fine, as the feature was already not used if Xen reported local 
> > APIC
> > hardware virtualization active.
> > 
> > Link: https://github.com/QubesOS/qubes-issues/issues/7971
> > Signed-off-by: Roger Pau Monné 
> 
> Should we have an entry in the changelog about this patch?

Yes, indeed.  I always forget.  Will send v3 on Monday with you RB and
the changelog.

> With the patch, we will need to regenerate the golang binding, at least
> on commit.
> 
> Otherwise, patch looks fine:
> Reviewed-by: Anthony PERARD 

Thanks!



Re: [PATCH v12 09/15] vpci/header: program p2m with guest BAR view

2024-01-12 Thread Roger Pau Monné
On Tue, Jan 09, 2024 at 04:51:24PM -0500, Stewart Hildebrand wrote:
> From: Oleksandr Andrushchenko 
> 
> Take into account guest's BAR view and program its p2m accordingly:
> gfn is guest's view of the BAR and mfn is the physical BAR value.
> This way hardware domain sees physical BAR values and guest sees
> emulated ones.
> 
> Hardware domain continues getting the BARs identity mapped, while for
> domUs the BARs are mapped at the requested guest address without
> modifying the BAR address in the device PCI config space.
> 
> Signed-off-by: Oleksandr Andrushchenko 
> Signed-off-by: Volodymyr Babchuk 
> Signed-off-by: Stewart Hildebrand 

Some nits and a request to add an extra assert. If you agree:

Reviewed-by: Roger Pau Monné 

> ---
> In v12:
> - Update guest_addr in rom_write()
> - Use unsigned long for start_mfn and map_mfn to reduce mfn_x() calls
> - Use existing vmsix_table_*() functions
> - Change vmsix_table_base() to use .guest_addr
> In v11:
> - Add vmsix_guest_table_addr() and vmsix_guest_table_base() functions
>   to access guest's view of the VMSIx tables.
> - Use MFN (not GFN) to check access permissions
> - Move page offset check to this patch
> - Call rangeset_remove_range() with correct parameters
> In v10:
> - Moved GFN variable definition outside the loop in map_range()
> - Updated printk error message in map_range()
> - Now BAR address is always stored in bar->guest_addr, even for
>   HW dom, this removes bunch of ugly is_hwdom() checks in modify_bars()
> - vmsix_table_base() now uses .guest_addr instead of .addr
> In v9:
> - Extended the commit message
> - Use bar->guest_addr in modify_bars
> - Extended printk error message in map_range
> - Moved map_data initialization so .bar can be initialized during declaration
> Since v5:
> - remove debug print in map_range callback
> - remove "identity" from the debug print
> Since v4:
> - moved start_{gfn|mfn} calculation into map_range
> - pass vpci_bar in the map_data instead of start_{gfn|mfn}
> - s/guest_addr/guest_reg
> Since v3:
> - updated comment (Roger)
> - removed gfn_add(map->start_gfn, rc); which is wrong
> - use v->domain instead of v->vpci.pdev->domain
> - removed odd e.g. in comment
> - s/d%d/%pd in altered code
> - use gdprintk for map/unmap logs
> Since v2:
> - improve readability for data.start_gfn and restructure ?: construct
> Since v1:
>  - s/MSI/MSI-X in comments
> ---
>  xen/drivers/vpci/header.c | 81 +++
>  xen/include/xen/vpci.h|  3 +-
>  2 files changed, 66 insertions(+), 18 deletions(-)
> 
> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> index feccd070ddd0..f0b0b64b0929 100644
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -34,6 +34,7 @@
>  
>  struct map_data {
>  struct domain *d;
> +const struct vpci_bar *bar;
>  bool map;
>  };
>  
> @@ -41,13 +42,24 @@ static int cf_check map_range(
>  unsigned long s, unsigned long e, void *data, unsigned long *c)
>  {
>  const struct map_data *map = data;
> +/* Start address of the BAR as seen by the guest. */
> +unsigned long start_gfn = PFN_DOWN(map->bar->guest_addr);
> +/* Physical start address of the BAR. */
> +unsigned long start_mfn = PFN_DOWN(map->bar->addr);
>  int rc;
>  
>  for ( ; ; )
>  {
>  unsigned long size = e - s + 1;
> +/*
> + * Ranges to be mapped don't always start at the BAR start address, 
> as
> + * there can be holes or partially consumed ranges. Account for the
> + * offset of the current address from the BAR start.
> + */
> +unsigned long map_mfn = start_mfn + s - start_gfn;
> +unsigned long m_end = map_mfn + size - 1;
>  
> -if ( !iomem_access_permitted(map->d, s, e) )
> +if ( !iomem_access_permitted(map->d, map_mfn, m_end) )
>  {
>  printk(XENLOG_G_WARNING
> "%pd denied access to MMIO range [%#lx, %#lx]\n",
> @@ -55,7 +67,8 @@ static int cf_check map_range(
>  return -EPERM;
>  }
>  
> -rc = xsm_iomem_mapping(XSM_HOOK, map->d, s, e, map->map);
> +rc = xsm_iomem_mapping(XSM_HOOK, map->d, map_mfn, m_end,
> +   map->map);
>  if ( rc )
>  {
>  printk(XENLOG_G_WARNING
> @@ -73,8 +86,8 @@ static int cf_check map_range(
>   * - {un}map_mmio_regions doesn't support preemption.
>   */
>  
> -rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(s))
> -  : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(s));
> +rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, 
> _mfn(map_mfn))
> +  : unmap_mmio_regions(map->d, _gfn(s), size, 
> _mfn(map_mfn));
>  if ( rc == 0 )
>  {
>  *c += size;
> @@ -83,8 +96,9 @@ static int cf_check map_range(
>  if ( rc < 0 )
>  {
>  printk(XENLOG_G_WARNING

Re: [PATCH v2] x86/hvm: don't expose XENFEAT_hvm_pirqs by default

2024-01-12 Thread Anthony PERARD
On Wed, Jan 10, 2024 at 01:42:02PM +0100, Roger Pau Monne wrote:
> The HVM pirq feature allows routing interrupts from both physical and emulated
> devices over event channels, this was done as a performance improvement.  However
> its usage is fully undocumented, and the only reference implementation is in
> Linux.  It defeats the purpose of local APIC hardware virtualization, because
> when using it interrupts avoid the usage of the local APIC altogether.
> 
> It has also been reported to not work properly with certain devices, at least
> when using some AMD GPUs Linux attempts to route interrupts over event
> channels, but Xen doesn't correctly detect such routing, which leads to the
> hypervisor complaining with:
> 
> (XEN) d15v0: Unsupported MSI delivery mode 7 for Dom15
> 
> When MSIs are attempted to be routed over event channels the entry delivery
> mode is set to ExtINT, but Xen doesn't detect such routing and attempts to
> inject the interrupt following the native MSI path, and the ExtINT delivery
> mode is not supported.
> 
> Disable HVM PIRQs by default and provide a per-domain option in xl.cfg to
> enable such feature.  Also for backwards compatibility keep the feature 
> enabled
> for any resumed domains that don't have an explicit selection.
> 
> Note that the only user of the feature (Linux) is also able to handle native
> interrupts fine, as the feature was already not used if Xen reported local 
> APIC
> hardware virtualization active.
> 
> Link: https://github.com/QubesOS/qubes-issues/issues/7971
> Signed-off-by: Roger Pau Monné 

Should we have an entry in the changelog about this patch?

With the patch, we will need to regenerate the golang binding, at least
on commit.

Otherwise, patch looks fine:
Reviewed-by: Anthony PERARD 

Thanks,

-- 
Anthony PERARD



Re: [PATCH v12 11/15] vpci: add initial support for virtual PCI bus topology

2024-01-12 Thread Stewart Hildebrand
On 1/12/24 06:46, George Dunlap wrote:
> On Tue, Jan 9, 2024 at 9:54 PM Stewart Hildebrand
>  wrote:
>> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
>> index 37f5922f3206..b58a822847be 100644
>> --- a/xen/include/xen/sched.h
>> +++ b/xen/include/xen/sched.h
>> @@ -484,6 +484,14 @@ struct domain
>>   * 2. pdev->vpci->lock
>>   */
>>  rwlock_t pci_lock;
>> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
>> +/*
>> + * The bitmap which shows which device numbers are already used by the
>> + * virtual PCI bus topology and is used to assign a unique SBDF to the
>> + * next passed through virtual PCI device.
>> + */
>> +DECLARE_BITMAP(vpci_dev_assigned_map, VPCI_MAX_VIRT_DEV);
>> +#endif
>>  #endif
> 
> Without digging through the whole series, how big do we expect this
> bitmap to be on typical systems?
> 
> If it's only going to be a handful of bytes, keeping it around for all
> guests would be OK; but it's large, it would be better as a pointer,
> since it's unused on the vast majority of guests.

Since the bitmap is backed by a single unsigned long it will typically be 8
bytes, although only 4 bytes (32 bits) are actually used. VPCI_MAX_VIRT_DEV is
currently fixed at 32, as we are only tracking the device number D (not the
whole SBDF) in the bitmap so far.
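
(For reference, with Xen's usual DECLARE_BITMAP()/BITS_TO_LONGS() definitions
the field roughly expands to the sketch below; illustrative only.)

    /* DECLARE_BITMAP(vpci_dev_assigned_map, VPCI_MAX_VIRT_DEV) is roughly: */
    unsigned long vpci_dev_assigned_map[BITS_TO_LONGS(32)]; /* 1 long = 8 bytes */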



Re: [PATCH v12 01/15] vpci: use per-domain PCI lock to protect vpci structure

2024-01-12 Thread Roger Pau Monné
On Tue, Jan 09, 2024 at 04:51:16PM -0500, Stewart Hildebrand wrote:
> From: Oleksandr Andrushchenko 
> 
> Use a previously introduced per-domain read/write lock to check
> whether vpci is present, so we are sure there are no accesses to the
> contents of the vpci struct if not.

Nit: isn't this sentence kind of awkward?  I would word it as:

"Use the per-domain PCI read/write lock to protect the presence of the
pci device vpci field."

> This lock can be used (and in a
> few cases is used right away) so that vpci removal can be performed
> while holding the lock in write mode. Previously such removal could
> race with vpci_read for example.
> 
> When taking both d->pci_lock and pdev->vpci->lock, they should be
> taken in this exact order: d->pci_lock then pdev->vpci->lock to avoid
> possible deadlock situations.
> 
> 1. Per-domain's pci_lock is used to protect pdev->vpci structure
> from being removed.
> 
> 2. Writing the command register and ROM BAR register may trigger
> modify_bars to run, which in turn may access multiple pdevs while
> checking for the existing BAR's overlap. The overlapping check, if
> done under the read lock, requires vpci->lock to be acquired on both
> devices being compared, which may produce a deadlock. It is not
> possible to upgrade read lock to write lock in such a case. So, in
> order to prevent the deadlock, use d->pci_lock instead.

... use d->pci_lock in write mode instead.
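
(For illustration, the resulting lock ordering on paths that may call
modify_bars() would look roughly like the sketch below; not the literal patch
code.)

    write_lock(&pdev->domain->pci_lock);   /* 1: per-domain lock, write mode */
    spin_lock(&pdev->vpci->lock);          /* 2: per-device vpci lock */
    /* ... modify_bars() may inspect the domain's other pdevs here ... */
    spin_unlock(&pdev->vpci->lock);
    write_unlock(&pdev->domain->pci_lock);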

> 
> All other code, which doesn't lead to pdev->vpci destruction and does
> not access multiple pdevs at the same time, can still use a
> combination of the read lock and pdev->vpci->lock.
> 
> 3. Drop const qualifier where the new rwlock is used and this is
> appropriate.
> 
> 4. Do not call process_pending_softirqs with any locks held. For that
> unlock prior the call and re-acquire the locks after. After
> re-acquiring the lock there is no need to check if pdev->vpci exists:
>  - in apply_map because of the context it is called (no race condition
>possible)
>  - for MSI/MSI-X debug code because it is called at the end of
>pdev->vpci access and no further access to pdev->vpci is made
> 
> 5. Use d->pci_lock around for_each_pdev and pci_get_pdev_by_domain
> while accessing pdevs in vpci code.
> 
> Suggested-by: Roger Pau Monné 
> Suggested-by: Jan Beulich 
> Signed-off-by: Oleksandr Andrushchenko 
> Signed-off-by: Volodymyr Babchuk 
> Signed-off-by: Stewart Hildebrand 

Just one code fix regarding the usage of read_lock (instead of the
trylock version) in vpci_msix_arch_print(). With that and the
comments adjusted:

Reviewed-by: Roger Pau Monné 

> ---
> Changes in v12:
>  - s/pci_rwlock/pci_lock/ in commit message
>  - expand comment about scope of pci_lock in sched.h
>  - in vpci_{read,write}, if hwdom is trying to access a device assigned
>to dom_xen, holding hwdom->pci_lock is sufficient (no need to hold
>dom_xen->pci_lock)
>  - reintroduce ASSERT in vmx_pi_update_irte()
>  - reintroduce ASSERT in __pci_enable_msi{x}()
>  - delete note 6. in commit message about removing ASSERTs since we have
>reintroduced them
> 
> Changes in v11:
>  - Fixed commit message regarding possible spinlocks
>  - Removed parameter from allocate_and_map_msi_pirq(), which was added
>  in the prev version. Now we are taking pcidevs_lock in
>  physdev_map_pirq()
>  - Returned ASSERT to pci_enable_msi
>  - Fixed case when we took read lock instead of write one
>  - Fixed label indentation
> 
> Changes in v10:
>  - Moved printk past locked area
>  - Returned back ASSERTs
>  - Added new parameter to allocate_and_map_msi_pirq() so it knows if
>  it should take the global pci lock
>  - Added comment about possible improvement in vpci_write
>  - Changed ASSERT(rw_is_locked()) to rw_is_write_locked() in
>appropriate places
>  - Renamed release_domain_locks() to release_domain_write_locks()
>  - moved domain_done label in vpci_dump_msi() to correct place
> Changes in v9:
>  - extended locked region to protect vpci_remove_device and
>vpci_add_handlers() calls
>  - vpci_write() takes lock in the write mode to protect
>potential call to modify_bars()
>  - renamed lock releasing function
>  - removed ASSERT()s from msi code
>  - added trylock in vpci_dump_msi
> 
> Changes in v8:
>  - changed d->vpci_lock to d->pci_lock
>  - introducing d->pci_lock in a separate patch
>  - extended locked region in vpci_process_pending
>  - removed pcidevs_lock in vpci_dump_msi()
>  - removed some changes as they are not needed with
>the new locking scheme
>  - added handling for hwdom && dom_xen case
> ---
>  xen/arch/x86/hvm/vmsi.c   | 22 +++
>  xen/arch/x86/hvm/vmx/vmx.c|  2 +-
>  xen/arch/x86/irq.c|  8 +++---
>  xen/arch/x86/msi.c| 14 +-
>  xen/arch/x86/physdev.c|  2 ++
>  xen/drivers/passthrough/pci.c |  9 +++---
>  xen/drivers/vpci/header.c | 18 
>  xen/drivers/vpci/msi.c| 28 +--
>  xen/drivers/vpci/msix.c   |

Re: [PATCH v2 2/2] xen/arm32: head: Improve logging in head.S

2024-01-12 Thread Julien Grall

Hi Michal,

On 12/01/2024 11:25, Michal Orzel wrote:



On 12/01/2024 11:58, Julien Grall wrote:



On 12/01/2024 08:49, Michal Orzel wrote:

Hi Julien,


Hi Michal,


On 11/01/2024 19:34, Julien Grall wrote:



From: Julien Grall 

The sequence to enable the MMU on arm32 is quite complex as we may need
to jump to a temporary mapping to map Xen.

Recently, we had one bug in the logic (see f5a49eb7f8b3 ("xen/arm32:
head: Add mising isb in switch_to_runtime_mapping()") and it was
a pain to debug because there was no logging.

In order to improve the logging in the MMU switch we need to add
support for early printk while running on the identity mapping
and also on the temporary mapping.

For the identity mapping, we have only the first page of Xen mapped.
So all the strings should reside in the first page. For that purpose
a new macro PRINT_ID is introduced.

For the temporary mapping, the fixmap is already linked in the temporary
area (and so does the UART). So we just need to update the register
storing the UART address (i.e. r11) to point to the UART temporary
mapping.

Take the opportunity to introduce mov_w_on_cond in order to
conditionally execute mov_w and avoid branches.

Signed-off-by: Julien Grall 

Reviewed-by: Michal Orzel 


Thanks!


   /*
@@ -29,16 +33,26 @@

   #ifdef CONFIG_EARLY_PRINTK
   /*
- * Macro to print a string to the UART, if there is one.
+ * Macros to print a string to the UART, if there is one.
+ *
+ * There are multiple flavors:
+ *  - PRINT_SECT(section, string): The @string will be located in @section
+ *  - PRINT(): The string will be located in .rodata.str.
+ *  - PRINT_ID(): When Xen is running on the Identity Mapping, it is
+ *only possible to have a limited amount of Xen. This will create
+ *the string in .rodata.idmap which will always be mapped.
*
* Clobbers r0 - r3
*/
-#define PRINT(_s)   \
-mov   r3, lr   ;\
-adr_l r0, 98f  ;\
-blasm_puts ;\
-mov   lr, r3   ;\
-RODATA_STR(98, _s)
+#define PRINT_SECT(section, string) \
+mov   r3, lr   ;\
+adr_l r0, 98f  ;\
+blasm_puts ;\
+mov   lr, r3   ;\
+RODATA_SECT(section, 98, string)
+
+#define PRINT(string) PRINT_SECT(.rodata.str, string)
+#define PRINT_ID(string) PRINT_SECT(.rodata.idmap, string)

I know this is just a macro but does it make sense to have something MMU 
specific in common header?
I don't expect MPU to use it.

For cache coloring, I would like secondary boot CPUs to start directly
on the colored Xen. This means that any message used before enabling the
MMU will need to be part of the .rodata.idmap.

I know that 32-bit is not in scope for the cache coloring series. But I
would like to keep 32-bit and 64-bit boot logic fairly similar.

With that in mind, would you be happy if I keep PRINT_ID() in macros.h?
Note that I would be ok to move in mmu/head.S and move back in macros.h
later on. I just wanted to avoid code movement :).

With the above explanation it does not make sense to move it back and forth, so 
let's keep it as is.


Thanks! If that changes, we will move PRINT_ID() to mmu/head.S

I have committed the patch.

Cheers,

--
Julien Grall



Re: [PATCH v12 11/15] vpci: add initial support for virtual PCI bus topology

2024-01-12 Thread George Dunlap
On Tue, Jan 9, 2024 at 9:54 PM Stewart Hildebrand
 wrote:
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index 37f5922f3206..b58a822847be 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -484,6 +484,14 @@ struct domain
>   * 2. pdev->vpci->lock
>   */
>  rwlock_t pci_lock;
> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
> +/*
> + * The bitmap which shows which device numbers are already used by the
> + * virtual PCI bus topology and is used to assign a unique SBDF to the
> + * next passed through virtual PCI device.
> + */
> +DECLARE_BITMAP(vpci_dev_assigned_map, VPCI_MAX_VIRT_DEV);
> +#endif
>  #endif

Without digging through the whole series, how big do we expect this
bitmap to be on typical systems?

If it's only going to be a handful of bytes, keeping it around for all
guests would be OK; but it's large, it would be better as a pointer,
since it's unused on the vast majority of guests.

 -George



[ovmf test] 184331: all pass - PUSHED

2024-01-12 Thread osstest service owner
flight 184331 ovmf real [real]
http://logs.test-lab.xenproject.org/osstest/logs/184331/

Perfect :-)
All tests in this flight passed as required
version targeted for testing:
 ovmf cfe48465724293abd0a7d92c2a72f8ee3cf15628
baseline version:
 ovmf 2bce85bd862e54cede2b59b48972c9f05e2e733d

Last test of basis   184328  2024-01-12 01:43:23 Z0 days
Testing same since   184331  2024-01-12 06:14:31 Z0 days1 attempts


People who touched revisions under test:
  Zhi Jin 

jobs:
 build-amd64-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 pass
 test-amd64-i386-xl-qemuu-ovmf-amd64  pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

To xenbits.xen.org:/home/xen/git/osstest/ovmf.git
   2bce85bd86..cfe4846572  cfe48465724293abd0a7d92c2a72f8ee3cf15628 -> 
xen-tested-master



Re: [PATCH v2 2/2] xen/arm32: head: Improve logging in head.S

2024-01-12 Thread Michal Orzel



On 12/01/2024 11:58, Julien Grall wrote:
> 
> 
> On 12/01/2024 08:49, Michal Orzel wrote:
>> Hi Julien,
> 
> Hi Michal,
> 
>> On 11/01/2024 19:34, Julien Grall wrote:
>>>
>>>
>>> From: Julien Grall 
>>>
>>> The sequence to enable the MMU on arm32 is quite complex as we may need
>>> to jump to a temporary mapping to map Xen.
>>>
>>> Recently, we had one bug in the logic (see f5a49eb7f8b3 ("xen/arm32:
>>> head: Add mising isb in switch_to_runtime_mapping()") and it was
>>> a pain to debug because there was no logging.
>>>
>>> In order to improve the logging in the MMU switch we need to add
>>> support for early printk while running on the identity mapping
>>> and also on the temporary mapping.
>>>
>>> For the identity mapping, we have only the first page of Xen mapped.
>>> So all the strings should reside in the first page. For that purpose
>>> a new macro PRINT_ID is introduced.
>>>
>>> For the temporary mapping, the fixmap is already linked in the temporary
>>> area (and so does the UART). So we just need to update the register
>>> storing the UART address (i.e. r11) to point to the UART temporary
>>> mapping.
>>>
>>> Take the opportunity to introduce mov_w_on_cond in order to
>>> conditionally execute mov_w and avoid branches.
>>>
>>> Signed-off-by: Julien Grall 
>> Reviewed-by: Michal Orzel 
> 
> Thanks!
> 
>>>   /*
>>> @@ -29,16 +33,26 @@
>>>
>>>   #ifdef CONFIG_EARLY_PRINTK
>>>   /*
>>> - * Macro to print a string to the UART, if there is one.
>>> + * Macros to print a string to the UART, if there is one.
>>> + *
>>> + * There are multiple flavors:
>>> + *  - PRINT_SECT(section, string): The @string will be located in @section
>>> + *  - PRINT(): The string will be located in .rodata.str.
>>> + *  - PRINT_ID(): When Xen is running on the Identity Mapping, it is
>>> + *only possible to have a limited amount of Xen. This will create
>>> + *the string in .rodata.idmap which will always be mapped.
>>>*
>>>* Clobbers r0 - r3
>>>*/
>>> -#define PRINT(_s)   \
>>> -mov   r3, lr   ;\
>>> -adr_l r0, 98f  ;\
>>> -blasm_puts ;\
>>> -mov   lr, r3   ;\
>>> -RODATA_STR(98, _s)
>>> +#define PRINT_SECT(section, string) \
>>> +mov   r3, lr   ;\
>>> +adr_l r0, 98f  ;\
>>> +blasm_puts ;\
>>> +mov   lr, r3   ;\
>>> +RODATA_SECT(section, 98, string)
>>> +
>>> +#define PRINT(string) PRINT_SECT(.rodata.str, string)
>>> +#define PRINT_ID(string) PRINT_SECT(.rodata.idmap, string)
>> I know this is just a macro but does it make sense to have something MMU 
>> specific in common header?
>> I don't expect MPU to use it.
> For cache coloring, I would like secondary boot CPUs to start directly
> on the colored Xen. This means that any message used before enabling the
> MMU will need to be part of the .rodata.idmap.
> 
> I know that 32-bit is not in scope for the cache coloring series. But I
> would like to keep 32-bit and 64-bit boot logic fairly similar.
> 
> With that in mind, would you be happy if I keep PRINT_ID() in macros.h?
> Note that I would be ok to move in mmu/head.S and move back in macros.h
> later on. I just wanted to avoid code movement :).
With the above explanation it does not make sense to move it back and forth, so 
let's keep it as is.

~Michal



Re: [PATCH v2 3/3] x86/vmx: Disallow the use of inactivity states

2024-01-12 Thread Andrew Cooper
On 12/01/2024 11:04 am, Jan Beulich wrote:
> On 12.01.2024 11:43, Andrew Cooper wrote:
>> On 12/01/2024 10:37 am, Jan Beulich wrote:
>>> On 12.01.2024 00:13, Andrew Cooper wrote:
 --- a/xen/arch/x86/hvm/vmx/vmx.c
 +++ b/xen/arch/x86/hvm/vmx/vmx.c
 @@ -1551,7 +1551,10 @@ static void cf_check vmx_set_nonreg_state(struct 
 vcpu *v,
  {
  vmx_vmcs_enter(v);
  
 -__vmwrite(GUEST_ACTIVITY_STATE, nrs->vmx.activity_state);
 +if ( nrs->vmx.activity_state )
 +domain_crash(v->domain, "Attempt to set activity_state %#lx\n",
 + nrs->vmx.activity_state);
>>> Might be useful to log the offending vCPU here?
>> Already covered.  the innards of __domain_crash() does:
>>
>>     else if ( d == current->domain )
>>     {
>>         printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
>>     ...
> Except that afaict v != current here at all times (at least as far as
> current use of the function goes).

Hmm.  That's irritating.

In this case, it's a dead logic path - hence why in v1 I simply deleted it.

But I would prefer not to have to rely on a human getting an error
message right in order to get proper logging.

I suppose what we really want is a vcpu_crash(), but this is now firmly
in the realms of the cleanup patch I still haven't had time to repost.

I think I'll extend this with %pv for now, which can be dropped when
vcpu_crash() appears.
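
(Presumably something along these lines; a sketch of the %pv extension, not
the actual v3 change.)

    if ( nrs->vmx.activity_state )
        domain_crash(v->domain, "%pv: Attempt to set activity_state %#lx\n",
                     v, nrs->vmx.activity_state);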

~Andrew



[PATCH v4] x86/intel: ensure Global Performance Counter Control is setup correctly

2024-01-12 Thread Roger Pau Monne
When Architectural Performance Monitoring is available, the PERF_GLOBAL_CTRL
MSR contains per-counter enable bits that are ANDed with the enable bit in the
counter EVNTSEL MSR in order for a PMC counter to be enabled.

So far the watchdog code seems to have relied on the PERF_GLOBAL_CTRL enable
bits being set by default, but at least on some Intel Sapphire and Emerald
Rapids this is no longer the case, and Xen reports:

Testing NMI watchdog on all CPUs: 0 40 stuck

The first CPU on each package is started with PERF_GLOBAL_CTRL zeroed, so PMC0
doesn't start counting when the enable bit in EVNTSEL0 is set, due to the
relevant enable bit in PERF_GLOBAL_CTRL not being set.

Check and adjust PERF_GLOBAL_CTRL during CPU initialization so that all the
general-purpose PMCs are enabled.  Doing so brings the state of the package-BSP
PERF_GLOBAL_CTRL in line with the rest of the CPUs on the system.

Signed-off-by: Roger Pau Monné 
---
Changes since v3:
 - Revert previous change and require 2 counters.

Changes since v2:
 - Print message on invalid PERF_GLOBAL_CTRL.
 - Use a mask instead of type truncation.
 - Adjust the check to require a single counter instead of 2.

Changes since v1:
 - Do the adjustment of PERF_GLOBAL_CTRL even if the watchdog is not used, and
   enable all counters.
---
 xen/arch/x86/cpu/intel.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
index dfee64689ffe..b5490eb06e00 100644
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -533,9 +533,30 @@ static void cf_check init_intel(struct cpuinfo_x86 *c)
init_intel_cacheinfo(c);
if (c->cpuid_level > 9) {
unsigned eax = cpuid_eax(10);
+   unsigned int cnt = (eax >> 8) & 0xff;
+
/* Check for version and the number of counters */
-   if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
+   if ((eax & 0xff) && (cnt > 1) && (cnt <= 32)) {
+   uint64_t global_ctrl;
+   unsigned int cnt_mask = (1UL << cnt) - 1;
+
+   /*
+* On (some?) Sapphire/Emerald Rapids platforms each
+* package-BSP starts with all the enable bits for the
+* general-purpose PMCs cleared.  Adjust so counters
+* can be enabled from EVNTSEL.
+*/
+   rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl);
+   if ((global_ctrl & cnt_mask) != cnt_mask) {
+   printk("CPU%u: invalid PERF_GLOBAL_CTRL: %#"
+  PRIx64 " adjusting to %#" PRIx64 "\n",
+  smp_processor_id(), global_ctrl,
+  global_ctrl | cnt_mask);
+   wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
+  global_ctrl | cnt_mask);
+   }
__set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
+   }
}
 
if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) )
-- 
2.43.0




Re: [PATCH v3 21/34] xen/riscv: introduce p2m.h

2024-01-12 Thread Julien Grall

Hi Jan,

On 12/01/2024 11:06, Jan Beulich wrote:

On 12.01.2024 11:39, Julien Grall wrote:

On 22/12/2023 15:13, Oleksii Kurochko wrote:

Signed-off-by: Oleksii Kurochko 
---
Changes in V3:
   - add SPDX
   - drop unneeded for now p2m types.
   - return false in all functions implemented with BUG() inside.
   - update the commit message
---
Changes in V2:
   - Nothing changed. Only rebase.
---
   xen/arch/ppc/include/asm/p2m.h   |   3 +-
   xen/arch/riscv/include/asm/p2m.h | 102 +++
   2 files changed, 103 insertions(+), 2 deletions(-)
   create mode 100644 xen/arch/riscv/include/asm/p2m.h

diff --git a/xen/arch/ppc/include/asm/p2m.h b/xen/arch/ppc/include/asm/p2m.h
index 25ba054668..3bc05b7c05 100644
--- a/xen/arch/ppc/include/asm/p2m.h
+++ b/xen/arch/ppc/include/asm/p2m.h
@@ -50,8 +50,7 @@ static inline void memory_type_changed(struct domain *d)
   static inline int guest_physmap_mark_populate_on_demand(struct domain *d, 
unsigned long gfn,
   unsigned int order)
   {
-BUG_ON("unimplemented");
-return 1;
+return -EOPNOTSUPP;
   }
   
   static inline int guest_physmap_add_entry(struct domain *d,

diff --git a/xen/arch/riscv/include/asm/p2m.h b/xen/arch/riscv/include/asm/p2m.h
new file mode 100644
index 00..d270ef6635
--- /dev/null
+++ b/xen/arch/riscv/include/asm/p2m.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_RISCV_P2M_H__
+#define __ASM_RISCV_P2M_H__
+
+#include 
+
+#define paddr_bits PADDR_BITS
+
+/*
+ * List of possible type for each page in the p2m entry.
+ * The number of available bit per page in the pte for this purpose is 4 bits.
+ * So it's possible to only have 16 fields. If we run out of value in the
+ * future, it's possible to use higher value for pseudo-type and don't store
+ * them in the p2m entry.
+ */


This looks like a verbatim copy from Arm. Did you actually check RISC-V
has 4 bits available in the PTE to store this value?


+typedef enum {
+p2m_invalid = 0,/* Nothing mapped here */
+p2m_ram_rw, /* Normal read/write guest RAM */


s/guest/domain/ as this also applies for dom0.


+} p2m_type_t;
+
+#include 
+
+static inline int get_page_and_type(struct page_info *page,
+struct domain *domain,
+unsigned long type)
+{
+BUG();


I understand your goal with the BUG() but I find it risky. This is not a
problem right now, it is more when we will decide to have RISC-V
supported. You will have to go through all the BUG() to figure out which
ones are warranted or not.

To reduce the load, I would recommend to switch to ASSERT_UNREACHABLE()
(or maybe introduced a different macro) that would lead to a crash on
debug build but propagate the error normally on production build.


Elsewhere BUG_ON("unimplemented") is used to indicate such cases.
Can't this be used here (and then uniformly elsewhere) as well?


I would prefer something that can be compiled out in production builds.
But I would be OK with BUG_ON("..."); that is at least clearer than a
plain BUG().
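
(As an illustration of the suggested pattern; a sketch only, not the actual
RISC-V code.)

    static inline int get_page_and_type(struct page_info *page,
                                        struct domain *domain,
                                        unsigned long type)
    {
        ASSERT_UNREACHABLE();   /* crash on debug builds */
        return -EOPNOTSUPP;     /* propagate an error on production builds */
    }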


--
Julien Grall



Re: [PATCH v3 21/34] xen/riscv: introduce p2m.h

2024-01-12 Thread Jan Beulich
On 12.01.2024 11:39, Julien Grall wrote:
> On 22/12/2023 15:13, Oleksii Kurochko wrote:
>> Signed-off-by: Oleksii Kurochko 
>> ---
>> Changes in V3:
>>   - add SPDX
>>   - drop unneeded for now p2m types.
>>   - return false in all functions implemented with BUG() inside.
>>   - update the commit message
>> ---
>> Changes in V2:
>>   - Nothing changed. Only rebase.
>> ---
>>   xen/arch/ppc/include/asm/p2m.h   |   3 +-
>>   xen/arch/riscv/include/asm/p2m.h | 102 +++
>>   2 files changed, 103 insertions(+), 2 deletions(-)
>>   create mode 100644 xen/arch/riscv/include/asm/p2m.h
>>
>> diff --git a/xen/arch/ppc/include/asm/p2m.h b/xen/arch/ppc/include/asm/p2m.h
>> index 25ba054668..3bc05b7c05 100644
>> --- a/xen/arch/ppc/include/asm/p2m.h
>> +++ b/xen/arch/ppc/include/asm/p2m.h
>> @@ -50,8 +50,7 @@ static inline void memory_type_changed(struct domain *d)
>>   static inline int guest_physmap_mark_populate_on_demand(struct domain *d, 
>> unsigned long gfn,
>>   unsigned int order)
>>   {
>> -BUG_ON("unimplemented");
>> -return 1;
>> +return -EOPNOTSUPP;
>>   }
>>   
>>   static inline int guest_physmap_add_entry(struct domain *d,
>> diff --git a/xen/arch/riscv/include/asm/p2m.h 
>> b/xen/arch/riscv/include/asm/p2m.h
>> new file mode 100644
>> index 00..d270ef6635
>> --- /dev/null
>> +++ b/xen/arch/riscv/include/asm/p2m.h
>> @@ -0,0 +1,102 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +#ifndef __ASM_RISCV_P2M_H__
>> +#define __ASM_RISCV_P2M_H__
>> +
>> +#include 
>> +
>> +#define paddr_bits PADDR_BITS
>> +
>> +/*
>> + * List of possible type for each page in the p2m entry.
>> + * The number of available bit per page in the pte for this purpose is 4 
>> bits.
>> + * So it's possible to only have 16 fields. If we run out of value in the
>> + * future, it's possible to use higher value for pseudo-type and don't store
>> + * them in the p2m entry.
>> + */
> 
> This looks like a verbatim copy from Arm. Did you actually check RISC-V 
> has 4 bits available in the PTE to store this value?
> 
>> +typedef enum {
>> +p2m_invalid = 0,/* Nothing mapped here */
>> +p2m_ram_rw, /* Normal read/write guest RAM */
> 
> s/guest/domain/ as this also applies for dom0.
> 
>> +} p2m_type_t;
>> +
>> +#include 
>> +
>> +static inline int get_page_and_type(struct page_info *page,
>> +struct domain *domain,
>> +unsigned long type)
>> +{
>> +BUG();
> 
> I understand your goal with the BUG() but I find it risky. This is not a 
> problem right now, it is more when we will decide to have RISC-V 
> supported. You will have to go through all the BUG() to figure out which 
> ones are warranted or not.
> 
> To reduce the load, I would recommend to switch to ASSERT_UNREACHABLE() 
> (or maybe introduced a different macro) that would lead to a crash on 
> debug build but propagate the error normally on production build.

Elsewhere BUG_ON("unimplemented") is used to indicate such cases.
Can't this be used here (and then uniformly elsewhere) as well?

Jan



Re: [PATCH] x86/hvm: don't expose XENFEAT_hvm_pirqs by default

2024-01-12 Thread Roger Pau Monné
On Thu, Jan 11, 2024 at 05:47:30PM +, David Woodhouse wrote:
> On Wed, 2024-01-10 at 11:26 +0100, Jan Beulich wrote:
> > On 10.01.2024 10:53, Roger Pau Monne wrote:
> > > The HVM pirq feature allows routing interrupts from both physical and 
> > > emulated
> > > devices over event channels, this was done as a performance improvement.
> > > However
> > > its usage is fully undocumented, and the only reference implementation is 
> > > in
> > > Linux.  It defeats the purpose of local APIC hardware virtualization, 
> > > because
> > > when using it interrupts avoid the usage of the local APIC altogether.
> > 
> > So without sufficient APIC acceleration, isn't this arranging for degraded
> > performance then? IOW should the new default perhaps be dependent on the
> > degree of APIC acceleration?
> 
> In fact Linux already declines to use MSI → PIRQ routing if APIC
> acceleration is enabled.
> 
> static void __init xen_hvm_msi_init(void)
> {
> if (!apic_is_disabled) {
> /*
>  * If hardware supports (x2)APIC virtualization (as indicated
>  * by hypervisor's leaf 4) then we don't need to use pirqs/
>  * event channels for MSI handling and instead use regular
>  * APIC processing
>  */
> uint32_t eax = cpuid_eax(xen_cpuid_base() + 4);
> 
> if (((eax & XEN_HVM_CPUID_X2APIC_VIRT) && x2apic_mode) ||
> ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && 
> boot_cpu_has(X86_FEATURE_APIC)))
> return;
> }
> xen_setup_pci_msi();
> }
> 
> > > It has also been reported to not work properly with certain devices, at 
> > > least
> > > when using some AMD GPUs Linux attempts to route interrupts over event
> > > channels, but Xen doesn't correctly detect such routing, which leads to 
> > > the
> > > hypervisor complaining with:
> > > 
> > > (XEN) d15v0: Unsupported MSI delivery mode 7 for Dom15
> > > 
> > > When MSIs are attempted to be routed over event channels the entry 
> > > delivery
> > > mode is set to ExtINT, but Xen doesn't detect such routing and attempts to
> > > inject the interrupt following the native MSI path, and the ExtINT 
> > > delivery
> > > mode is not supported.
> > 
> > Shouldn't this be properly addressed nevertheless? The way it's described
> > it sounds as if MSI wouldn't work at all this way; I can't spot why the
> > issue would only be "with certain devices". Yet that in turn doesn't look
> > to be very likely - pass-through use cases, in particular SR-IOV ones,
> > would certainly have noticed.
> 
> I agree. The MSI to PIRQ routing thing is *awful*, especially the way
> that Xen/QEMU snoops on writes to the MSI table while the target is
> *masked*, and then Xen unmasks the MSI instead of the guest doing so.
> 
> But it does work, and there are three implementations of it on the
> hypervisor side now (Xen itself, QEMU and the Nitro hypervisor).

There's only one implementation in Xen, that's split between the
hypervisor and QEMU.  IOW: it's not like the QEMU and the hypervisor
sides are independent.

It's also only used by Linux; I don't know of any other guest having
implemented support for it (thankfully, I should say).

> We
> should fix the bug which is being reported, but I don't see why it's
> necessary to completely disable the feature.

It's not just this bug: the feature is fully undocumented, and every
time there's an issue reported against interrupt delivery on HVM Linux
we need to reverse engineer how this is supposed to work.

Not to mention the MSI breakage it introduces by re-using the MSI
data and address fields for Xen specific purposes.

Note that this commit is not removing the code, just disabling it by
default.  Users can still enable it using hvm_pirq option on xl.cfg
(and toolstacks can use the domctl create flag to enable it).
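
(For reference, assuming the option keeps the hvm_pirq name and boolean form
described in this patch, a guest config enabling it would look roughly like:)

    type = "hvm"
    hvm_pirq = 1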

IMO if someone wants to make a case for having HVM PIRQs enabled by
default, we first need documentation about how it's supposed to work,
plus all the reported bugs fixed.  I have to admit I would probably be
reluctant to enable it by default even then.

Apart from the original issue, Xenia has also reported that, with the
option enabled, they see some unexpected scheduling imbalance that's
gone when the option is disabled:

https://lore.kernel.org/xen-devel/6fe776cd-3fa6-421f-9d02-9350e85d5...@amd.com/

Regards, Roger.



Re: [PATCH v2] xen/arm: bootfdt: Harden handling of malformed mem reserve map

2024-01-12 Thread Julien Grall

Hi,

On 12/01/2024 08:56, Michal Orzel wrote:



On 12/01/2024 00:24, Shawn Anastasio wrote:



The early_print_info routine in bootfdt.c incorrectly stores the result
of a call to fdt_num_mem_rsv() in an unsigned int, which results in the
negative error code being interpreted incorrectly in a subsequent loop
in the case where the device tree is malformed. Fix this by properly
checking the return code for an error and calling panic().
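
(A minimal sketch of the described fix; variable names are illustrative, not
the literal patch.)

    int nr_rsvd = fdt_num_mem_rsv(fdt);

    if ( nr_rsvd < 0 )
        panic("Error retrieving memory reservation map: %d\n", nr_rsvd);

    for ( i = 0; i < nr_rsvd; i++ )
        /* print each reserved region, as before */;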

Signed-off-by: Shawn Anastasio 

Reviewed-by: Michal Orzel 


Committed.

Cheers,

--
Julien Grall



Re: [PATCH v2] xen/common: Don't dereference overlay_node after checking that it is NULL

2024-01-12 Thread Julien Grall

Hi Javi,

On 11/01/2024 12:09, Javi Merino wrote:

In remove_nodes(), overlay_node is dereferenced when printing the
error message even though it is known to be NULL.  Return without
printing, as an error message is already printed by the caller.

The semantic patch that spots this code is available in

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/coccinelle/null/deref_null.cocci?id=1f874787ed9a2d78ed59cb21d0d90ac0178eceb0

Fixes: 7e5c4a8b86f1 ("xen/arm: Implement device tree node removal 
functionalities")
Signed-off-by: Javi Merino 


Acked-by: Julien Grall 

And committed.

Cheers,

--
Julien Grall



Re: [PATCH v2 3/3] x86/vmx: Disallow the use of inactivity states

2024-01-12 Thread Jan Beulich
On 12.01.2024 11:43, Andrew Cooper wrote:
> On 12/01/2024 10:37 am, Jan Beulich wrote:
>> On 12.01.2024 00:13, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>>> @@ -1551,7 +1551,10 @@ static void cf_check vmx_set_nonreg_state(struct 
>>> vcpu *v,
>>>  {
>>>  vmx_vmcs_enter(v);
>>>  
>>> -__vmwrite(GUEST_ACTIVITY_STATE, nrs->vmx.activity_state);
>>> +if ( nrs->vmx.activity_state )
>>> +domain_crash(v->domain, "Attempt to set activity_state %#lx\n",
>>> + nrs->vmx.activity_state);
>> Might be useful to log the offending vCPU here?
> 
> Already covered.  the innards of __domain_crash() does:
> 
>     else if ( d == current->domain )
>     {
>         printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
>     ...

Except that afaict v != current here at all times (at least as far as
current use of the function goes).

Jan



Re: [PATCH v2 2/2] xen/arm32: head: Improve logging in head.S

2024-01-12 Thread Julien Grall




On 12/01/2024 08:49, Michal Orzel wrote:

Hi Julien,


Hi Michal,


On 11/01/2024 19:34, Julien Grall wrote:



From: Julien Grall 

The sequence to enable the MMU on arm32 is quite complex as we may need
to jump to a temporary mapping to map Xen.

Recently, we had one bug in the logic (see f5a49eb7f8b3 ("xen/arm32:
head: Add mising isb in switch_to_runtime_mapping()") and it was
a pain to debug because there was no logging.

In order to improve the logging in the MMU switch we need to add
support for early printk while running on the identity mapping
and also on the temporary mapping.

For the identity mapping, we have only the first page of Xen mapped.
So all the strings should reside in the first page. For that purpose
a new macro PRINT_ID is introduced.

For the temporary mapping, the fixmap is already linked in the temporary
area (and so does the UART). So we just need to update the register
storing the UART address (i.e. r11) to point to the UART temporary
mapping.

Take the opportunity to introduce mov_w_on_cond in order to
conditionally execute mov_w and avoid branches.

Signed-off-by: Julien Grall 

Reviewed-by: Michal Orzel 


Thanks!


  /*
@@ -29,16 +33,26 @@

  #ifdef CONFIG_EARLY_PRINTK
  /*
- * Macro to print a string to the UART, if there is one.
+ * Macros to print a string to the UART, if there is one.
+ *
+ * There are multiple flavors:
+ *  - PRINT_SECT(section, string): The @string will be located in @section
+ *  - PRINT(): The string will be located in .rodata.str.
+ *  - PRINT_ID(): When Xen is running on the Identity Mapping, it is
+ *only possible to have a limited amount of Xen. This will create
+ *the string in .rodata.idmap which will always be mapped.
   *
   * Clobbers r0 - r3
   */
-#define PRINT(_s)   \
-mov   r3, lr   ;\
-adr_l r0, 98f  ;\
-blasm_puts ;\
-mov   lr, r3   ;\
-RODATA_STR(98, _s)
+#define PRINT_SECT(section, string) \
+mov   r3, lr   ;\
+adr_l r0, 98f  ;\
+blasm_puts ;\
+mov   lr, r3   ;\
+RODATA_SECT(section, 98, string)
+
+#define PRINT(string) PRINT_SECT(.rodata.str, string)
+#define PRINT_ID(string) PRINT_SECT(.rodata.idmap, string)

I know this is just a macro but does it make sense to have something MMU 
specific in common header?
I don't expect MPU to use it.
For cache coloring, I would like secondary boot CPUs to start directly 
on the colored Xen. This means that any message used before enabling the 
MMU will need to be part of the .rodata.idmap.


I know that 32-bit is not in scope for the cache coloring series. But I 
would like to keep 32-bit and 64-bit boot logic fairly similar.


With that in mind, would you be happy if I keep PRINT_ID() in macros.h? 
Note that I would be ok to move in mmu/head.S and move back in macros.h 
later on. I just wanted to avoid code movement :).






  /*
   * Macro to print the value of register \rb
@@ -54,6 +68,7 @@

  #else /* CONFIG_EARLY_PRINTK */
  #define PRINT(s)
+#define PRINT_ID(s)

  .macro print_reg rb
  .endm
diff --git a/xen/arch/arm/include/asm/asm_defns.h 
b/xen/arch/arm/include/asm/asm_defns.h
index 29a9dbb002fa..ec803c0a370c 100644
--- a/xen/arch/arm/include/asm/asm_defns.h
+++ b/xen/arch/arm/include/asm/asm_defns.h
@@ -22,11 +22,13 @@
  # error "unknown ARM variant"
  #endif

-#define RODATA_STR(label, msg)  \
-.pushsection .rodata.str, "aMS", %progbits, 1 ; \
+#define RODATA_SECT(section, label, msg) \
+.pushsection section, "aMS", %progbits, 1 ; \
  label:  .asciz msg; \
  .popsection

+#define RODATA_STR(label, msg) RODATA_SECT(.rodata.str, label, msg)
+
  #define ASM_INT(label, val) \
  .p2align 2; \
  label: .long (val); \
diff --git a/xen/arch/arm/include/asm/early_printk.h 
b/xen/arch/arm/include/asm/early_printk.h
index c5149b2976da..c1e84f8b0009 100644
--- a/xen/arch/arm/include/asm/early_printk.h
+++ b/xen/arch/arm/include/asm/early_printk.h
@@ -19,6 +19,9 @@
  #define EARLY_UART_VIRTUAL_ADDRESS \
  (FIXMAP_ADDR(FIXMAP_CONSOLE) + (CONFIG_EARLY_UART_BASE_ADDRESS & 
~PAGE_MASK))

+#define TEMPORARY_EARLY_UART_VIRTUAL_ADDRESS \
+(TEMPORARY_FIXMAP_ADDR(FIXMAP_CONSOLE) + (CONFIG_EARLY_UART_BASE_ADDRESS & 
~PAGE_MASK))
+
  #endif /* !CONFIG_EARLY_PRINTK */

  #endif
diff --git a/xen/arch/arm/include/asm/mmu/layout.h 
b/xen/arch/arm/include/asm/mmu/layout.h
index eac7eef885d6..a3b546465b5a 100644
--- a/xen/arch/arm/include/asm/mmu/layout.h
+++ b/xen/arch/arm/include/asm/mmu/layout.h
@@ -116,6 +116,10 @@
(TEMPORARY_AREA_FIRST_SLOT << XEN_PT_LEVEL_SHIFT(1)))

  #define TEMPORARY_XEN_VIRT_STARTTEMPORARY_AREA_ADDR(XEN_VIRT_START)
+#define TEMPORARY_FIXMAP_VIRT_START TEMPORARY_AREA_ADDR(FIXMAP_VIRT_START)
+
+#define TEMPORARY_FIXMAP_ADDR(n)

Re: [PATCH v2 1/2] xen/arm32: head: Rework how the fixmap and early UART mapping are prepared

2024-01-12 Thread Julien Grall

Hi Michal,

On 12/01/2024 07:46, Michal Orzel wrote:

On 11/01/2024 19:34, Julien Grall wrote:



From: Julien Grall 

Since commit 5e213f0f4d2c ("xen/arm32: head: Widen the use of the
temporary mapping"), boot_second (used to cover regions like Xen and
the fixmap) will not be mapped if the identity mapping overlaps.

So it is ok to prepare the fixmap table and link it in boot_second
earlier. With that, the fixmap can also be used earlier via the
temporary mapping.

Therefore split setup_fixmap() in two:
 * The table is now linked in create_page_tables() because
   the boot page tables need to be recreated for every CPU.
 * The early UART mapping is only added for the boot CPU0 as the
   fixmap table is not cleared when secondary CPUs boot.

Signed-off-by: Julien Grall 

Reviewed-by: Michal Orzel 


Thanks.



with below 2 adjustments:


I will address them on commit.


+ */
+mov_w r0, EARLY_UART_VIRTUAL_ADDRESS
+create_mapping_entry xen_fixmap, r0, r11, type=PT_DEV_L3

Would you mind listing r11 in the Input section of a comment?


I have added:

r11: UART physical address



~Michal


--
Julien Grall



Re: [PATCH v2 3/3] x86/vmx: Disallow the use of inactivity states

2024-01-12 Thread Andrew Cooper
On 12/01/2024 10:37 am, Jan Beulich wrote:
> On 12.01.2024 00:13, Andrew Cooper wrote:
>> Right now, vvmx will blindly copy L12's ACTIVITY_STATE into the L02 VMCS and
>> enter the vCPU.  Luckily for us, nested-virt is explicitly unsupported for
>> security bugs.
>>
>> The inactivity states are HLT, SHUTDOWN and WAIT-FOR-SIPI, and as noted by 
>> the
>> SDM in Vol3 27.7 "Special Features of VM Entry":
>>
>>   If VM entry ends with the logical processor in an inactive activity state,
>>   the VM entry generates any special bus cycle that is normally generated 
>> when
>>   that activity state is entered from the active state.
>>
>> Also,
>>
>>   Some activity states unconditionally block certain events.
>>
>> I.e. A VMEntry with ACTIVITY=SHUTDOWN will initiate a platform reset, while a
>> VMEntry with ACTIVITY=WAIT-FOR-SIPI will really block everything other than
>> SIPIs.
>>
>> Both of these activity states are for the TXT ACM to use, not for regular
>> hypervisors, and Xen doesn't support dropping the HLT intercept either.
>>
>> There are two paths in Xen which operate on ACTIVITY_STATE.
>>
>> 1) The vmx_{get,set}_nonreg_state() helpers for VM-Fork.
>>
>>As regular VMs can't use any inactivity states, this is just duplicating
>>the 0 from construct_vmcs().  Retain the ability to query activity_state,
>>but crash the domain on any attempt to set an inactivity state.
>>
>> 2) Nested virt, because of ACTIVITY_STATE in vmcs_gstate_field[].
>>
>>Explicitly hide the inactivity states in the guest's view of MSR_VMX_MISC,
>>and remove ACTIVITY_STATE from vmcs_gstate_field[].
>>
>>In virtual_vmentry(), we should trigger a VMEntry failure for the use of
>>any inactivity states, but there's no support for that in the code at all
>>so leave a TODO for when we finally start working on nested-virt in
>>earnest.
>>
>> Reported-by: Reima Ishii 
>> Signed-off-by: Andrew Cooper 
> Reviewed-by: Jan Beulich 

Thanks.

> with one remark/suggestion:
>
>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>> @@ -1551,7 +1551,10 @@ static void cf_check vmx_set_nonreg_state(struct vcpu 
>> *v,
>>  {
>>  vmx_vmcs_enter(v);
>>  
>> -__vmwrite(GUEST_ACTIVITY_STATE, nrs->vmx.activity_state);
>> +if ( nrs->vmx.activity_state )
>> +domain_crash(v->domain, "Attempt to set activity_state %#lx\n",
>> + nrs->vmx.activity_state);
> Might be useful to log the offending vCPU here?

Already covered.  the innards of __domain_crash() does:

    else if ( d == current->domain )
    {
        printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
    ...

~Andrew



Re: [PATCH v4 03/11] xen/arm: add Dom0 cache coloring support

2024-01-12 Thread Michal Orzel



On 12/01/2024 10:24, Michal Orzel wrote:
> 
> 
> Hi Carlo,
> 
> On 23/01/2023 16:47, Carlo Nonato wrote:
>>
>>
>> From: Luca Miccio 
>>
>> This commit allows the user to set the cache coloring configuration for
>> Dom0 via a command line parameter.
>> Since cache coloring and static memory are incompatible, direct mapping
>> Dom0 isn't possible when coloring is enabled.
>>
>> Here is also introduced a common configuration syntax for cache colors.
>>
>> Signed-off-by: Luca Miccio 
>> Signed-off-by: Marco Solieri 
>> Signed-off-by: Carlo Nonato 
>> ---
>> v4:
>> - dom0 colors are dynamically allocated as for any other domain
>>   (colors are duplicated in dom0_colors and in the new array, but logic
>>   is simpler)
>> ---
>>  docs/misc/arm/cache-coloring.rst| 32 ++---
>>  xen/arch/arm/domain_build.c | 17 +++--
>>  xen/arch/arm/include/asm/llc_coloring.h |  4 
>>  xen/arch/arm/llc_coloring.c | 14 +++
>>  4 files changed, 62 insertions(+), 5 deletions(-)
>>
>> diff --git a/docs/misc/arm/cache-coloring.rst 
>> b/docs/misc/arm/cache-coloring.rst
>> index 0244d2f606..c2e0e87426 100644
>> --- a/docs/misc/arm/cache-coloring.rst
>> +++ b/docs/misc/arm/cache-coloring.rst
>> @@ -83,12 +83,38 @@ manually set the way size it's left for the user to 
>> overcome failing situations
>>  or for debugging/testing purposes. See `Coloring parameters and domain
>>  configurations`_ section for more information on that.
>>
>> +Colors selection format
>> +***
>> +
>> +Regardless of the memory pool that has to be colored (Xen, Dom0/DomUs),
>> +the color selection can be expressed using the same syntax. In particular a
>> +comma-separated list of colors or ranges of colors is used.
>> +Ranges are hyphen-separated intervals (such as `0-4`) and are inclusive on 
>> both
>> +sides.
>> +
>> +Note that:
>> + - no spaces are allowed between values.
>> + - no overlapping ranges or duplicated colors are allowed.
>> + - values must be written in ascending order.
>> +
>> +Examples:
>> +
>> ++-+---+
>> +|**Configuration**|**Actual selection**   |
>> ++-+---+
>> +|  1-2,5-8| [1, 2, 5, 6, 7, 8]|
>> ++-+---+
>> +|  4-8,10,11,12   | [4, 5, 6, 7, 8, 10, 11, 12]   |
>> ++-+---+
>> +|  0  | [0]   |
>> ++-+---+
>> +
>>  Coloring parameters and domain configurations
>>  *
>>
>> -LLC way size (as previously discussed) can be set using the appropriate 
>> command
>> -line parameter. See the relevant documentation in
>> -"docs/misc/xen-command-line.pandoc".
>> +LLC way size (as previously discussed) and Dom0 colors can be set using the
>> +appropriate command line parameters. See the relevant documentation
>> +in "docs/misc/xen-command-line.pandoc".
>>
>>  **Note:** If no color configuration is provided for a domain, the default 
>> one,
>>  which corresponds to all available colors, is used instead.
>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>> index f35f4d2456..093d4ad6f6 100644
>> --- a/xen/arch/arm/domain_build.c
>> +++ b/xen/arch/arm/domain_build.c
>> @@ -2,6 +2,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>> @@ -4014,7 +4015,10 @@ static int __init construct_dom0(struct domain *d)
>>  /* type must be set before allocate_memory */
>>  d->arch.type = kinfo.type;
>>  #endif
>> -allocate_memory_11(d, &kinfo);
>> +if ( is_domain_llc_colored(d) )
>> +allocate_memory(d, &kinfo);
> While doing some checks, I realized that the issue from previous series is 
> still present.
> Given that dom0 is hwdom, how are you going to prevent conflicts between 
> allocated RAM and HW resources
> that are to be mapped to dom0?
Sorry. I answered to the wrong revision :)
Anyway, the remark still applies to v5.

~Michal



Re: [PATCH v3 21/34] xen/riscv: introduce p2m.h

2024-01-12 Thread Julien Grall

Hi Oleksii,

On 22/12/2023 15:13, Oleksii Kurochko wrote:

Signed-off-by: Oleksii Kurochko 
---
Changes in V3:
  - add SPDX
  - drop unneeded for now p2m types.
  - return false in all functions implemented with BUG() inside.
  - update the commit message
---
Changes in V2:
  - Nothing changed. Only rebase.
---
  xen/arch/ppc/include/asm/p2m.h   |   3 +-
  xen/arch/riscv/include/asm/p2m.h | 102 +++
  2 files changed, 103 insertions(+), 2 deletions(-)
  create mode 100644 xen/arch/riscv/include/asm/p2m.h

diff --git a/xen/arch/ppc/include/asm/p2m.h b/xen/arch/ppc/include/asm/p2m.h
index 25ba054668..3bc05b7c05 100644
--- a/xen/arch/ppc/include/asm/p2m.h
+++ b/xen/arch/ppc/include/asm/p2m.h
@@ -50,8 +50,7 @@ static inline void memory_type_changed(struct domain *d)
  static inline int guest_physmap_mark_populate_on_demand(struct domain *d, 
unsigned long gfn,
  unsigned int order)
  {
-BUG_ON("unimplemented");
-return 1;
+return -EOPNOTSUPP;
  }
  
  static inline int guest_physmap_add_entry(struct domain *d,

diff --git a/xen/arch/riscv/include/asm/p2m.h b/xen/arch/riscv/include/asm/p2m.h
new file mode 100644
index 00..d270ef6635
--- /dev/null
+++ b/xen/arch/riscv/include/asm/p2m.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_RISCV_P2M_H__
+#define __ASM_RISCV_P2M_H__
+
+#include 
+
+#define paddr_bits PADDR_BITS
+
+/*
+ * List of possible type for each page in the p2m entry.
+ * The number of available bit per page in the pte for this purpose is 4 bits.
+ * So it's possible to only have 16 fields. If we run out of value in the
+ * future, it's possible to use higher value for pseudo-type and don't store
+ * them in the p2m entry.
+ */


This looks like a verbatim copy from Arm. Did you actually check RISC-V 
has 4 bits available in the PTE to store this value?



+typedef enum {
+p2m_invalid = 0,/* Nothing mapped here */
+p2m_ram_rw, /* Normal read/write guest RAM */


s/guest/domain/ as this also applies for dom0.


+} p2m_type_t;
+
+#include 
+
+static inline int get_page_and_type(struct page_info *page,
+struct domain *domain,
+unsigned long type)
+{
+BUG();


I understand your goal with the BUG() but I find it risky. This is not a 
problem right now, it is more when we will decide to have RISC-V 
supported. You will have to go through all the BUG() to figure out which 
ones are warranted or not.


To reduce the load, I would recommend to switch to ASSERT_UNREACHABLE() 
(or maybe introduced a different macro) that would lead to a crash on 
debug build but propagate the error normally on production build.


Of course, if you can't propagate an error, then the right course of 
action is a BUG(). But I expect this case to be limited.


[...]


+static inline mfn_t gfn_to_mfn(struct domain *d, gfn_t gfn)
+{
+BUG();
+return _mfn(0);


This wants to be INVALID_MFN.

[...]


--
Julien Grall



Re: [PATCH v2 3/3] x86/vmx: Disallow the use of inactivity states

2024-01-12 Thread Jan Beulich
On 12.01.2024 00:13, Andrew Cooper wrote:
> Right now, vvmx will blindly copy L12's ACTIVITY_STATE into the L02 VMCS and
> enter the vCPU.  Luckily for us, nested-virt is explicitly unsupported for
> security bugs.
> 
> The inactivity states are HLT, SHUTDOWN and WAIT-FOR-SIPI, and as noted by the
> SDM in Vol3 27.7 "Special Features of VM Entry":
> 
>   If VM entry ends with the logical processor in an inactive activity state,
>   the VM entry generates any special bus cycle that is normally generated when
>   that activity state is entered from the active state.
> 
> Also,
> 
>   Some activity states unconditionally block certain events.
> 
> I.e. A VMEntry with ACTIVITY=SHUTDOWN will initiate a platform reset, while a
> VMEntry with ACTIVITY=WAIT-FOR-SIPI will really block everything other than
> SIPIs.
> 
> Both of these activity states are for the TXT ACM to use, not for regular
> hypervisors, and Xen doesn't support dropping the HLT intercept either.
> 
> There are two paths in Xen which operate on ACTIVITY_STATE.
> 
> 1) The vmx_{get,set}_nonreg_state() helpers for VM-Fork.
> 
>As regular VMs can't use any inactivity states, this is just duplicating
>the 0 from construct_vmcs().  Retain the ability to query activity_state,
>but crash the domain on any attempt to set an inactivity state.
> 
> 2) Nested virt, because of ACTIVITY_STATE in vmcs_gstate_field[].
> 
>Explicitly hide the inactivity states in the guest's view of MSR_VMX_MISC,
>and remove ACTIVITY_STATE from vmcs_gstate_field[].
> 
>In virtual_vmentry(), we should trigger a VMEntry failure for the use of
>any inactivity states, but there's no support for that in the code at all
>so leave a TODO for when we finally start working on nested-virt in
>earnest.
> 
> Reported-by: Reima Ishii 
> Signed-off-by: Andrew Cooper 

Reviewed-by: Jan Beulich 
with one remark/suggestion:

> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1551,7 +1551,10 @@ static void cf_check vmx_set_nonreg_state(struct vcpu 
> *v,
>  {
>  vmx_vmcs_enter(v);
>  
> -__vmwrite(GUEST_ACTIVITY_STATE, nrs->vmx.activity_state);
> +if ( nrs->vmx.activity_state )
> +domain_crash(v->domain, "Attempt to set activity_state %#lx\n",
> + nrs->vmx.activity_state);

Might be useful to log the offending vCPU here?

Jan



Re: [PATCH v2 2/3] x86/vmx: Fix IRQ handling for EXIT_REASON_INIT

2024-01-12 Thread Jan Beulich
On 12.01.2024 00:13, Andrew Cooper wrote:
> When receiving an INIT, a prior bugfix tried to ignore the INIT and continue
> onwards.
> 
> Unfortunately it's not safe to return at that point in vmx_vmexit_handler().
> Just out of context in the first hunk is a local_irqs_enabled() which is
> depended-upon by the return-to-guest path, causing the following checklock
> failure in debug builds:
> 
>   (XEN) Error: INIT received - ignoring
>   (XEN) CHECKLOCK FAILURE: prev irqsafe: 0, curr irqsafe 1
>   (XEN) Xen BUG at common/spinlock.c:132
>   (XEN) [ Xen-4.19-unstable  x86_64  debug=y  Tainted: H  ]
>   ...
>   (XEN) Xen call trace:
>   (XEN)[] R check_lock+0xcd/0xe1
>   (XEN)[] F _spin_lock+0x1b/0x60
>   (XEN)[] F pt_update_irq+0x32/0x3bb
>   (XEN)[] F vmx_intr_assist+0x3b/0x51d
>   (XEN)[] F vmx_asm_vmexit_handler+0xf7/0x210
> 
> Luckily, this is benign in release builds.  Accidentally having IRQs disabled
> when trying to take an IRQs-on lock isn't a deadlock-vulnerable pattern.
> 
> Drop the problematic early return.  In hindsight, it's wrong to skip other
> normal VMExit steps.
> 
> Fixes: b1f11273d5a7 ("x86/vmx: Don't spuriously crash the domain when INIT is 
> received")
> Reported-by: Reima ISHII 
> Signed-off-by: Andrew Cooper 

Reviewed-by: Jan Beulich 





Re: [PATCH v2 1/3] x86/vmx: Collect all emtpy VMExit cases together

2024-01-12 Thread Jan Beulich
On 12.01.2024 00:13, Andrew Cooper wrote:
> ... rather than having them spread out.  Explain concisely why each is empty.
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper 

Reviewed-by: Jan Beulich 





Re: [PATCH v1-alt] xen/livepatch: Make check_for_livepatch_work() faster in the common case

2024-01-12 Thread Jan Beulich
On 11.01.2024 21:11, Andrew Cooper wrote:
> When livepatching is enabled, this function is used all the time.  Really do
> check the fastpath first, and annotate it likely() as this is the right answer
> 100% of the time (to many significant figures).  This cuts out 3 pointer
> dereferences in the "nothing to do path".
> 
> However, GCC still needs some help to persuade it not to set the full stack
> frame (6 spilled registers, 3 slots of locals) even on the fastpath.
> 
> Create a new check_for_livepatch_work() with the fastpath only, and make the
> "new" do_livepatch_work() noinline.  This causes the fastpath to need no stack
> frame, making it faster still.
> 
> Signed-off-by: Andrew Cooper 

Reviewed-by: Jan Beulich 
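
(A sketch of the fastpath/slowpath split described above; the per-CPU flag
name is assumed, not the literal patch.)

    static void noinline do_livepatch_work(void)
    {
        /* Slow path: the existing body, with its full stack frame, only
         * reached when there is actually livepatch work pending. */
    }

    void check_for_livepatch_work(void)
    {
        /* Fast path: a single per-CPU flag check, no stack frame needed. */
        if ( likely(!per_cpu(work_to_do, smp_processor_id())) )
            return;

        do_livepatch_work();
    }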





Re: [PATCH v5 08/13] xen/page_alloc: introduce preserved page flags macro

2024-01-12 Thread Jan Beulich
On 12.01.2024 11:01, Carlo Nonato wrote:
> On Mon, Jan 8, 2024 at 6:08 PM Jan Beulich  wrote:
>> On 02.01.2024 10:51, Carlo Nonato wrote:
>>> PGC_static and PGC_extra are flags that need to be preserved when assigning
>>> a page. Define a new macro that groups those flags and use it instead of
>>> or'ing every time.
>>>
>>> The new macro is used also in free_heap_pages() allowing future commits to
>>> extend it with other flags that must stop merging, as it now works for
>>> PGC_static. PGC_extra is no harm here since it's only ever being set on
>>> allocated pages.
>>
>> Is it? I can't see where free_domheap_pages() would clear it before calling
>> free_heap_pages(). Or wait, that may happen in mark_page_free(), but then
>> PGC_static would be cleared there, too. I must be missing something.
>>
>>> --- a/xen/common/page_alloc.c
>>> +++ b/xen/common/page_alloc.c
>>> @@ -158,6 +158,8 @@
>>>  #define PGC_static 0
>>>  #endif
>>>
>>> +#define preserved_flags (PGC_extra | PGC_static)
>>
>> I think this wants to (a) have a PGC_ prefix and (b) as a #define be all
>> capitals.
>>
>>> @@ -1504,7 +1506,7 @@ static void free_heap_pages(
>>>  /* Merge with predecessor block? */
>>>  if ( !mfn_valid(page_to_mfn(predecessor)) ||
>>>   !page_state_is(predecessor, free) ||
>>> - (predecessor->count_info & PGC_static) ||
>>> + (predecessor->count_info & preserved_flags) ||
>>>   (PFN_ORDER(predecessor) != order) ||
>>>   (page_to_nid(predecessor) != node) )
>>>  break;
>>> @@ -1528,7 +1530,7 @@ static void free_heap_pages(
>>>  /* Merge with successor block? */
>>>  if ( !mfn_valid(page_to_mfn(successor)) ||
>>>   !page_state_is(successor, free) ||
>>> - (successor->count_info & PGC_static) ||
>>> + (successor->count_info & preserved_flags) ||
>>>   (PFN_ORDER(successor) != order) ||
>>>   (page_to_nid(successor) != node) )
>>>  break;
>>
>> Irrespective of the comment at the top, this looks like an abuse of the
>> new constant: There's nothing inherently making preserved flags also
>> suppress merging (assuming it was properly checked that both sides have
>> the same flags set/clear).
> 
> Sorry, I may have misinterpreted your comments on the previous version of the
> series (I know it was a really long time ago).
> 
> https://patchew.org/Xen/20230123154735.74832-1-carlo.non...@minervasys.tech/20230123154735.74832-8-carlo.non...@minervasys.tech/#c843b031-52f7-056d-e8c0-75fe9c426...@suse.com
> 
> Anyway, would the solution here be to have two distinct #defines? One for
> suppressing merging and the other for preserved flags. This would probably also
> remove any confusion with the usage of PGC_extra.

That's one way to deal with this. Another would be to refine the above
checks, such that both buddies' preserved flags are actually compared,
and merging be suppressed if they're different. Then going with a single
#define would imo be quite okay.

Jan
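A short sketch of the refinement suggested above, i.e. comparing the two buddies'
preserved flags rather than refusing to merge whenever any such flag is set.
PGC_PRESERVED and the helper name mirror the discussion but are placeholders, not
the series' actual page_alloc.c code.

    #define PGC_PRESERVED  (PGC_extra | PGC_static)

    /* Buddies may only be merged if their preserved flags are identical. */
    static bool buddies_mergeable(const struct page_info *pg,
                                  const struct page_info *buddy)
    {
        return (pg->count_info & PGC_PRESERVED) ==
               (buddy->count_info & PGC_PRESERVED);
    }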



Re: [PATCH v2] x86/intel: ensure Global Performance Counter Control is setup correctly

2024-01-12 Thread Jan Beulich
On 12.01.2024 11:08, Roger Pau Monné wrote:
> On Fri, Jan 12, 2024 at 08:42:27AM +0100, Jan Beulich wrote:
>> On 11.01.2024 17:53, Roger Pau Monné wrote:
>>> On Thu, Jan 11, 2024 at 04:52:04PM +0100, Jan Beulich wrote:
 On 11.01.2024 15:15, Roger Pau Monné wrote:
> On Thu, Jan 11, 2024 at 03:01:01PM +0100, Jan Beulich wrote:
>> On 11.01.2024 13:22, Roger Pau Monné wrote:
>>> Oh, indeed, can adjust on this same patch if that's OK (seeing as the
>>> issue was already there previous to my change).
>>
>> Well, I'm getting the impression that it was deliberate there, i.e. the
>> setting of the feature flag may want to remain thus constrained.
>
> Hm, I find it weird, but the original commit message doesn't help at
> all.  Xen itself only uses PMC0, and I don't find any other
> justification in the current code to require at least 2 counters in
> order to expose arch performance monitoring to be present.
>
> Looking at the SDM vol3, the figures there only contain PMC0 and PMC1,
> so someone only reading that manual might assume there must always be
> 2 global PMCs?

 That may have been the impression at the time. It may have been wrong
 already back then, or ...

> (vol4 clarifies that the number of global PMCs is variable).

 ... it may have been clarified in the SDM later on. My vague guess is
 that the > 1 check was to skip what may have been "obviously buggy"
 back at the time.
>>>
>>> Let me know if you are OK with the adjustment in v3, or whether you
>>> would rather leave the > 1 check as-is (or maybe adjust in a different
>>> patch).
>>
>> Well, I haven't been able to make up my mind as to whether the original
>> check was wrong. Without clear indication, I think we should retain the
>> original behavior by having the __set_bit() gated by an additional if().
>> Then, since the line needs touching anyway, a further question would be
>> whether to properly switch to setup_force_cpu_cap() at the same time.
> 
> Having looked at Linux, it has exactly the same check for > 1, which I
> guess is to be expected since the code in Xen is quite likely adapted
> from the code in Linux.
> 
> Overall, it might be best to leave the check as > 1.  It's possible (as
> I think you also mention in a previous email) that there's simply no
> hardware with 1 counter.  This might no longer be true when
> virtualized, but given the current checks in both Xen and Linux any
> virtualization environment that attempts to expose arch perf support
> would need to expose at least 2 PMCs.
> 
> My suggestion is to leave the cnt > 1 check as it is in v2.
> 
> I can send a v4 with that check fixed if there's nothing else in v3
> that needs fixing.
> 
> IMO doing the adjustment to PERF_GLOBAL_CTRL without setting
> ARCH_PERFMON would be contradictory.  Either we set ARCH_PERFMON
> support and consequently adjust PERF_GLOBAL_CTRL, or we don't.

Probably fair enough.

Jan
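To make the gating under discussion concrete, here is a hedged sketch. The exact
MSR value written and the use of setup_force_cpu_cap() are assumptions for
illustration, not necessarily what the patch does; the counter count comes from
CPUID leaf 0xa, EAX[15:8].

    #define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f   /* value per the SDM */

    static void sketch_intel_arch_perfmon_setup(void)
    {
        unsigned int eax, ebx, ecx, edx, cnt;

        cpuid(0xa, &eax, &ebx, &ecx, &edx);
        cnt = (eax >> 8) & 0xff;      /* number of general-purpose counters */

        if ( cnt > 1 )                /* the historical "> 1" check */
        {
            setup_force_cpu_cap(X86_FEATURE_ARCH_PERFMON);

            /* Only adjust PERF_GLOBAL_CTRL when the feature is advertised. */
            wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, (1ULL << cnt) - 1);
        }
    }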



[linux-linus test] 184324: regressions - FAIL

2024-01-12 Thread osstest service owner
flight 184324 linux-linus real [real]
http://logs.test-lab.xenproject.org/osstest/logs/184324/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-amd64-xl-pvhv2-intel 20 guest-localmigrate/x10 fail REGR. vs. 184270
 test-arm64-arm64-xl-credit1  12 debian-install   fail REGR. vs. 184270
 test-arm64-arm64-xl  12 debian-install   fail REGR. vs. 184270
 test-arm64-arm64-xl-thunderx 12 debian-install   fail REGR. vs. 184270
 test-arm64-arm64-xl-credit2  12 debian-install   fail REGR. vs. 184270
 test-arm64-arm64-xl-xsm  12 debian-install   fail REGR. vs. 184270
 test-arm64-arm64-libvirt-xsm 12 debian-install   fail REGR. vs. 184270
 test-armhf-armhf-xl-arndale   8 xen-boot fail REGR. vs. 184270
 test-amd64-amd64-xl-qemut-debianhvm-i386-xsm 12 debian-hvm-install fail REGR. vs. 184270

Tests which did not succeed, but are not blocking:
 test-armhf-armhf-libvirt 16 saverestore-support-checkfail  like 184270
 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 184270
 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 184270
 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 184270
 test-armhf-armhf-libvirt-raw 15 saverestore-support-checkfail  like 184270
 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 184270
 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check   fail like 184270
 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 184270
 test-amd64-amd64-libvirt 15 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail  never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass
 test-armhf-armhf-libvirt 15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  16 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-credit1  15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit1  16 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass
 test-armhf-armhf-xl-credit2  15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  16 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail   never pass
 test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail   never pass
 test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-vhd  14 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-vhd  15 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 14 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  14 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  15 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt-qcow2 14 migrate-support-checkfail never pass

version targeted for testing:
 linux                3e7aeb78ab01c2c2f0e1f784e5ddec88fcd3d106
baseline version:
 linux                0dd3ee31125508cd67f7e7172247f05b7fd1753a

Last test of basis   184270  2024-01-07 20:42:19 Z    4 days
Failing since        184283  2024-01-08 20:10:43 Z    3 days    6 attempts
Testing same since   184324  2024-01-11 20:11:36 Z    0 days    1 attempts


903 people touched revisions under test,
not listing them all

jobs:
 build-amd64-xsm  pass
 build-arm64-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-arm64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-arm64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-arm64-pvopspass
 

Re: [PATCH v2] x86/intel: ensure Global Performance Counter Control is setup correctly

2024-01-12 Thread Roger Pau Monné
On Fri, Jan 12, 2024 at 08:42:27AM +0100, Jan Beulich wrote:
> On 11.01.2024 17:53, Roger Pau Monné wrote:
> > On Thu, Jan 11, 2024 at 04:52:04PM +0100, Jan Beulich wrote:
> >> On 11.01.2024 15:15, Roger Pau Monné wrote:
> >>> On Thu, Jan 11, 2024 at 03:01:01PM +0100, Jan Beulich wrote:
>  On 11.01.2024 13:22, Roger Pau Monné wrote:
> > Oh, indeed, can adjust on this same patch if that's OK (seeing as the
> > issue was already there previous to my change).
> 
>  Well, I'm getting the impression that it was deliberate there, i.e. the
>  setting of the feature flag may want to remain thus constrained.
> >>>
> >>> Hm, I find it weird, but the original commit message doesn't help at
> >>> all.  Xen itself only uses PMC0, and I don't find any other
> >>> justification in the current code to require at least 2 counters in
> >>> order to expose arch performance monitoring to be present.
> >>>
> >>> Looking at the SDM vol3, the figures there only contain PMC0 and PMC1,
> >>> so someone only reading that manual might assume there must always be
> >>> 2 global PMCs?
> >>
> >> That may have been the impression at the time. It may have been wrong
> >> already back then, or ...
> >>
> >>> (vol4 clarifies that the number of global PMCs is variable).
> >>
> >> ... it may have been clarified in the SDM later on. My vague guess is
> >> that the > 1 check was to skip what may have been "obviously buggy"
> >> back at the time.
> > 
> > Let me know if you are OK with the adjustment in v3, or whether you
> > would rather leave the > 1 check as-is (or maybe adjust in a different
> > patch).
> 
> Well, I haven't been able to make up my mind as to whether the original
> check was wrong. Without clear indication, I think we should retain the
> original behavior by having the __set_bit() gated by an additional if().
> Then, since the line needs touching anyway, a further question would be
> whether to properly switch to setup_force_cpu_cap() at the same time.

Having looked at Linux, it has exactly the same check for > 1, which I
guess is to be expected since the code in Xen is quite likely adapted
from the code in Linux.

Overall, it might be best to leave the check as > 1.  It's possible (as
I think you also mention in a previous email) that there's simply no
hardware with 1 counter.  This might no longer be true when
virtualized, but given the current checks in both Xen and Linux any
virtualization environment that attempts to expose arch perf support
would need to expose at least 2 PMCs.

My suggestion is to leave the cnt > 1 check as it is in v2.

I can send a v4 with that check fixed if there's nothing else in v3
that needs fixing.

IMO doing the adjustment to PERF_GLOBAL_CTRL without setting
ARCH_PERFMON would be contradictory.  Either we set ARCH_PERFMON
support and consequently adjust PERF_GLOBAL_CTRL, or we don't.

Thanks, Roger.



Re: [PATCH v5 08/13] xen/page_alloc: introduce preserved page flags macro

2024-01-12 Thread Carlo Nonato
Hi Jan,

On Mon, Jan 8, 2024 at 6:08 PM Jan Beulich  wrote:
>
> On 02.01.2024 10:51, Carlo Nonato wrote:
> > PGC_static and PGC_extra are flags that need to be preserved when assigning
> > a page. Define a new macro that groups those flags and use it instead of
> > or'ing every time.
> >
> > The new macro is also used in free_heap_pages(), allowing future commits to
> > extend it with other flags that must suppress merging, as it now works for
> > PGC_static. PGC_extra does no harm here since it's only ever set on
> > allocated pages.
>
> Is it? I can't see where free_domheap_pages() would clear it before calling
> free_heap_pages(). Or wait, that may happen in mark_page_free(), but then
> PGC_static would be cleared there, too. I must be missing something.
>
> > --- a/xen/common/page_alloc.c
> > +++ b/xen/common/page_alloc.c
> > @@ -158,6 +158,8 @@
> >  #define PGC_static 0
> >  #endif
> >
> > +#define preserved_flags (PGC_extra | PGC_static)
>
> I think this wants to (a) have a PGC_ prefix and (b) as a #define be all
> capitals.
>
> > @@ -1504,7 +1506,7 @@ static void free_heap_pages(
> >  /* Merge with predecessor block? */
> >  if ( !mfn_valid(page_to_mfn(predecessor)) ||
> >   !page_state_is(predecessor, free) ||
> > - (predecessor->count_info & PGC_static) ||
> > + (predecessor->count_info & preserved_flags) ||
> >   (PFN_ORDER(predecessor) != order) ||
> >   (page_to_nid(predecessor) != node) )
> >  break;
> > @@ -1528,7 +1530,7 @@ static void free_heap_pages(
> >  /* Merge with successor block? */
> >  if ( !mfn_valid(page_to_mfn(successor)) ||
> >   !page_state_is(successor, free) ||
> > - (successor->count_info & PGC_static) ||
> > + (successor->count_info & preserved_flags) ||
> >   (PFN_ORDER(successor) != order) ||
> >   (page_to_nid(successor) != node) )
> >  break;
>
> Irrespective of the comment at the top, this looks like an abuse of the
> new constant: There's nothing inherently making preserved flags also
> suppress merging (assuming it was properly checked that both sides have
> the same flags set/clear).

Sorry, I may have misinterpreted your comments on the previous version of the
series (I know it was a really long time ago).

https://patchew.org/Xen/20230123154735.74832-1-carlo.non...@minervasys.tech/20230123154735.74832-8-carlo.non...@minervasys.tech/#c843b031-52f7-056d-e8c0-75fe9c426...@suse.com

Anyway, would the solution here be to have two distinct #defines? One for
suppressing merging and the other for preserved flags. This would probably also
remove any confusion with the usage of PGC_extra.

Thanks.

> Jan



Re: [PATCH v5 07/13] xen/page_alloc: introduce init_free_page_fields() helper

2024-01-12 Thread Carlo Nonato
Hi Jan,

On Tue, Jan 9, 2024 at 11:36 AM Jan Beulich  wrote:
>
> On 02.01.2024 10:51, Carlo Nonato wrote:
> > Introduce a new helper to initialize fields that have different uses for
> > free pages.
> >
> > Signed-off-by: Carlo Nonato 
> > Signed-off-by: Marco Solieri 
>
> I might in principle ack this change, but what's the deal with this 2nd
> S-o-b? The typical expectation is for yours to be last, and the 1st one
> to be the original author's (which generally means you can't be the
> original author in such a case, yet the absence of a From: suggests
> you are).

You're right. I mistakenly copied it from other patches.
I did the same in #10. Will remove Marco Solieri from those.
But there are some patches where we are both authors, so I will leave both
Signed-off-by.

Thanks.

> Jan



Re: [PATCH v4 03/11] xen/arm: add Dom0 cache coloring support

2024-01-12 Thread Michal Orzel
Hi Carlo,

On 23/01/2023 16:47, Carlo Nonato wrote:
> 
> 
> From: Luca Miccio 
> 
> This commit allows the user to set the cache coloring configuration for
> Dom0 via a command line parameter.
> Since cache coloring and static memory are incompatible, direct mapping
> Dom0 isn't possible when coloring is enabled.
> 
> A common configuration syntax for cache colors is also introduced here.
> 
> Signed-off-by: Luca Miccio 
> Signed-off-by: Marco Solieri 
> Signed-off-by: Carlo Nonato 
> ---
> v4:
> - dom0 colors are dynamically allocated as for any other domain
>   (colors are duplicated in dom0_colors and in the new array, but logic
>   is simpler)
> ---
>  docs/misc/arm/cache-coloring.rst| 32 ++---
>  xen/arch/arm/domain_build.c | 17 +++--
>  xen/arch/arm/include/asm/llc_coloring.h |  4 
>  xen/arch/arm/llc_coloring.c | 14 +++
>  4 files changed, 62 insertions(+), 5 deletions(-)
> 
> diff --git a/docs/misc/arm/cache-coloring.rst b/docs/misc/arm/cache-coloring.rst
> index 0244d2f606..c2e0e87426 100644
> --- a/docs/misc/arm/cache-coloring.rst
> +++ b/docs/misc/arm/cache-coloring.rst
> @@ -83,12 +83,38 @@ manually set the way size it's left for the user to overcome failing situations
>  or for debugging/testing purposes. See `Coloring parameters and domain
>  configurations`_ section for more information on that.
> 
> +Colors selection format
> +***
> +
> +Regardless of the memory pool that has to be colored (Xen, Dom0/DomUs),
> +the color selection can be expressed using the same syntax. In particular a
> +comma-separated list of colors or ranges of colors is used.
> +Ranges are hyphen-separated intervals (such as `0-4`) and are inclusive on both
> +sides.
> +
> +Note that:
> + - no spaces are allowed between values.
> + - no overlapping ranges or duplicated colors are allowed.
> + - values must be written in ascending order.
> +
> +Examples:
> +
> ++-----------------+-----------------------------+
> +|**Configuration**|**Actual selection**         |
> ++-----------------+-----------------------------+
> +|  1-2,5-8        | [1, 2, 5, 6, 7, 8]          |
> ++-----------------+-----------------------------+
> +|  4-8,10,11,12   | [4, 5, 6, 7, 8, 10, 11, 12] |
> ++-----------------+-----------------------------+
> +|  0              | [0]                         |
> ++-----------------+-----------------------------+
> +
>  Coloring parameters and domain configurations
>  *
> 
> -LLC way size (as previously discussed) can be set using the appropriate command
> -line parameter. See the relevant documentation in
> -"docs/misc/xen-command-line.pandoc".
> +LLC way size (as previously discussed) and Dom0 colors can be set using the
> +appropriate command line parameters. See the relevant documentation
> +in "docs/misc/xen-command-line.pandoc".
> 
>  **Note:** If no color configuration is provided for a domain, the default one,
>  which corresponds to all available colors, is used instead.
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index f35f4d2456..093d4ad6f6 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -2,6 +2,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -4014,7 +4015,10 @@ static int __init construct_dom0(struct domain *d)
>  /* type must be set before allocate_memory */
>  d->arch.type = kinfo.type;
>  #endif
> -allocate_memory_11(d, &kinfo);
> +if ( is_domain_llc_colored(d) )
> +allocate_memory(d, &kinfo);
While doing some checks, I realized that the issue from the previous series is
still present. Given that dom0 is hwdom, how are you going to prevent conflicts
between allocated RAM and HW resources that are to be mapped to dom0?

~Michal
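Since the quoted documentation introduces the color selection syntax
("1-2,5-8" expands to {1, 2, 5, 6, 7, 8}), here is a self-contained sketch of a
parser for it. parse_colors() is an illustrative helper written against the rules
stated in the docs, not the series' actual parsing code.

    #include <stdbool.h>
    #include <stdlib.h>

    /* Parse a comma-separated list of colors and inclusive hyphen ranges. */
    static bool parse_colors(const char *s, unsigned int *colors,
                             unsigned int max, unsigned int *count)
    {
        unsigned int prev_end = 0;
        bool first = true;

        *count = 0;
        while ( *s )
        {
            char *end;
            unsigned long start = strtoul(s, &end, 10), stop = start;

            if ( end == s )
                return false;                   /* not a number */
            if ( *end == '-' )                  /* inclusive range */
            {
                s = end + 1;
                stop = strtoul(s, &end, 10);
                if ( end == s || stop < start )
                    return false;
            }
            if ( !first && start <= prev_end )
                return false;                   /* ascending, no overlaps */
            for ( ; start <= stop; start++ )
            {
                if ( *count >= max )
                    return false;
                colors[(*count)++] = start;
            }
            prev_end = stop;
            first = false;
            if ( *end == ',' )
                end++;
            else if ( *end != '\0' )
                return false;                   /* e.g. stray spaces */
            s = end;
        }
        return *count > 0;
    }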




Re: [PATCH v2] xen/arm: bootfdt: Harden handling of malformed mem reserve map

2024-01-12 Thread Michal Orzel



On 12/01/2024 00:24, Shawn Anastasio wrote:
> 
> 
> The early_print_info routine in bootfdt.c incorrectly stores the result
> of a call to fdt_num_mem_rsv() in an unsigned int, which results in the
> negative error code being interpreted incorrectly in a subsequent loop
> in the case where the device tree is malformed. Fix this by properly
> checking the return code for an error and calling panic().
> 
> Signed-off-by: Shawn Anastasio 
Reviewed-by: Michal Orzel 

~Michal
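A brief sketch of the signedness pitfall and the fix described above;
early_print_info_sketch() paraphrases the routine, and only the error handling is
the point here.

    void early_print_info_sketch(const void *fdt)
    {
        int i, nr_rsv = fdt_num_mem_rsv(fdt);   /* negative on malformed DTB */

        if ( nr_rsv < 0 )
            panic("Parsing FDT mem reserve map failed: %d\n", nr_rsv);

        /*
         * Had nr_rsv been declared unsigned int, a negative error code would
         * become a huge loop bound below instead of being caught above.
         */
        for ( i = 0; i < nr_rsv; i++ )
        {
            uint64_t addr, size;

            if ( !fdt_get_mem_rsv(fdt, i, &addr, &size) )
                printk("  RESVD[%d]: %#"PRIx64" - %#"PRIx64"\n",
                       i, addr, addr + size);
        }
    }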




Re: [PATCH v2 2/2] xen/arm32: head: Improve logging in head.S

2024-01-12 Thread Michal Orzel
Hi Julien,

On 11/01/2024 19:34, Julien Grall wrote:
> 
> 
> From: Julien Grall 
> 
> The sequence to enable the MMU on arm32 is quite complex as we may need
> to jump to a temporary mapping to map Xen.
> 
> Recently, we had one bug in the logic (see f5a49eb7f8b3 ("xen/arm32:
> head: Add mising isb in switch_to_runtime_mapping()")) and it was
> a pain to debug because there was no logging.
> 
> In order to improve the logging in the MMU switch we need to add
> support for early printk while running on the identity mapping
> and also on the temporary mapping.
> 
> For the identity mapping, we have only the first page of Xen mapped.
> So all the strings should reside in the first page. For that purpose
> a new macro PRINT_ID is introduced.
> 
> For the temporary mapping, the fixmap is already linked in the temporary
> area (and so is the UART). So we just need to update the register
> storing the UART address (i.e. r11) to point to the UART temporary
> mapping.
> 
> Take the opportunity to introduce mov_w_on_cond in order to
> conditionally execute mov_w and avoid branches.
> 
> Signed-off-by: Julien Grall 
Reviewed-by: Michal Orzel 

with some questions below:

> 
> 
> Changelog since v1:
> - Rebase
> - Move one hunk to the first patch to unbreak compilation
> - Add more logging
> - Remove duplicated entry
> ---
>  xen/arch/arm/arm32/head.S   |  9 --
>  xen/arch/arm/arm32/mmu/head.S   | 39 +
>  xen/arch/arm/include/asm/arm32/macros.h | 33 +++--
>  xen/arch/arm/include/asm/asm_defns.h|  6 ++--
>  xen/arch/arm/include/asm/early_printk.h |  3 ++
>  xen/arch/arm/include/asm/mmu/layout.h   |  4 +++
>  xen/arch/arm/mmu/setup.c|  3 ++
>  xen/arch/arm/xen.lds.S  |  1 +
>  8 files changed, 78 insertions(+), 20 deletions(-)
> 
> diff --git a/xen/arch/arm/arm32/head.S b/xen/arch/arm/arm32/head.S
> index 34ab14a9e228..99d7d4aa63d1 100644
> --- a/xen/arch/arm/arm32/head.S
> +++ b/xen/arch/arm/arm32/head.S
> @@ -98,10 +98,6 @@ past_zImage:
>  b enable_boot_cpu_mm
> 
>  primary_switched:
> -#ifdef CONFIG_EARLY_PRINTK
> -/* Use a virtual address to access the UART. */
> -mov_w r11, EARLY_UART_VIRTUAL_ADDRESS
> -#endif
>  blzero_bss
>  PRINT("- Ready -\r\n")
>  /* Setup the arguments for start_xen and jump to C world */
> @@ -142,12 +138,7 @@ GLOBAL(init_secondary)
> 
>  mov_w lr, secondary_switched
>  b enable_secondary_cpu_mm
> -
>  secondary_switched:
> -#ifdef CONFIG_EARLY_PRINTK
> -/* Use a virtual address to access the UART. */
> -mov_w r11, EARLY_UART_VIRTUAL_ADDRESS
> -#endif
>  PRINT("- Ready -\r\n")
>  /* Jump to C world */
>  mov_w r2, start_secondary
> diff --git a/xen/arch/arm/arm32/mmu/head.S b/xen/arch/arm/arm32/mmu/head.S
> index a90799ad5451..f4abd690b612 100644
> --- a/xen/arch/arm/arm32/mmu/head.S
> +++ b/xen/arch/arm/arm32/mmu/head.S
> @@ -298,6 +298,21 @@ enable_mmu:
>  mcr   CP32(r0, HSCTLR)   /* now paging is enabled */
>  isb  /* Now, flush the icache */
> 
> +/*
> + * At this stage, the UART address will depend on whether the
> + * temporary mapping was created or not.
> + *
> + * If it was, then the UART will be mapped in the temporary
> + * area. Otherwise, it will be mapped at runtime virtual
> + * mapping.
> + */
> +#ifdef CONFIG_EARLY_PRINTK
> +teq   r12, #1   /* Was the temporary mapping created? */
> +mov_w_on_cond eq, r11, TEMPORARY_EARLY_UART_VIRTUAL_ADDRESS
> +mov_w_on_cond ne, r11, EARLY_UART_VIRTUAL_ADDRESS
> +#endif
> +PRINT_ID("- Paging turned on -\r\n")
> +
>  /*
>   * The MMU is turned on and we are in the 1:1 mapping. Switch
>   * to the runtime mapping.
> @@ -307,6 +322,17 @@ enable_mmu:
>  b switch_to_runtime_mapping
>  1:
>  mov   lr, r5/* Restore LR */
> +
> +/*
> + * Now we are running at the runtime address. The UART can
> + * be accessed using its runtime virtual address.
> + */
> +#ifdef CONFIG_EARLY_PRINTK
> +mov_w r11, EARLY_UART_VIRTUAL_ADDRESS
> +#endif
> +
> +PRINT("- Switched to the runtime mapping -\r\n")
> +
>  /*
>   * At this point, either the 1:1 map or the temporary mapping
>   * will be present. The former may clash with other parts of the
> @@ -348,12 +374,14 @@ switch_to_runtime_mapping:
>  teq   r12, #0
>  beq   ready_to_switch
> 
> +PRINT_ID("- Switching to the temporary mapping -\r\n")
>  /* We are still in the 1:1 mapping. Jump to the temporary Virtual address. */
>  mov_w r0, 1f
>  add   r0, r0, #XEN_TEMPORARY_OFFSET /* r0 := address in temporary mapping */
>  mov