Script 'mail_helper' called by obssrc

Hello community,

here is the log from the commit of package xen for openSUSE:Factory checked in at 2025-02-03 21:41:44

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/xen (Old)
 and      /work/SRC/openSUSE:Factory/.xen.new.2316 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "xen" Mon Feb 3 21:41:44 2025 rev:357 rq:1241755 version:4.20.0_06 Changes: -------- --- /work/SRC/openSUSE:Factory/xen/xen.changes 2025-01-22 16:31:32.746012058 +0100 +++ /work/SRC/openSUSE:Factory/.xen.new.2316/xen.changes 2025-02-03 21:42:14.471324932 +0100 @@ -1,0 +2,22 @@ +Fri Jan 31 09:59:45 MST 2025 - carn...@suse.com + +- Update to Xen 4.20.0 RC3 release + * x86/HVM: correct MMIO emulation cache bounds check + * x86/HVM: allocate emulation cache entries dynamically + * x86/HVM: correct read/write split at page boundaries + * x86/iommu: check for CMPXCHG16B when enabling IOMMU + * iommu/vtd: remove non-CX16 logic from interrupt remapping + * x86/iommu: remove non-CX16 logic from DMA remapping + * iommu/amd: atomically update IRTE + * x86emul: further correct 64-bit mode zero count repeated string + insn handling + * x86/PV: further harden guest memory accesses against speculative + abuse + * x86/intel: Fix PERF_GLOBAL fixup when virtualised + +------------------------------------------------------------------- +Fri Jan 31 08:49:14 UTC 2025 - Markéta Machová <mmach...@suse.com> + +- Add explicit build dependency on python3-setuptools, needed by python313 + +------------------------------------------------------------------- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ xen.spec ++++++ --- /var/tmp/diff_new_pack.p9nuox/_old 2025-02-03 21:42:15.923385116 +0100 +++ /var/tmp/diff_new_pack.p9nuox/_new 2025-02-03 21:42:15.927385282 +0100 @@ -103,6 +103,7 @@ BuildRequires: ncurses-devel BuildRequires: openssl-devel BuildRequires: python3-devel +BuildRequires: python3-setuptools BuildRequires: xz-devel BuildRequires: pkgconfig(systemd) %ifarch x86_64 @@ -911,6 +912,10 @@ -name "s390*" -o \ -name "slof*" -o \ -name "spapr*" -o \ + -name "PKG-INFO" -o \ + -name "SOURCES.txt" -o \ + -name "dependency_links.txt" -o \ + -name "top_level.txt" -o \ -name "*.egg-info" \) \ -print -delete # Wipe empty directories ++++++ xen-4.20.0-testing-src.tar.bz2 ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/ChangeLog new/xen-4.20.0-testing/ChangeLog --- old/xen-4.20.0-testing/ChangeLog 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/ChangeLog 2025-01-31 17:59:06.000000000 +0100 @@ -1,18 +1,22 @@ -commit c3f5d1bb40b57d467cb4051eafa86f5933ec9003 -Author: Roger Pau Monne <roger....@citrix.com> -Date: Thu Jan 16 09:06:26 2025 +0100 +commit 45c65669bf34bfad9ff6de0dabae2cb201239e34 +Author: Michal Orzel <michal.or...@amd.com> +Date: Tue Jan 28 10:40:02 2025 +0100 - automation/cirrus-ci: introduce FreeBSD randconfig builds + xen/arm: Fix build issue when CONFIG_PHYS_ADDR_T_32=y - Add a new randconfig job for each FreeBSD version. This requires some - rework of the template so common parts can be shared between the full and - the randconfig builds. Such randconfig builds are relevant because FreeBSD - is the only tested system that has a full non-GNU toolchain. 
+ On Arm32, when CONFIG_PHYS_ADDR_T_32 is set, a build failure is observed: + arch/arm/platforms/vexpress.c: In function 'vexpress_smp_init': + arch/arm/platforms/vexpress.c:102:12: error: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'long long unsigned int' [-Werror=format=] + 102 | printk("Set SYS_FLAGS to %"PRIpaddr" (%p)\n", - While there replace the usage of the python311 package with python3, which is - already using 3.11, and remove the install of the plain python package for full - builds. + When CONFIG_PHYS_ADDR_T_32 is set, paddr_t is defined as unsigned long. + Commit 96f35de69e59 dropped __virt_to_maddr() which used paddr_t as a + return type. Without a cast, the expression type is unsigned long long + which causes the issue. Fix it. - Signed-off-by: Roger Pau Monné <roger....@citrix.com> - Reviewed-by: Andrew Cooper <andrew.coop...@citrix.com> - Release-Acked-by: Oleksii Kurochko<oleksii.kuroc...@gmail.com> + Fixes: 96f35de69e59 ("x86+Arm: drop (rename) __virt_to_maddr() / __maddr_to_virt()") + Signed-off-by: Michal Orzel <michal.or...@amd.com> + Release-Acked-by: Oleksii Kurochko <oleksii.kuroc...@gmail.com> + Reviewed-by: Luca Fancellu <luca.fance...@arm.com> + Tested-by: Luca Fancellu <luca.fance...@arm.com> + Reviewed-by: Stefano Stabellini <sstabell...@kernel.org> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/docs/fusa/reqs/design-reqs/arm64/generic-timer.rst new/xen-4.20.0-testing/docs/fusa/reqs/design-reqs/arm64/generic-timer.rst --- old/xen-4.20.0-testing/docs/fusa/reqs/design-reqs/arm64/generic-timer.rst 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/docs/fusa/reqs/design-reqs/arm64/generic-timer.rst 2025-01-31 17:59:06.000000000 +0100 @@ -21,7 +21,7 @@ Domains can detect the presence of the Generic Timer device tree node. Covers: - - `XenProd~emulated_timer~1` + - `XenProd~arm64_emulated_timer~1` Read system counter frequency ----------------------------- @@ -37,7 +37,7 @@ Comments: Covers: - - `XenProd~emulated_timer~1` + - `XenProd~arm64_emulated_timer~1` Access CNTKCTL_EL1 system register from a domain ------------------------------------------------ @@ -53,7 +53,7 @@ Comments: Covers: - - `XenProd~emulated_timer~1` + - `XenProd~arm64_emulated_timer~1` Access virtual timer from a domain ---------------------------------- @@ -69,7 +69,7 @@ Comments: Covers: - - `XenProd~emulated_timer~1` + - `XenProd~arm64_emulated_timer~1` Access physical timer from a domain ----------------------------------- @@ -85,7 +85,7 @@ Comments: Covers: - - `XenProd~emulated_timer~1` + - `XenProd~arm64_emulated_timer~1` Trigger the virtual timer interrupt from a domain ------------------------------------------------- @@ -101,7 +101,7 @@ Comments: Covers: - - `XenProd~emulated_timer~1` + - `XenProd~arm64_emulated_timer~1` Trigger the physical timer interrupt from a domain -------------------------------------------------- @@ -117,7 +117,7 @@ Comments: Covers: - - `XenProd~emulated_timer~1` + - `XenProd~arm64_emulated_timer~1` Assumption of Use on the Platform ================================= @@ -139,7 +139,7 @@ dt property [2], the use of this property is strongly discouraged. 
Covers: - - `XenProd~emulated_timer~1` + - `XenProd~arm64_emulated_timer~1` [1] Arm Architecture Reference Manual for A-profile architecture, Chapter 11 [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/timer/arm,arch_timer.yaml diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/docs/fusa/reqs/design-reqs/arm64/sbsa-uart.rst new/xen-4.20.0-testing/docs/fusa/reqs/design-reqs/arm64/sbsa-uart.rst --- old/xen-4.20.0-testing/docs/fusa/reqs/design-reqs/arm64/sbsa-uart.rst 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/docs/fusa/reqs/design-reqs/arm64/sbsa-uart.rst 2025-01-31 17:59:06.000000000 +0100 @@ -21,7 +21,7 @@ Domains can detect the presence of the SBSA UART device tree node. Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` Transmit data in software polling mode -------------------------------------- @@ -36,7 +36,7 @@ Comments: Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` Transmit data in interrupt driven mode -------------------------------------- @@ -51,7 +51,7 @@ Comments: Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` Receive data in software polling mode ------------------------------------- @@ -66,7 +66,7 @@ Comments: Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` Receive data in interrupt driven mode ------------------------------------- @@ -81,7 +81,7 @@ Comments: Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` Access UART data register ------------------------- @@ -96,7 +96,7 @@ Comments: Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` Access UART receive status register ----------------------------------- @@ -111,7 +111,7 @@ Comments: Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` Access UART flag register ------------------------- @@ -126,7 +126,7 @@ Comments: Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` Access UART mask set/clear register ----------------------------------- @@ -141,7 +141,7 @@ Comments: Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` Access UART raw interrupt status register ----------------------------------------- @@ -156,7 +156,7 @@ Comments: Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` Access UART masked interrupt status register -------------------------------------------- @@ -171,7 +171,7 @@ Comments: Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` Access UART interrupt clear register ------------------------------------ @@ -186,7 +186,7 @@ Comments: Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` Receive UART TX interrupt ------------------------- @@ -202,7 +202,7 @@ Comments: Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` Receive UART RX interrupt reception ----------------------------------- @@ -218,7 +218,7 @@ Comments: Covers: - - `XenProd~emulated_uart~1` + - `XenProd~arm64_emulated_uart~1` [1] Arm Base System Architecture, chapter B -[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/serial/arm_sbsa_uart.txt \ No newline at end of file +[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/serial/arm_sbsa_uart.txt diff -urN '--exclude=CVS' 
'--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/arch/arm/include/asm/mm.h new/xen-4.20.0-testing/xen/arch/arm/include/asm/mm.h --- old/xen-4.20.0-testing/xen/arch/arm/include/asm/mm.h 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/arch/arm/include/asm/mm.h 2025-01-31 17:59:06.000000000 +0100 @@ -263,7 +263,7 @@ #define virt_to_maddr(va) ({ \ vaddr_t va_ = (vaddr_t)(va); \ - (va_to_par(va_) & PADDR_MASK & PAGE_MASK) | (va_ & ~PAGE_MASK); \ + (paddr_t)((va_to_par(va_) & PADDR_MASK & PAGE_MASK) | (va_ & ~PAGE_MASK)); \ }) #ifdef CONFIG_ARM_32 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/arch/x86/cpu/intel.c new/xen-4.20.0-testing/xen/arch/x86/cpu/intel.c --- old/xen-4.20.0-testing/xen/arch/x86/cpu/intel.c 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/arch/x86/cpu/intel.c 2025-01-31 17:59:06.000000000 +0100 @@ -535,39 +535,49 @@ printk("%u MHz\n", (factor * max_ratio + 50) / 100); } +static void init_intel_perf(struct cpuinfo_x86 *c) +{ + uint64_t val; + unsigned int eax, ver, nr_cnt; + + if ( c->cpuid_level <= 9 || + ({ rdmsrl(MSR_IA32_MISC_ENABLE, val); + !(val & MSR_IA32_MISC_ENABLE_PERF_AVAIL); }) ) + return; + + eax = cpuid_eax(10); + ver = eax & 0xff; + nr_cnt = (eax >> 8) & 0xff; + + if ( ver && nr_cnt > 1 && nr_cnt <= 32 ) + { + unsigned int cnt_mask = (1UL << nr_cnt) - 1; + + /* + * On (some?) Sapphire/Emerald Rapids platforms each package-BSP + * starts with all the enable bits for the general-purpose PMCs + * cleared. Adjust so counters can be enabled from EVNTSEL. + */ + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, val); + + if ( (val & cnt_mask) != cnt_mask ) + { + printk("FIRMWARE BUG: CPU%u invalid PERF_GLOBAL_CTRL: %#"PRIx64" adjusting to %#"PRIx64"\n", + smp_processor_id(), val, val | cnt_mask); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, val | cnt_mask); + } + + __set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); + } +} + static void cf_check init_intel(struct cpuinfo_x86 *c) { /* Detect the extended topology information if available */ detect_extended_topology(c); init_intel_cacheinfo(c); - if (c->cpuid_level > 9) { - unsigned eax = cpuid_eax(10); - unsigned int cnt = (eax >> 8) & 0xff; - - /* Check for version and the number of counters */ - if ((eax & 0xff) && (cnt > 1) && (cnt <= 32)) { - uint64_t global_ctrl; - unsigned int cnt_mask = (1UL << cnt) - 1; - - /* - * On (some?) Sapphire/Emerald Rapids platforms each - * package-BSP starts with all the enable bits for the - * general-purpose PMCs cleared. Adjust so counters - * can be enabled from EVNTSEL. 
- */ - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); - if ((global_ctrl & cnt_mask) != cnt_mask) { - printk("CPU%u: invalid PERF_GLOBAL_CTRL: %#" - PRIx64 " adjusting to %#" PRIx64 "\n", - smp_processor_id(), global_ctrl, - global_ctrl | cnt_mask); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, - global_ctrl | cnt_mask); - } - __set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); - } - } + init_intel_perf(c); if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) ) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/arch/x86/hvm/emulate.c new/xen-4.20.0-testing/xen/arch/x86/hvm/emulate.c --- old/xen-4.20.0-testing/xen/arch/x86/hvm/emulate.c 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/arch/x86/hvm/emulate.c 2025-01-31 17:59:06.000000000 +0100 @@ -26,6 +26,19 @@ #include <asm/iocap.h> #include <asm/vm_event.h> +/* + * We may read or write up to m512 or up to a tile row as a number of + * device-model transactions. + */ +struct hvm_mmio_cache { + unsigned long gla; /* Start of original access (e.g. insn operand). */ + unsigned int skip; /* Offset to start of MMIO */ + unsigned int size; /* Amount of buffer[] actually used, incl @skip. */ + unsigned int space:31; /* Allocated size of buffer[]. */ + unsigned int dir:1; + uint8_t buffer[] __aligned(sizeof(long)); +}; + struct hvmemul_cache { /* The cache is disabled as long as num_ents > max_ents. */ @@ -935,7 +948,14 @@ } /* Accesses must not overflow the cache's buffer. */ - if ( size > sizeof(cache->buffer) ) + if ( offset + size > cache->space ) + { + ASSERT_UNREACHABLE(); + return X86EMUL_UNHANDLEABLE; + } + + /* Accesses must not be to the unused leading space. */ + if ( offset < cache->skip ) { ASSERT_UNREACHABLE(); return X86EMUL_UNHANDLEABLE; @@ -998,27 +1018,33 @@ /* * Multi-cycle MMIO handling is based upon the assumption that emulation - * of the same instruction will not access the same MMIO region more - * than once. Hence we can deal with re-emulation (for secondary or - * subsequent cycles) by looking up the result or previous I/O in a - * cache indexed by linear MMIO address. + * of the same instruction will not access the exact same MMIO region + * more than once in exactly the same way (if it does, the accesses will + * be "folded"). Hence we can deal with re-emulation (for secondary or + * subsequent cycles) by looking up the result of previous I/O in a cache + * indexed by linear address and access type. */ static struct hvm_mmio_cache *hvmemul_find_mmio_cache( - struct hvm_vcpu_io *hvio, unsigned long gla, uint8_t dir, bool create) + struct hvm_vcpu_io *hvio, unsigned long gla, uint8_t dir, + unsigned int skip) { unsigned int i; struct hvm_mmio_cache *cache; for ( i = 0; i < hvio->mmio_cache_count; i ++ ) { - cache = &hvio->mmio_cache[i]; + cache = hvio->mmio_cache[i]; if ( gla == cache->gla && dir == cache->dir ) return cache; } - if ( !create ) + /* + * Bail if a new entry shouldn't be allocated, relying on ->space having + * the same value for all entries. 
+ */ + if ( skip >= hvio->mmio_cache[0]->space ) return NULL; i = hvio->mmio_cache_count; @@ -1027,10 +1053,12 @@ ++hvio->mmio_cache_count; - cache = &hvio->mmio_cache[i]; - memset(cache, 0, sizeof (*cache)); + cache = hvio->mmio_cache[i]; + memset(cache->buffer, 0, cache->space); cache->gla = gla; + cache->skip = skip; + cache->size = skip; cache->dir = dir; return cache; @@ -1051,12 +1079,14 @@ static int hvmemul_linear_mmio_access( unsigned long gla, unsigned int size, uint8_t dir, void *buffer, - uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt, bool known_gpfn) + uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt, + unsigned long start_gla, bool known_gpfn) { struct hvm_vcpu_io *hvio = ¤t->arch.hvm.hvm_io; unsigned long offset = gla & ~PAGE_MASK; - struct hvm_mmio_cache *cache = hvmemul_find_mmio_cache(hvio, gla, dir, true); - unsigned int chunk, buffer_offset = 0; + unsigned int chunk, buffer_offset = gla - start_gla; + struct hvm_mmio_cache *cache = hvmemul_find_mmio_cache(hvio, start_gla, + dir, buffer_offset); paddr_t gpa; unsigned long one_rep = 1; int rc; @@ -1104,19 +1134,19 @@ static inline int hvmemul_linear_mmio_read( unsigned long gla, unsigned int size, void *buffer, uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt, - bool translate) + unsigned long start_gla, bool translate) { - return hvmemul_linear_mmio_access(gla, size, IOREQ_READ, buffer, - pfec, hvmemul_ctxt, translate); + return hvmemul_linear_mmio_access(gla, size, IOREQ_READ, buffer, pfec, + hvmemul_ctxt, start_gla, translate); } static inline int hvmemul_linear_mmio_write( unsigned long gla, unsigned int size, void *buffer, uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt, - bool translate) + unsigned long start_gla, bool translate) { - return hvmemul_linear_mmio_access(gla, size, IOREQ_WRITE, buffer, - pfec, hvmemul_ctxt, translate); + return hvmemul_linear_mmio_access(gla, size, IOREQ_WRITE, buffer, pfec, + hvmemul_ctxt, start_gla, translate); } static bool known_gla(unsigned long addr, unsigned int bytes, uint32_t pfec) @@ -1145,7 +1175,10 @@ { pagefault_info_t pfinfo; struct hvm_vcpu_io *hvio = ¤t->arch.hvm.hvm_io; + void *buffer = p_data; + unsigned long start = addr; unsigned int offset = addr & ~PAGE_MASK; + const struct hvm_mmio_cache *cache; int rc; if ( offset + bytes > PAGE_SIZE ) @@ -1169,8 +1202,17 @@ * an access that was previously handled as MMIO. Thus it is imperative that * we handle this access in the same way to guarantee completion and hence * clean up any interim state. + * + * Care must be taken, however, to correctly deal with crossing RAM/MMIO or + * MMIO/RAM boundaries. While we want to use a single cache entry (tagged + * by the starting linear address), we need to continue issuing (i.e. also + * upon replay) the RAM access for anything that's ahead of or past MMIO, + * i.e. in RAM. 
*/ - if ( !hvmemul_find_mmio_cache(hvio, addr, IOREQ_READ, false) ) + cache = hvmemul_find_mmio_cache(hvio, start, IOREQ_READ, ~0); + if ( !cache || + addr + bytes <= start + cache->skip || + addr >= start + cache->size ) rc = hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo); switch ( rc ) @@ -1186,8 +1228,8 @@ if ( pfec & PFEC_insn_fetch ) return X86EMUL_UNHANDLEABLE; - return hvmemul_linear_mmio_read(addr, bytes, p_data, pfec, - hvmemul_ctxt, + return hvmemul_linear_mmio_read(addr, bytes, buffer, pfec, + hvmemul_ctxt, start, known_gla(addr, bytes, pfec)); case HVMTRANS_gfn_paged_out: @@ -1204,7 +1246,10 @@ { pagefault_info_t pfinfo; struct hvm_vcpu_io *hvio = ¤t->arch.hvm.hvm_io; + void *buffer = p_data; + unsigned long start = addr; unsigned int offset = addr & ~PAGE_MASK; + const struct hvm_mmio_cache *cache; int rc; if ( offset + bytes > PAGE_SIZE ) @@ -1223,13 +1268,11 @@ rc = HVMTRANS_bad_gfn_to_mfn; - /* - * If there is an MMIO cache entry for the access then we must be re-issuing - * an access that was previously handled as MMIO. Thus it is imperative that - * we handle this access in the same way to guarantee completion and hence - * clean up any interim state. - */ - if ( !hvmemul_find_mmio_cache(hvio, addr, IOREQ_WRITE, false) ) + /* See commentary in linear_read(). */ + cache = hvmemul_find_mmio_cache(hvio, start, IOREQ_WRITE, ~0); + if ( !cache || + addr + bytes <= start + cache->skip || + addr >= start + cache->size ) rc = hvm_copy_to_guest_linear(addr, p_data, bytes, pfec, &pfinfo); switch ( rc ) @@ -1242,8 +1285,8 @@ return X86EMUL_EXCEPTION; case HVMTRANS_bad_gfn_to_mfn: - return hvmemul_linear_mmio_write(addr, bytes, p_data, pfec, - hvmemul_ctxt, + return hvmemul_linear_mmio_write(addr, bytes, buffer, pfec, + hvmemul_ctxt, start, known_gla(addr, bytes, pfec)); case HVMTRANS_gfn_paged_out: @@ -1630,7 +1673,7 @@ { /* Fix this in case the guest is really relying on r-m-w atomicity. */ return hvmemul_linear_mmio_write(addr, bytes, p_new, pfec, - hvmemul_ctxt, + hvmemul_ctxt, addr, hvio->mmio_access.write_access && hvio->mmio_gla == (addr & PAGE_MASK)); } @@ -2980,16 +3023,21 @@ int hvmemul_cache_init(struct vcpu *v) { /* - * No insn can access more than 16 independent linear addresses (AVX512F - * scatters/gathers being the worst). Each such linear range can span a - * page boundary, i.e. may require two page walks. Account for each insn - * byte individually, for simplicity. + * AVX512F scatter/gather insns can access up to 16 independent linear + * addresses, up to 8 bytes size. Each such linear range can span a page + * boundary, i.e. may require two page walks. */ - const unsigned int nents = (CONFIG_PAGING_LEVELS + 1) * - (MAX_INST_LEN + 16 * 2); - struct hvmemul_cache *cache = xmalloc_flex_struct(struct hvmemul_cache, - ents, nents); + unsigned int nents = 16 * 2 * (CONFIG_PAGING_LEVELS + 1); + unsigned int i, max_bytes = 64; + struct hvmemul_cache *cache; + /* + * Account for each insn byte individually, both for simplicity and to + * leave some slack space. 
+ */ + nents += MAX_INST_LEN * (CONFIG_PAGING_LEVELS + 1); + + cache = xvmalloc_flex_struct(struct hvmemul_cache, ents, nents); if ( !cache ) return -ENOMEM; @@ -2999,6 +3047,15 @@ v->arch.hvm.hvm_io.cache = cache; + for ( i = 0; i < ARRAY_SIZE(v->arch.hvm.hvm_io.mmio_cache); ++i ) + { + v->arch.hvm.hvm_io.mmio_cache[i] = + xvmalloc_flex_struct(struct hvm_mmio_cache, buffer, max_bytes); + if ( !v->arch.hvm.hvm_io.mmio_cache[i] ) + return -ENOMEM; + v->arch.hvm.hvm_io.mmio_cache[i]->space = max_bytes; + } + return 0; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/arch/x86/include/asm/asm-defns.h new/xen-4.20.0-testing/xen/arch/x86/include/asm/asm-defns.h --- old/xen-4.20.0-testing/xen/arch/x86/include/asm/asm-defns.h 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/arch/x86/include/asm/asm-defns.h 2025-01-31 17:59:06.000000000 +0100 @@ -1,3 +1,5 @@ +#include <asm/page-bits.h> + #ifndef HAVE_AS_CLAC_STAC .macro clac .byte 0x0f, 0x01, 0xca @@ -65,17 +67,36 @@ .macro guest_access_mask_ptr ptr:req, scratch1:req, scratch2:req #if defined(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) /* - * Here we want - * - * ptr &= ~0ull >> (ptr < HYPERVISOR_VIRT_END); - * + * Here we want to adjust \ptr such that + * - if it's within Xen range, it becomes non-canonical, + * - otherwise if it's (non-)canonical on input, it retains that property, + * - if the result is non-canonical, bit 47 is clear (to avoid + * potentially populating the cache with Xen data on AMD-like hardware), * but guaranteed without any conditional branches (hence in assembly). + * + * To achieve this we determine which bit to forcibly clear: Either bit 47 + * (in case the address is below HYPERVISOR_VIRT_END) or bit 63. Further + * we determine whether for forcably set bit 63: In case we first cleared + * it, we'll merely restore the original address. In case we ended up + * clearing bit 47 (i.e. the address was either non-canonical or within Xen + * range), setting the bit will yield a guaranteed non-canonical address. + * If we didn't clear a bit, we also won't set one: The address was in the + * low half of address space in that case with bit 47 already clear. The + * address can thus be left unchanged, whether canonical or not. */ mov $(HYPERVISOR_VIRT_END - 1), \scratch1 - mov $~0, \scratch2 + mov $(VADDR_BITS - 1), \scratch2 cmp \ptr, \scratch1 + /* + * Not needed: The value we have in \scratch1 will be truncated to 6 bits, + * thus yielding the value we need. 
+ mov $63, \scratch1 + */ + cmovnb \scratch2, \scratch1 + xor \scratch2, \scratch2 + btr \scratch1, \ptr rcr $1, \scratch2 - and \scratch2, \ptr + or \scratch2, \ptr #elif defined(CONFIG_DEBUG) && defined(CONFIG_PV) xor $~\@, \scratch1 xor $~\@, \scratch2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/arch/x86/include/asm/hvm/emulate.h new/xen-4.20.0-testing/xen/arch/x86/include/asm/hvm/emulate.h --- old/xen-4.20.0-testing/xen/arch/x86/include/asm/hvm/emulate.h 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/arch/x86/include/asm/hvm/emulate.h 2025-01-31 17:59:06.000000000 +0100 @@ -15,6 +15,7 @@ #include <xen/err.h> #include <xen/mm.h> #include <xen/sched.h> +#include <xen/xvmalloc.h> #include <asm/hvm/hvm.h> #include <asm/x86_emulate.h> @@ -119,7 +120,11 @@ int __must_check hvmemul_cache_init(struct vcpu *v); static inline void hvmemul_cache_destroy(struct vcpu *v) { - XFREE(v->arch.hvm.hvm_io.cache); + unsigned int i; + + for ( i = 0; i < ARRAY_SIZE(v->arch.hvm.hvm_io.mmio_cache); ++i ) + XFREE(v->arch.hvm.hvm_io.mmio_cache[i]); + XVFREE(v->arch.hvm.hvm_io.cache); } bool hvmemul_read_cache(const struct vcpu *v, paddr_t gpa, void *buffer, unsigned int size); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/arch/x86/include/asm/hvm/vcpu.h new/xen-4.20.0-testing/xen/arch/x86/include/asm/hvm/vcpu.h --- old/xen-4.20.0-testing/xen/arch/x86/include/asm/hvm/vcpu.h 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/arch/x86/include/asm/hvm/vcpu.h 2025-01-31 17:59:06.000000000 +0100 @@ -22,17 +22,6 @@ uint32_t asid; }; -/* - * We may read or write up to m512 as a number of device-model - * transactions. - */ -struct hvm_mmio_cache { - unsigned long gla; - unsigned int size; - uint8_t dir; - uint8_t buffer[64] __aligned(sizeof(long)); -}; - struct hvm_vcpu_io { /* * HVM emulation: @@ -48,7 +37,7 @@ * We may need to handle up to 3 distinct memory accesses per * instruction. */ - struct hvm_mmio_cache mmio_cache[3]; + struct hvm_mmio_cache *mmio_cache[3]; unsigned int mmio_cache_count; /* For retries we shouldn't re-fetch the instruction. */ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/arch/x86/x86_emulate/x86_emulate.c new/xen-4.20.0-testing/xen/arch/x86/x86_emulate/x86_emulate.c --- old/xen-4.20.0-testing/xen/arch/x86/x86_emulate/x86_emulate.c 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/arch/x86/x86_emulate/x86_emulate.c 2025-01-31 17:59:06.000000000 +0100 @@ -513,7 +513,7 @@ regs->r(cx) = ad_bytes == 4 ? (uint32_t)count : count; } -#define get_rep_prefix(using_si, using_di) ({ \ +#define get_rep_prefix(extend_si, extend_di) ({ \ unsigned long max_reps = 1; \ if ( rep_prefix() ) \ max_reps = get_loop_count(&_regs, ad_bytes); \ @@ -521,14 +521,14 @@ { \ /* \ * Skip the instruction if no repetitions are required, but \ - * zero extend involved registers first when using 32-bit \ + * zero extend relevant registers first when using 32-bit \ * addressing in 64-bit mode. 
\ */ \ - if ( mode_64bit() && ad_bytes == 4 ) \ + if ( !amd_like(ctxt) && mode_64bit() && ad_bytes == 4 ) \ { \ _regs.r(cx) = 0; \ - if ( using_si ) _regs.r(si) = (uint32_t)_regs.r(si); \ - if ( using_di ) _regs.r(di) = (uint32_t)_regs.r(di); \ + if ( extend_si ) _regs.r(si) = _regs.esi; \ + if ( extend_di ) _regs.r(di) = _regs.edi; \ } \ goto complete_insn; \ } \ @@ -1818,7 +1818,7 @@ dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes; if ( (rc = ioport_access_check(port, dst.bytes, ctxt, ops)) != 0 ) goto done; - nr_reps = get_rep_prefix(false, true); + nr_reps = get_rep_prefix(false, false /* don't extend RSI/RDI */); dst.mem.off = truncate_ea_and_reps(_regs.r(di), nr_reps, dst.bytes); dst.mem.seg = x86_seg_es; /* Try the presumably most efficient approach first. */ @@ -1860,7 +1860,7 @@ dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes; if ( (rc = ioport_access_check(port, dst.bytes, ctxt, ops)) != 0 ) goto done; - nr_reps = get_rep_prefix(true, false); + nr_reps = get_rep_prefix(false, false /* don't extend RSI/RDI */); ea.mem.off = truncate_ea_and_reps(_regs.r(si), nr_reps, dst.bytes); /* Try the presumably most efficient approach first. */ if ( !ops->rep_outs ) @@ -2198,7 +2198,7 @@ case 0xa6 ... 0xa7: /* cmps */ { unsigned long next_eip = _regs.r(ip); - get_rep_prefix(true, true); + get_rep_prefix(false, false /* don't extend RSI/RDI */); src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes; if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(si)), &dst.val, dst.bytes, ctxt, ops)) || @@ -2240,7 +2240,7 @@ } case 0xac ... 0xad: /* lods */ - get_rep_prefix(true, false); + get_rep_prefix(false, false /* don't extend RSI/RDI */); if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(si)), &dst.val, dst.bytes, ctxt, ops)) != 0 ) goto done; @@ -2251,7 +2251,7 @@ case 0xae ... 
0xaf: /* scas */ { unsigned long next_eip = _regs.r(ip); - get_rep_prefix(false, true); + get_rep_prefix(false, false /* don't extend RSI/RDI */); if ( (rc = read_ulong(x86_seg_es, truncate_ea(_regs.r(di)), &dst.val, src.bytes, ctxt, ops)) != 0 ) goto done; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/common/device-tree/bootfdt.c new/xen-4.20.0-testing/xen/common/device-tree/bootfdt.c --- old/xen-4.20.0-testing/xen/common/device-tree/bootfdt.c 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/common/device-tree/bootfdt.c 2025-01-31 17:59:06.000000000 +0100 @@ -27,8 +27,8 @@ */ BUILD_BUG_ON((offsetof(struct membanks, bank) != offsetof(struct meminfo, bank))); - /* Ensure "struct membanks" is 8-byte aligned */ - BUILD_BUG_ON(alignof(struct membanks) != 8); + /* Ensure "struct membanks" and "struct membank" are equally aligned */ + BUILD_BUG_ON(alignof(struct membanks) != alignof(struct membank)); } static bool __init device_tree_node_is_available(const void *fdt, int node) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/drivers/passthrough/amd/iommu_intr.c new/xen-4.20.0-testing/xen/drivers/passthrough/amd/iommu_intr.c --- old/xen-4.20.0-testing/xen/drivers/passthrough/amd/iommu_intr.c 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/drivers/passthrough/amd/iommu_intr.c 2025-01-31 17:59:06.000000000 +0100 @@ -39,7 +39,8 @@ }; union irte128 { - uint64_t raw[2]; + uint64_t raw64[2]; + __uint128_t raw128; struct { bool remap_en:1; bool sup_io_pf:1; @@ -187,7 +188,7 @@ if ( iommu->ctrl.ga_en ) { - ACCESS_ONCE(entry.ptr128->raw[0]) = 0; + ACCESS_ONCE(entry.ptr128->raw64[0]) = 0; /* * Low half (containing RemapEn) needs to be cleared first. Note that * strictly speaking smp_wmb() isn't enough, as conceptually it expands @@ -197,7 +198,7 @@ * variant will do. */ smp_wmb(); - entry.ptr128->raw[1] = 0; + entry.ptr128->raw64[1] = 0; } else ACCESS_ONCE(entry.ptr32->raw) = 0; @@ -212,7 +213,7 @@ { if ( iommu->ctrl.ga_en ) { - union irte128 irte = { + const union irte128 irte = { .full = { .remap_en = true, .int_type = int_type, @@ -222,19 +223,26 @@ .vector = vector, }, }; + __uint128_t old = entry.ptr128->raw128; + __uint128_t res = cmpxchg16b(&entry.ptr128->raw128, &old, + &irte.raw128); - ASSERT(!entry.ptr128->full.remap_en); - entry.ptr128->raw[1] = irte.raw[1]; /* - * High half needs to be set before low one (containing RemapEn). See - * comment in free_intremap_entry() regarding the choice of barrier. + * Hardware does not update the IRTE behind our backs, so the return + * value should match "old". */ - smp_wmb(); - ACCESS_ONCE(entry.ptr128->raw[0]) = irte.raw[0]; + if ( res != old ) + { + printk(XENLOG_ERR + "unexpected IRTE %016lx_%016lx (expected %016lx_%016lx)\n", + (uint64_t)(res >> 64), (uint64_t)res, + (uint64_t)(old >> 64), (uint64_t)old); + ASSERT_UNREACHABLE(); + } } else { - union irte32 irte = { + const union irte32 irte = { .flds = { .remap_en = true, .int_type = int_type, @@ -299,21 +307,13 @@ entry = get_intremap_entry(iommu, req_id, offset); - /* The RemapEn fields match for all formats. 
*/ - while ( iommu->enabled && entry.ptr32->flds.remap_en ) - { - entry.ptr32->flds.remap_en = false; - spin_unlock(lock); - - amd_iommu_flush_intremap(iommu, req_id); - - spin_lock(lock); - } - update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest); spin_unlock_irqrestore(lock, flags); + if ( !fresh ) + amd_iommu_flush_intremap(iommu, req_id); + set_rte_index(rte, offset); return 0; @@ -322,7 +322,7 @@ void cf_check amd_iommu_ioapic_update_ire( unsigned int apic, unsigned int pin, uint64_t rte) { - struct IO_APIC_route_entry old_rte, new_rte; + struct IO_APIC_route_entry new_rte; int seg, bdf, rc; struct amd_iommu *iommu; unsigned int idx; @@ -346,14 +346,6 @@ return; } - old_rte = __ioapic_read_entry(apic, pin, true); - /* mask the interrupt while we change the intremap table */ - if ( !old_rte.mask ) - { - old_rte.mask = 1; - __ioapic_write_entry(apic, pin, true, old_rte); - } - /* Update interrupt remapping entry */ rc = update_intremap_entry_from_ioapic( bdf, iommu, &new_rte, @@ -425,6 +417,7 @@ uint8_t delivery_mode, vector, dest_mode; spinlock_t *lock; unsigned int dest, offset, i; + bool fresh = false; req_id = get_dma_requestor_id(iommu->seg, bdf); alias_id = get_intremap_requestor_id(iommu->seg, bdf); @@ -468,26 +461,21 @@ return -ENOSPC; } *remap_index = offset; + fresh = true; } entry = get_intremap_entry(iommu, req_id, offset); - /* The RemapEn fields match for all formats. */ - while ( iommu->enabled && entry.ptr32->flds.remap_en ) - { - entry.ptr32->flds.remap_en = false; - spin_unlock(lock); + update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest); + spin_unlock_irqrestore(lock, flags); + if ( !fresh ) + { amd_iommu_flush_intremap(iommu, req_id); if ( alias_id != req_id ) amd_iommu_flush_intremap(iommu, alias_id); - - spin_lock(lock); } - update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest); - spin_unlock_irqrestore(lock, flags); - *data = (msg->data & ~(INTREMAP_MAX_ENTRIES - 1)) | offset; /* @@ -649,6 +637,19 @@ if ( !iommu_enable || !iommu_intremap ) return false; + if ( unlikely(!cpu_has_cx16) ) + { + AMD_IOMMU_ERROR("no CMPXCHG16B support, disabling IOMMU\n"); + /* + * Disable IOMMU support at once: there's no reason to check for CX16 + * yet again when attempting to initialize IOMMU DMA remapping + * functionality or interrupt remapping without x2APIC support. + */ + iommu_enable = false; + iommu_intremap = iommu_intremap_off; + return false; + } + if ( amd_iommu_prepare(true) ) return false; @@ -722,7 +723,7 @@ for ( count = 0; count < nr; count++ ) { if ( iommu->ctrl.ga_en - ? !tbl.ptr128[count].raw[0] && !tbl.ptr128[count].raw[1] + ? 
!tbl.ptr128[count].raw64[0] && !tbl.ptr128[count].raw64[1] : !tbl.ptr32[count].raw ) continue; @@ -735,7 +736,8 @@ if ( iommu->ctrl.ga_en ) printk(" IRTE[%03x] %016lx_%016lx\n", - count, tbl.ptr128[count].raw[1], tbl.ptr128[count].raw[0]); + count, tbl.ptr128[count].raw64[1], + tbl.ptr128[count].raw64[0]); else printk(" IRTE[%03x] %08x\n", count, tbl.ptr32[count].raw); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/drivers/passthrough/amd/iommu_map.c new/xen-4.20.0-testing/xen/drivers/passthrough/amd/iommu_map.c --- old/xen-4.20.0-testing/xen/drivers/passthrough/amd/iommu_map.c 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/drivers/passthrough/amd/iommu_map.c 2025-01-31 17:59:06.000000000 +0100 @@ -167,15 +167,14 @@ { bool valid = flags & SET_ROOT_VALID; - if ( dte->v && dte->tv && - (cpu_has_cx16 || (flags & SET_ROOT_WITH_UNITY_MAP)) ) + if ( dte->v && dte->tv ) { union { struct amd_iommu_dte dte; uint64_t raw64[4]; __uint128_t raw128[2]; } ldte = { .dte = *dte }; - __uint128_t old = ldte.raw128[0]; + __uint128_t res, old = ldte.raw128[0]; int ret = 0; ldte.dte.domain_id = domain_id; @@ -185,33 +184,20 @@ ldte.dte.paging_mode = paging_mode; ldte.dte.v = valid; - if ( cpu_has_cx16 ) - { - __uint128_t res = cmpxchg16b(dte, &old, &ldte.raw128[0]); + res = cmpxchg16b(dte, &old, &ldte.raw128[0]); - /* - * Hardware does not update the DTE behind our backs, so the - * return value should match "old". - */ - if ( res != old ) - { - printk(XENLOG_ERR - "Dom%d: unexpected DTE %016lx_%016lx (expected %016lx_%016lx)\n", - domain_id, - (uint64_t)(res >> 64), (uint64_t)res, - (uint64_t)(old >> 64), (uint64_t)old); - ret = -EILSEQ; - } - } - else /* Best effort, updating domain_id last. */ + /* + * Hardware does not update the DTE behind our backs, so the + * return value should match "old". + */ + if ( res != old ) { - uint64_t *ptr = (void *)dte; - - write_atomic(ptr + 0, ldte.raw64[0]); - /* No barrier should be needed between these two. */ - write_atomic(ptr + 1, ldte.raw64[1]); - - ret = 1; + printk(XENLOG_ERR + "Dom%d: unexpected DTE %016lx_%016lx (expected %016lx_%016lx)\n", + domain_id, + (uint64_t)(res >> 64), (uint64_t)res, + (uint64_t)(old >> 64), (uint64_t)old); + ret = -EILSEQ; } return ret; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/drivers/passthrough/amd/pci_amd_iommu.c new/xen-4.20.0-testing/xen/drivers/passthrough/amd/pci_amd_iommu.c --- old/xen-4.20.0-testing/xen/drivers/passthrough/amd/pci_amd_iommu.c 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/drivers/passthrough/amd/pci_amd_iommu.c 2025-01-31 17:59:06.000000000 +0100 @@ -309,6 +309,12 @@ if ( !iommu_enable && !iommu_intremap ) return 0; + if ( unlikely(!cpu_has_cx16) ) + { + AMD_IOMMU_ERROR("no CMPXCHG16B support, disabling IOMMU\n"); + return -ENODEV; + } + if ( (init_done ? 
amd_iommu_init_late() : amd_iommu_init(false)) != 0 ) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/drivers/passthrough/vtd/intremap.c new/xen-4.20.0-testing/xen/drivers/passthrough/vtd/intremap.c --- old/xen-4.20.0-testing/xen/drivers/passthrough/vtd/intremap.c 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/drivers/passthrough/vtd/intremap.c 2025-01-31 17:59:06.000000000 +0100 @@ -150,6 +150,19 @@ if ( !iommu_qinval || !iommu_intremap || list_empty(&acpi_drhd_units) ) return false; + if ( unlikely(!cpu_has_cx16) ) + { + printk(XENLOG_ERR VTDPREFIX "no CMPXCHG16B support, disabling IOMMU\n"); + /* + * Disable IOMMU support at once: there's no reason to check for CX16 + * yet again when attempting to initialize IOMMU DMA remapping + * functionality or interrupt remapping without x2APIC support. + */ + iommu_enable = false; + iommu_intremap = iommu_intremap_off; + return false; + } + /* We MUST have a DRHD unit for each IOAPIC. */ for ( apic = 0; apic < nr_ioapics; apic++ ) if ( !ioapic_to_drhd(IO_APIC_ID(apic)) ) @@ -171,49 +184,26 @@ /* * Assume iremap_lock has been acquired. It is to make sure software will not - * change the same IRTE behind us. With this assumption, if only high qword or - * low qword in IRTE is to be updated, this function's atomic variant can - * present an atomic update to VT-d hardware even when cmpxchg16b - * instruction is not supported. + * change the same IRTE behind us. */ static void update_irte(struct vtd_iommu *iommu, struct iremap_entry *entry, const struct iremap_entry *new_ire, bool atomic) { - ASSERT(spin_is_locked(&iommu->intremap.lock)); + __uint128_t ret; + struct iremap_entry old_ire; - if ( cpu_has_cx16 ) - { - __uint128_t ret; - struct iremap_entry old_ire; + ASSERT(spin_is_locked(&iommu->intremap.lock)); - old_ire = *entry; - ret = cmpxchg16b(entry, &old_ire, new_ire); + old_ire = *entry; + ret = cmpxchg16b(entry, &old_ire, new_ire); - /* - * In the above, we use cmpxchg16 to atomically update the 128-bit - * IRTE, and the hardware cannot update the IRTE behind us, so - * the return value of cmpxchg16 should be the same as old_ire. - * This ASSERT validate it. - */ - ASSERT(ret == old_ire.val); - } - else - { - /* - * VT-d hardware doesn't update IRTEs behind us, nor the software - * since we hold iremap_lock. If the caller wants VT-d hardware to - * always see a consistent entry, but we can't meet it, a bug will - * be raised. - */ - if ( entry->lo == new_ire->lo ) - write_atomic(&entry->hi, new_ire->hi); - else if ( entry->hi == new_ire->hi ) - write_atomic(&entry->lo, new_ire->lo); - else if ( !atomic ) - *entry = *new_ire; - else - BUG(); - } + /* + * In the above, we use cmpxchg16 to atomically update the 128-bit + * IRTE, and the hardware cannot update the IRTE behind us, so + * the return value of cmpxchg16 should be the same as old_ire. + * This ASSERT validate it. + */ + ASSERT(ret == old_ire.val); } /* Mark specified intr remap entry as free */ @@ -395,7 +385,6 @@ /* Indicate remap format. */ remap_rte->format = 1; - /* If cmpxchg16b is not available the caller must mask the IO-APIC pin. 
*/ update_irte(iommu, iremap_entry, &new_ire, !init && !masked); iommu_sync_cache(iremap_entry, sizeof(*iremap_entry)); iommu_flush_iec_index(iommu, 0, index); @@ -434,38 +423,15 @@ { struct IO_xAPIC_route_entry old_rte = {}, new_rte; struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic)); - bool masked = true; int rc; - if ( !cpu_has_cx16 ) - { - /* - * Cannot atomically update the IRTE entry: mask the IO-APIC pin to - * avoid interrupts seeing an inconsistent IRTE entry. - */ - old_rte = __ioapic_read_entry(apic, pin, true); - if ( !old_rte.mask ) - { - masked = false; - old_rte.mask = 1; - __ioapic_write_entry(apic, pin, true, old_rte); - } - } - /* Not the initializer, for old gcc to cope. */ new_rte.raw = rte; rc = ioapic_rte_to_remap_entry(iommu, apic, pin, &old_rte, new_rte); if ( rc ) - { - if ( !masked ) - { - /* Recover the original value of 'mask' bit */ - old_rte.mask = 0; - __ioapic_write_entry(apic, pin, true, old_rte); - } return; - } + /* old_rte will contain the updated IO-APIC RTE on success. */ __ioapic_write_entry(apic, pin, true, old_rte); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/drivers/passthrough/vtd/iommu.c new/xen-4.20.0-testing/xen/drivers/passthrough/vtd/iommu.c --- old/xen-4.20.0-testing/xen/drivers/passthrough/vtd/iommu.c 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/drivers/passthrough/vtd/iommu.c 2025-01-31 17:59:06.000000000 +0100 @@ -1485,7 +1485,7 @@ { struct domain_iommu *hd = dom_iommu(domain); struct context_entry *context, *context_entries, lctxt; - __uint128_t old; + __uint128_t res, old; uint64_t maddr; uint16_t seg = iommu->drhd->segment, prev_did = 0; struct domain *prev_dom = NULL; @@ -1583,55 +1583,23 @@ ASSERT(!context_fault_disable(lctxt)); } - if ( cpu_has_cx16 ) - { - __uint128_t res = cmpxchg16b(context, &old, &lctxt.full); + res = cmpxchg16b(context, &old, &lctxt.full); - /* - * Hardware does not update the context entry behind our backs, - * so the return value should match "old". - */ - if ( res != old ) - { - if ( pdev ) - check_cleanup_domid_map(domain, pdev, iommu); - printk(XENLOG_ERR - "%pp: unexpected context entry %016lx_%016lx (expected %016lx_%016lx)\n", - &PCI_SBDF(seg, bus, devfn), - (uint64_t)(res >> 64), (uint64_t)res, - (uint64_t)(old >> 64), (uint64_t)old); - rc = -EILSEQ; - goto unlock; - } - } - else if ( !prev_dom || !(mode & MAP_WITH_RMRR) ) + /* + * Hardware does not update the context entry behind our backs, + * so the return value should match "old". + */ + if ( res != old ) { - context_clear_present(*context); - iommu_sync_cache(context, sizeof(*context)); - - write_atomic(&context->hi, lctxt.hi); - /* No barrier should be needed between these two. */ - write_atomic(&context->lo, lctxt.lo); - } - else /* Best effort, updating DID last. */ - { - /* - * By non-atomically updating the context entry's DID field last, - * during a short window in time TLB entries with the old domain ID - * but the new page tables may be inserted. This could affect I/O - * of other devices using this same (old) domain ID. Such updating - * therefore is not a problem if this was the only device associated - * with the old domain ID. Diverting I/O of any of a dying domain's - * devices to the quarantine page tables is intended anyway. 
- */ - if ( !(mode & (MAP_OWNER_DYING | MAP_SINGLE_DEVICE)) ) - printk(XENLOG_WARNING VTDPREFIX - " %pp: reassignment may cause %pd data corruption\n", - &PCI_SBDF(seg, bus, devfn), prev_dom); - - write_atomic(&context->lo, lctxt.lo); - /* No barrier should be needed between these two. */ - write_atomic(&context->hi, lctxt.hi); + if ( pdev ) + check_cleanup_domid_map(domain, pdev, iommu); + printk(XENLOG_ERR + "%pp: unexpected context entry %016lx_%016lx (expected %016lx_%016lx)\n", + &PCI_SBDF(seg, bus, devfn), + (uint64_t)(res >> 64), (uint64_t)res, + (uint64_t)(old >> 64), (uint64_t)old); + rc = -EILSEQ; + goto unlock; } iommu_sync_cache(context, sizeof(struct context_entry)); @@ -1727,15 +1695,9 @@ break; } - if ( domain != pdev->domain && pdev->domain != dom_io ) - { - if ( pdev->domain->is_dying ) - mode |= MAP_OWNER_DYING; - else if ( drhd && - !any_pdev_behind_iommu(pdev->domain, pdev, drhd->iommu) && - !pdev->phantom_stride ) - mode |= MAP_SINGLE_DEVICE; - } + if ( domain != pdev->domain && pdev->domain != dom_io && + pdev->domain->is_dying ) + mode |= MAP_OWNER_DYING; switch ( pdev->type ) { @@ -2633,6 +2595,13 @@ int ret; bool reg_inval_supported = true; + if ( unlikely(!cpu_has_cx16) ) + { + printk(XENLOG_ERR VTDPREFIX "no CMPXCHG16B support, disabling IOMMU\n"); + ret = -ENODEV; + goto error; + } + if ( list_empty(&acpi_drhd_units) ) { ret = -ENODEV; @@ -2695,12 +2664,7 @@ iommu_intremap = iommu_intremap_off; #ifndef iommu_intpost - /* - * We cannot use posted interrupt if X86_FEATURE_CX16 is - * not supported, since we count on this feature to - * atomically update 16-byte IRTE in posted format. - */ - if ( !cap_intr_post(iommu->cap) || !iommu_intremap || !cpu_has_cx16 ) + if ( !cap_intr_post(iommu->cap) || !iommu_intremap ) iommu_intpost = false; #endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.20.0-testing/xen/drivers/passthrough/vtd/vtd.h new/xen-4.20.0-testing/xen/drivers/passthrough/vtd/vtd.h --- old/xen-4.20.0-testing/xen/drivers/passthrough/vtd/vtd.h 2025-01-20 13:45:27.000000000 +0100 +++ new/xen-4.20.0-testing/xen/drivers/passthrough/vtd/vtd.h 2025-01-31 17:59:06.000000000 +0100 @@ -28,9 +28,8 @@ */ #define MAP_WITH_RMRR (1u << 0) #define MAP_OWNER_DYING (1u << 1) -#define MAP_SINGLE_DEVICE (1u << 2) -#define MAP_ERROR_RECOVERY (1u << 3) -#define UNMAP_ME_PHANTOM_FUNC (1u << 4) +#define MAP_ERROR_RECOVERY (1u << 2) +#define UNMAP_ME_PHANTOM_FUNC (1u << 3) /* Allow for both IOAPIC and IOSAPIC. */ #define IO_xAPIC_route_entry IO_APIC_route_entry
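
The interrupt/DMA remapping hunks above (AMD IRTE and DTE, VT-d IRTE and context entries) share one pattern: read the current 128-bit entry under the table lock, install the new value with a single 16-byte compare-and-exchange, and treat any mismatch as a bug, since the hardware never rewrites these entries behind the driver's back. The sketch below is not Xen code; it is a minimal standalone illustration of that pattern, assuming a hypothetical entry128 type and update_entry_atomic() helper, and using the GCC/Clang __atomic_compare_exchange_n builtin (build with -mcx16 on x86-64) in place of Xen's cmpxchg16b() wrapper.

#include <stdbool.h>
#include <stdio.h>

typedef unsigned __int128 u128;

/* Hypothetical stand-in for a 16-byte remapping table entry (cf. union irte128). */
struct entry128 {
    u128 raw;
} __attribute__((aligned(16)));

static bool update_entry_atomic(struct entry128 *e, u128 new_val)
{
    /* Snapshot taken while holding the table lock, as in the Xen hunks above. */
    u128 old = e->raw;

    /*
     * One 16-byte compare-and-exchange: an interrupt can never observe a
     * half-updated entry, unlike with a pair of 64-bit writes.
     */
    if ( !__atomic_compare_exchange_n(&e->raw, &old, new_val,
                                      false, __ATOMIC_SEQ_CST,
                                      __ATOMIC_SEQ_CST) )
    {
        /* On failure, "old" now holds the value actually found in the table. */
        fprintf(stderr, "unexpected entry %016llx_%016llx\n",
                (unsigned long long)(old >> 64), (unsigned long long)old);
        return false;   /* Xen reports this via printk() plus ASSERT/-EILSEQ. */
    }

    return true;
}

A caller would invoke something like update_entry_atomic(&table[idx], new_raw) with the per-table lock held. Making CX16 a hard requirement (the cpu_has_cx16 checks added in vtd/iommu.c, vtd/intremap.c and pci_amd_iommu.c) is what allows the removed fallback paths, which wrote the two 64-bit halves separately and relied on IO-APIC pin masking or ordering tricks, to be dropped.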