On 27.06.2025 16:19, Andrew Cooper wrote:
> For features which are unconditionally set in the max policies, making the
> default policy to match the host can be done with a conditional clear.
> 
> This is simpler than the unconditional clear, conditional set currently
> performed.
> 
> Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>

Btw, in the context of some other work I came to remember that I have some
other, never submitted approach queued. I'll reproduce the raw patch (not
yet re-based over your change, and not pruned of ERMS bits) below, and I'd
be glad if you could tell me your opinion. I.e. in particular to possibly
let me know that this is all wrong, and hence not worth submitting at all.
It certainly has a nice negative diffstat (albeit that'll reduce some once
re-based) ...

Jan

x86/cpu-policy: defer setting certain bits in max policies

Instead of adding features to the max policies just to remove them again
from the (derived) default ones, defer such additions until after the
respective default policy was derived.

Signed-off-by: Jan Beulich <jbeul...@suse.com>
---
Of course this comes with the requirement that features forced on here
aren't having dependencies in either direction, as sanitise_featureset()
won't be run on the result. Would be nice to have this checked at build
time, but right now I can't think of a nice way.

Same thing for the possibility of OR-ing in INIT_SIMPLE_OR in the new
function. That would apparently need to go through featuresets though,
in which case it might even be an option to run sanitise_featureset()
over the result. Which in turn may mean some more re-structuring to be
desirable here. Using INIT_SIMPLE_OR would cover the three *_CLEAR bits
dealt with explicitly here, except that we want to set them for Intel
only; I wonder if HTT and CMP_LEGACY shouldn't also be marked '|'.

--- a/xen/arch/x86/cpu-policy.c
+++ b/xen/arch/x86/cpu-policy.c
@@ -475,7 +475,34 @@ static void __init guest_common_max_feat
         __set_bit(X86_FEATURE_ARCH_CAPS, fs);
         __set_bit(X86_FEATURE_RSBA, fs);
         __set_bit(X86_FEATURE_RRSBA, fs);
+    }
+
+    /*
+     * To mitigate Native-BHI, one option is to use a TSX Abort on capable
+     * systems.  This is safe even if RTM has been disabled for other reasons
+     * via MSR_TSX_{CTRL,FORCE_ABORT}.  However, a guest kernel doesn't get to
+     * know this type of information.
+     *
+     * Therefore the meaning of RTM_ALWAYS_ABORT has been adjusted, to instead
+     * mean "XBEGIN won't fault".  This is enough for a guest kernel to make
+     * an informed choice WRT mitigating Native-BHI.
+     *
+     * If RTM-capable, we can run a VM which has seen RTM_ALWAYS_ABORT.
+     */
+    if ( test_bit(X86_FEATURE_RTM, fs) )
+        __set_bit(X86_FEATURE_RTM_ALWAYS_ABORT, fs);
 
+    /*
+     * We expose MISC_ENABLE to guests, so our internal clearing of ERMS when
+     * FAST_STRING is not set should not affect the view of migrating-in 
guests.
+     */
+    __set_bit(X86_FEATURE_ERMS, fs);
+}
+
+static void __init guest_common_max_feature_enforcements(struct cpu_policy *p)
+{
+    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+    {
         /*
          * These bits indicate that the VERW instruction may have gained
          * scrubbing side effects.  With pooling, they mean "you might migrate
@@ -483,9 +510,9 @@ static void __init guest_common_max_feat
          * unaffected hardware.  This is fine, because the VERW instruction
          * has been around since the 286.
          */
-        __set_bit(X86_FEATURE_MD_CLEAR, fs);
-        __set_bit(X86_FEATURE_FB_CLEAR, fs);
-        __set_bit(X86_FEATURE_RFDS_CLEAR, fs);
+        p->feat.md_clear = true;
+        p->arch_caps.fb_clear = true;
+        p->arch_caps.rfds_clear = true;
 
         /*
          * The Gather Data Sampling microcode mitigation (August 2023) has an
@@ -497,7 +524,7 @@ static void __init guest_common_max_feat
         if ( boot_cpu_data.x86 == 6 &&
              boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X &&
              raw_cpu_policy.feat.clwb )
-            __set_bit(X86_FEATURE_CLWB, fs);
+            p->feat.clwb = true;
     }
 
     /*
@@ -507,29 +534,8 @@ static void __init guest_common_max_feat
      * HTT identifies p->basic.lppp as valid
      * CMP_LEGACY identifies p->extd.nc as valid
      */
-    __set_bit(X86_FEATURE_HTT, fs);
-    __set_bit(X86_FEATURE_CMP_LEGACY, fs);
-
-    /*
-     * To mitigate Native-BHI, one option is to use a TSX Abort on capable
-     * systems.  This is safe even if RTM has been disabled for other reasons
-     * via MSR_TSX_{CTRL,FORCE_ABORT}.  However, a guest kernel doesn't get to
-     * know this type of information.
-     *
-     * Therefore the meaning of RTM_ALWAYS_ABORT has been adjusted, to instead
-     * mean "XBEGIN won't fault".  This is enough for a guest kernel to make
-     * an informed choice WRT mitigating Native-BHI.
-     *
-     * If RTM-capable, we can run a VM which has seen RTM_ALWAYS_ABORT.
-     */
-    if ( test_bit(X86_FEATURE_RTM, fs) )
-        __set_bit(X86_FEATURE_RTM_ALWAYS_ABORT, fs);
-
-    /*
-     * We expose MISC_ENABLE to guests, so our internal clearing of ERMS when
-     * FAST_STRING is not set should not affect the view of migrating-in 
guests.
-     */
-    __set_bit(X86_FEATURE_ERMS, fs);
+    p->basic.htt = true;
+    p->extd.cmp_legacy = true;
 }
 
 static void __init guest_common_default_feature_adjustments(uint32_t *fs)
@@ -551,52 +557,9 @@ static void __init guest_common_default_
              boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE &&
              cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) )
             __clear_bit(X86_FEATURE_RDRAND, fs);
-
-        /*
-         * These bits indicate that the VERW instruction may have gained
-         * scrubbing side effects.  The max policy has them set for migration
-         * reasons, so reset the default policy back to the host values in
-         * case we're unaffected.
-         */
-        __clear_bit(X86_FEATURE_MD_CLEAR, fs);
-        if ( cpu_has_md_clear )
-            __set_bit(X86_FEATURE_MD_CLEAR, fs);
-
-        __clear_bit(X86_FEATURE_FB_CLEAR, fs);
-        if ( cpu_has_fb_clear )
-            __set_bit(X86_FEATURE_FB_CLEAR, fs);
-
-        __clear_bit(X86_FEATURE_RFDS_CLEAR, fs);
-        if ( cpu_has_rfds_clear )
-            __set_bit(X86_FEATURE_RFDS_CLEAR, fs);
-
-        /*
-         * The Gather Data Sampling microcode mitigation (August 2023) has an
-         * adverse performance impact on the CLWB instruction on SKX/CLX/CPX.
-         *
-         * We hid CLWB in the host policy to stop Xen using it, but re-added
-         * it to the max policy to let VMs migrate in.  Re-hide it in the
-         * default policy to disuade VMs from using it in the common case.
-         */
-        if ( boot_cpu_data.x86 == 6 &&
-             boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X &&
-             raw_cpu_policy.feat.clwb )
-            __clear_bit(X86_FEATURE_CLWB, fs);
     }
 
     /*
-     * Topology information is at the toolstack's discretion so these are
-     * unconditionally set in max, but pick a default which matches the host.
-     */
-    __clear_bit(X86_FEATURE_HTT, fs);
-    if ( cpu_has_htt )
-        __set_bit(X86_FEATURE_HTT, fs);
-
-    __clear_bit(X86_FEATURE_CMP_LEGACY, fs);
-    if ( cpu_has_cmp_legacy )
-        __set_bit(X86_FEATURE_CMP_LEGACY, fs);
-
-    /*
      * On certain hardware, speculative or errata workarounds can result in
      * TSX being placed in "force-abort" mode, where it doesn't actually
      * function as expected, but is technically compatible with the ISA.
@@ -939,12 +902,14 @@ void __init init_guest_cpu_policies(void
     {
         calculate_pv_max_policy();
         calculate_pv_def_policy();
+        guest_common_max_feature_enforcements(&pv_max_cpu_policy);
     }
 
     if ( hvm_enabled )
     {
         calculate_hvm_max_policy();
         calculate_hvm_def_policy();
+        guest_common_max_feature_enforcements(&hvm_max_cpu_policy);
     }
 }
 


Reply via email to