In all 3 examples, we're iterating over a scalar.  No caller can pass the
COMPRESSED flag in, so the upper bound of 63, as opposed to 64, doesn't
matter.

This alone produces:

  add/remove: 0/0 grow/shrink: 0/4 up/down: 0/-161 (-161)
  Function                                     old     new   delta
  compress_xsave_states                         66      58      -8
  xstate_uncompressed_size                     119      71     -48
  xstate_compressed_size                       124      76     -48
  recalculate_xstate                           347     290     -57

where xstate_{un,}compressed_size() have practically halved in size despite
being small before.

The change in compress_xsave_states() is unexpected.  The function is almost
entirely dead code, and within what remains there's a smaller stack frame.  I
suspect it's leftovers that the optimiser couldn't fully discard.

Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
---
CC: Jan Beulich <jbeul...@suse.com>
CC: Roger Pau Monné <roger....@citrix.com>
CC: Stefano Stabellini <sstabell...@kernel.org>
CC: Julien Grall <jul...@xen.org>
CC: Volodymyr Babchuk <volodymyr_babc...@epam.com>
CC: Bertrand Marquis <bertrand.marq...@arm.com>
CC: Michal Orzel <michal.or...@amd.com>
CC: Oleksii Kurochko <oleksii.kuroc...@gmail.com>
---
 xen/arch/x86/cpu-policy.c | 4 ++--
 xen/arch/x86/xstate.c     | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
index cd53bac777dc..fa55f6073089 100644
--- a/xen/arch/x86/cpu-policy.c
+++ b/xen/arch/x86/cpu-policy.c
@@ -193,7 +193,7 @@ static void sanitise_featureset(uint32_t *fs)
 static void recalculate_xstate(struct cpu_policy *p)
 {
     uint64_t xstates = XSTATE_FP_SSE;
-    unsigned int i, ecx_mask = 0, Da1 = p->xstate.Da1;
+    unsigned int ecx_mask = 0, Da1 = p->xstate.Da1;
 
     /*
      * The Da1 leaf is the only piece of information preserved in the common
@@ -237,7 +237,7 @@ static void recalculate_xstate(struct cpu_policy *p)
     /* Subleafs 2+ */
     xstates &= ~XSTATE_FP_SSE;
     BUILD_BUG_ON(ARRAY_SIZE(p->xstate.comp) < 63);
-    bitmap_for_each ( i, &xstates, 63 )
+    for_each_set_bit ( i, xstates )
     {
         /*
          * Pass through size (eax) and offset (ebx) directly.  Visbility of
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index da9053c0a262..88dbfbeafacd 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -589,7 +589,7 @@ static bool valid_xcr0(uint64_t xcr0)
 
 unsigned int xstate_uncompressed_size(uint64_t xcr0)
 {
-    unsigned int size = XSTATE_AREA_MIN_SIZE, i;
+    unsigned int size = XSTATE_AREA_MIN_SIZE;
 
     /* Non-XCR0 states don't exist in an uncompressed image. */
     ASSERT((xcr0 & ~X86_XCR0_STATES) == 0);
@@ -606,7 +606,7 @@ unsigned int xstate_uncompressed_size(uint64_t xcr0)
      * with respect their index.
      */
     xcr0 &= ~(X86_XCR0_SSE | X86_XCR0_X87);
-    bitmap_for_each ( i, &xcr0, 63 )
+    for_each_set_bit ( i, xcr0 )
     {
         const struct xstate_component *c = &raw_cpu_policy.xstate.comp[i];
         unsigned int s = c->offset + c->size;
@@ -621,7 +621,7 @@ unsigned int xstate_uncompressed_size(uint64_t xcr0)
 
 unsigned int xstate_compressed_size(uint64_t xstates)
 {
-    unsigned int i, size = XSTATE_AREA_MIN_SIZE;
+    unsigned int size = XSTATE_AREA_MIN_SIZE;
 
     if ( xstates == 0 )
         return 0;
@@ -634,7 +634,7 @@ unsigned int xstate_compressed_size(uint64_t xstates)
      * componenets require aligning to 64 first.
      */
     xstates &= ~(X86_XCR0_SSE | X86_XCR0_X87);
-    bitmap_for_each ( i, &xstates, 63 )
+    for_each_set_bit ( i, xstates )
     {
         const struct xstate_component *c = &raw_cpu_policy.xstate.comp[i];
 
-- 
2.39.2


Reply via email to