Re: [Xen-devel] [PATCH v2 2/4] x86/emul: Optimise decode_register() somewhat

2018-01-31 Jan Beulich
>>> On 30.01.18 at 16:56,  wrote:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -1935,36 +1935,67 @@ load_seg(
>      return rc;
>  }
>  
> +/* Map GPRs by ModRM encoding to their offset within struct cpu_user_regs. */
> +static const uint8_t cpu_user_regs_gpr_offsets[] = {
> +    offsetof(struct cpu_user_regs, r(ax)),
> +    offsetof(struct cpu_user_regs, r(cx)),
> +    offsetof(struct cpu_user_regs, r(dx)),
> +    offsetof(struct cpu_user_regs, r(bx)),
> +    offsetof(struct cpu_user_regs, r(sp)),
> +    offsetof(struct cpu_user_regs, r(bp)),
> +    offsetof(struct cpu_user_regs, r(si)),
> +    offsetof(struct cpu_user_regs, r(di)),
> +#ifdef __x86_64__
> +    offsetof(struct cpu_user_regs, r8),
> +    offsetof(struct cpu_user_regs, r9),
> +    offsetof(struct cpu_user_regs, r10),
> +    offsetof(struct cpu_user_regs, r11),
> +    offsetof(struct cpu_user_regs, r12),
> +    offsetof(struct cpu_user_regs, r13),
> +    offsetof(struct cpu_user_regs, r14),
> +    offsetof(struct cpu_user_regs, r15),
> +#endif
> +};
> +
>  void *
>  decode_register(
>      uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
>  {
> -    void *p;
> +    static const uint8_t byteop_offsets[] = {

byte_reg_offsets[] ?

With that (or another suitable name not using "op" when registers
are meant)
Reviewed-by: Jan Beulich 

Jan



[Xen-devel] [PATCH v2 2/4] x86/emul: Optimise decode_register() somewhat

2018-01-30 Andrew Cooper
The positions of GPRs inside struct cpu_user_regs don't follow any
particular order, so as compiled, decode_register() becomes a jump table to 16
blocks, each of which calculates the appropriate offset, for a total of 207 bytes.

Instead, pre-compute the offsets at build time and use pointer arithmetic to
calculate the result.  By observation, most callers in x86_emulate() inline
and constant-propagate the highbyte_regs value of 0.
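
As a standalone illustration of the technique (a minimal sketch using a
hypothetical struct and names, not the Xen code), the decode collapses to
one table load plus pointer arithmetic; with a constant modrm_reg or
highbyte_regs at the call site, a compiler can fold the lookup entirely:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct regs { unsigned long ax, cx, dx, bx, sp, bp, si, di; };

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Offsets are computed at build time, so no jump table is needed. */
static const uint8_t gpr_offsets[] = {
    offsetof(struct regs, ax), offsetof(struct regs, cx),
    offsetof(struct regs, dx), offsetof(struct regs, bx),
    offsetof(struct regs, sp), offsetof(struct regs, bp),
    offsetof(struct regs, si), offsetof(struct regs, di),
};

static void *decode_gpr(struct regs *regs, unsigned int modrm_reg)
{
    assert(modrm_reg < ARRAY_SIZE(gpr_offsets));
    modrm_reg &= ARRAY_SIZE(gpr_offsets) - 1;    /* release-build clamp */
    return (char *)regs + gpr_offsets[modrm_reg];
}

int main(void)
{
    struct regs r = { .dx = 0x1234 };

    /* ModRM encoding 2 selects %rdx; the lookup lands on r.dx. */
    printf("%#lx\n", *(unsigned long *)decode_gpr(&r, 2));
    return 0;
}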

The splitting of the general and legacy byte-op cases means that we will now
hit an ASSERT if any code path tries to use the legacy byte-op encoding with a
REX prefix.
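
The release-build masking in the patch relies on the table sizes being
powers of two, so masking with (size - 1) keeps even a bogus encoding
inside the array.  A sketch of just that guard (stand-in macro and table,
not the Xen definitions):

#include <stdint.h>
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const uint8_t table[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };

int main(void)
{
    unsigned int idx = 9;                  /* out-of-range encoding */

    /* Masking is only sound when the size is a power of two. */
    _Static_assert((ARRAY_SIZE(table) & (ARRAY_SIZE(table) - 1)) == 0,
                   "table size must be a power of two");

    idx &= ARRAY_SIZE(table) - 1;          /* 9 -> 1: stays in bounds */
    printf("%d\n", table[idx]);            /* prints 11 */
    return 0;
}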

Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 

v2:
 * Move byteop_offsets[] into function scope.  Rearrange to have a smaller
   byteop_offsets[] array.
---
 xen/arch/x86/x86_emulate/x86_emulate.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 53 insertions(+), 22 deletions(-)

diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index ff0a003..123d941 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1935,36 +1935,67 @@ load_seg(
     return rc;
 }
 
+/* Map GPRs by ModRM encoding to their offset within struct cpu_user_regs. */
+static const uint8_t cpu_user_regs_gpr_offsets[] = {
+    offsetof(struct cpu_user_regs, r(ax)),
+    offsetof(struct cpu_user_regs, r(cx)),
+    offsetof(struct cpu_user_regs, r(dx)),
+    offsetof(struct cpu_user_regs, r(bx)),
+    offsetof(struct cpu_user_regs, r(sp)),
+    offsetof(struct cpu_user_regs, r(bp)),
+    offsetof(struct cpu_user_regs, r(si)),
+    offsetof(struct cpu_user_regs, r(di)),
+#ifdef __x86_64__
+    offsetof(struct cpu_user_regs, r8),
+    offsetof(struct cpu_user_regs, r9),
+    offsetof(struct cpu_user_regs, r10),
+    offsetof(struct cpu_user_regs, r11),
+    offsetof(struct cpu_user_regs, r12),
+    offsetof(struct cpu_user_regs, r13),
+    offsetof(struct cpu_user_regs, r14),
+    offsetof(struct cpu_user_regs, r15),
+#endif
+};
+
 void *
 decode_register(
     uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
 {
-    void *p;
+    static const uint8_t byteop_offsets[] = {
+        offsetof(struct cpu_user_regs, al),
+        offsetof(struct cpu_user_regs, cl),
+        offsetof(struct cpu_user_regs, dl),
+        offsetof(struct cpu_user_regs, bl),
+        offsetof(struct cpu_user_regs, ah),
+        offsetof(struct cpu_user_regs, ch),
+        offsetof(struct cpu_user_regs, dh),
+        offsetof(struct cpu_user_regs, bh),
+    };
 
-    switch ( modrm_reg )
+    if ( !highbyte_regs )
     {
-    case  0: p = &regs->r(ax); break;
-    case  1: p = &regs->r(cx); break;
-    case  2: p = &regs->r(dx); break;
-    case  3: p = &regs->r(bx); break;
-    case  4: p = (highbyte_regs ? &regs->ah : (void *)&regs->r(sp)); break;
-    case  5: p = (highbyte_regs ? &regs->ch : (void *)&regs->r(bp)); break;
-    case  6: p = (highbyte_regs ? &regs->dh : (void *)&regs->r(si)); break;
-    case  7: p = (highbyte_regs ? &regs->bh : (void *)&regs->r(di)); break;
-#if defined(__x86_64__)
-    case  8: p = &regs->r8;  break;
-    case  9: p = &regs->r9;  break;
-    case 10: p = &regs->r10; break;
-    case 11: p = &regs->r11; break;
-    case 12: p = &regs->r12; break;
-    case 13: p = &regs->r13; break;
-    case 14: p = &regs->r14; break;
-    case 15: p = &regs->r15; break;
-#endif
-    default: BUG(); p = NULL; break;
+        /* Check that the array is a power of two. */
+        BUILD_BUG_ON(ARRAY_SIZE(cpu_user_regs_gpr_offsets) &
+                     (ARRAY_SIZE(cpu_user_regs_gpr_offsets) - 1));
+
+        ASSERT(modrm_reg < ARRAY_SIZE(cpu_user_regs_gpr_offsets));
+
+        /* For safety in release builds.  Debug builds will hit the ASSERT() */
+        modrm_reg &= ARRAY_SIZE(cpu_user_regs_gpr_offsets) - 1;
+
+        return (void *)regs + cpu_user_regs_gpr_offsets[modrm_reg];
     }
 
-    return p;
+    /* Check that the array is a power of two. */
+    BUILD_BUG_ON(ARRAY_SIZE(byteop_offsets) &
+                 (ARRAY_SIZE(byteop_offsets) - 1));
+
+    ASSERT(modrm_reg < ARRAY_SIZE(byteop_offsets));
+
+    /* For safety in release builds.  Debug builds will hit the ASSERT() */
+    modrm_reg &= ARRAY_SIZE(byteop_offsets) - 1;
+
+    return (void *)regs + byteop_offsets[modrm_reg];
 }
 
 static void *decode_vex_gpr(unsigned int vex_reg, struct cpu_user_regs *regs,
 
 static void *decode_vex_gpr(unsigned int vex_reg, struct cpu_user_regs *regs,
-- 
2.1.4

