Re: [RFC PATCH 1/4] KVM: emulate: speed up do_insn_fetch

2014-05-07 Thread Paolo Bonzini

On 07/05/2014 04:30, Bandan Das wrote:

>> +		if (unlikely(ctxt->_eip == fc->end)) {
>
> Is this really going to be unlikely?



Yes, it happens at most once per instruction and only for instructions 
that cross pages.
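To put numbers on that: the prefetch window is min(15 - cur_size, PAGE_SIZE - offset_in_page(eip)), so once the cache has been primed for an instruction, the ctxt->_eip == fc->end test can only fire again if the instruction straddles a page boundary. A standalone sketch of the arithmetic (the helper name and the sample addresses are invented for illustration; this is not code from the patch):

/*
 * Standalone sketch, not from the patch: compute how many bytes
 * do_insn_fetch_bytes() can prefetch in one go.  PAGE_SIZE and the
 * 15-byte x86 instruction-length limit are real; the addresses are
 * made up.
 */
#include <stdio.h>

#define PAGE_SIZE		4096UL
#define offset_in_page(p)	((p) & (PAGE_SIZE - 1))

static unsigned long prefetch_window(unsigned long eip)
{
	/* cur_size == 0: the cache was just primed at eip */
	unsigned long to_page_end = PAGE_SIZE - offset_in_page(eip);
	return 15UL < to_page_end ? 15UL : to_page_end;
}

int main(void)
{
	/* Mid-page: all 15 possible bytes are cached in one prefetch,
	 * so the refill branch never fires for this instruction. */
	printf("%lu\n", prefetch_window(0x1000));	/* 15 */

	/* 3 bytes before a page boundary: only an instruction longer
	 * than 3 bytes needs a second prefetch -- the rare case. */
	printf("%lu\n", prefetch_window(0x1ffd));	/* 3 */
	return 0;
}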


Paolo


Re: [RFC PATCH 1/4] KVM: emulate: speed up do_insn_fetch

2014-05-06 Thread Bandan Das
Ok! Now that you posted your changes, I am getting to understand this
a little :)
 
Paolo Bonzini  writes:

> Hoist the common case up from do_insn_fetch_byte to do_insn_fetch,
> and prime the fetch_cache in x86_decode_insn.  This helps both the
> compiler and the branch predictor.
>
> Signed-off-by: Paolo Bonzini 
> ---
>  arch/x86/kvm/emulate.c | 67 +++---
>  1 file changed, 36 insertions(+), 31 deletions(-)
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 42820f5fdd04..c7b625bf0b5d 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -703,51 +703,51 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
>  }
>  
>  /*
> - * Fetch the next byte of the instruction being emulated which is pointed to
> - * by ctxt->_eip, then increment ctxt->_eip.
> - *
> - * Also prefetch the remaining bytes of the instruction without crossing page
> + * Prefetch the remaining bytes of the instruction without crossing page
>   * boundary if they are not in fetch_cache yet.
>   */
> -static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, u8 *dest)
> +static int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt)
>  {
>   struct fetch_cache *fc = &ctxt->fetch;
>   int rc;
>   int size, cur_size;
> -
> - if (ctxt->_eip == fc->end) {
> - unsigned long linear;
> - struct segmented_address addr = { .seg = VCPU_SREG_CS,
> -   .ea  = ctxt->_eip };
> - cur_size = fc->end - fc->start;
> - size = min(15UL - cur_size,
> -PAGE_SIZE - offset_in_page(ctxt->_eip));
> - rc = __linearize(ctxt, addr, size, false, true, &linear);
> - if (unlikely(rc != X86EMUL_CONTINUE))
> - return rc;
> - rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size,
> -   size, &ctxt->exception);
> - if (unlikely(rc != X86EMUL_CONTINUE))
> - return rc;
> - fc->end += size;
> - }
> - *dest = fc->data[ctxt->_eip - fc->start];
> - ctxt->_eip++;
> + unsigned long linear;
> +
> + struct segmented_address addr = { .seg = VCPU_SREG_CS,
> +   .ea  = fc->end };
> + cur_size = fc->end - fc->start;
> + size = min(15UL - cur_size,
> +PAGE_SIZE - offset_in_page(fc->end));
> + if (unlikely(size == 0))
> + return X86EMUL_UNHANDLEABLE;
> + rc = __linearize(ctxt, addr, size, false, true, &linear);
> + if (unlikely(rc != X86EMUL_CONTINUE))
> + return rc;
> + rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size,
> +   size, &ctxt->exception);
> + if (unlikely(rc != X86EMUL_CONTINUE))
> + return rc;
> + fc->end += size;
>   return X86EMUL_CONTINUE;
>  }
>  
>  static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
> -  void *dest, unsigned size)
> +  void *__dest, unsigned size)
>  {
>   int rc;
> + struct fetch_cache *fc = &ctxt->fetch;
> + u8 *dest = __dest;
> + u8 *src = &fc->data[ctxt->_eip - fc->start];
>  
> - /* x86 instructions are limited to 15 bytes. */
> - if (unlikely(ctxt->_eip + size - ctxt->eip > 15))
> - return X86EMUL_UNHANDLEABLE;
>   while (size--) {
> - rc = do_insn_fetch_byte(ctxt, dest++);
> - if (rc != X86EMUL_CONTINUE)
> - return rc;
> + if (unlikely(ctxt->_eip == fc->end)) {

Is this really going to be unlikely?

> + rc = do_insn_fetch_bytes(ctxt);
> + if (rc != X86EMUL_CONTINUE)
> + return rc;
> + }
> + *dest++ = *src++;
> + ctxt->_eip++;
> + continue;
>   }
>   return X86EMUL_CONTINUE;
>  }
> @@ -4272,6 +4272,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
>   ctxt->opcode_len = 1;
>   if (insn_len > 0)
>   memcpy(ctxt->fetch.data, insn, insn_len);
> + else {
> + rc = do_insn_fetch_bytes(ctxt);
> + if (rc != X86EMUL_CONTINUE)
> + return rc;
> + }
>  
>   switch (mode) {
>   case X86EMUL_MODE_REAL:


[RFC PATCH 1/4] KVM: emulate: speed up do_insn_fetch

2014-05-06 Thread Paolo Bonzini
Hoist the common case up from do_insn_fetch_byte to do_insn_fetch,
and prime the fetch_cache in x86_decode_insn.  This helps both the
compiler and the branch predictor.

Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/emulate.c | 67 +++---
 1 file changed, 36 insertions(+), 31 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 42820f5fdd04..c7b625bf0b5d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -703,51 +703,51 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 }
 
 /*
- * Fetch the next byte of the instruction being emulated which is pointed to
- * by ctxt->_eip, then increment ctxt->_eip.
- *
- * Also prefetch the remaining bytes of the instruction without crossing page
+ * Prefetch the remaining bytes of the instruction without crossing page
  * boundary if they are not in fetch_cache yet.
  */
-static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, u8 *dest)
+static int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt)
 {
struct fetch_cache *fc = &ctxt->fetch;
int rc;
int size, cur_size;
-
-   if (ctxt->_eip == fc->end) {
-   unsigned long linear;
-   struct segmented_address addr = { .seg = VCPU_SREG_CS,
- .ea  = ctxt->_eip };
-   cur_size = fc->end - fc->start;
-   size = min(15UL - cur_size,
-  PAGE_SIZE - offset_in_page(ctxt->_eip));
-   rc = __linearize(ctxt, addr, size, false, true, &linear);
-   if (unlikely(rc != X86EMUL_CONTINUE))
-   return rc;
-   rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size,
- size, &ctxt->exception);
-   if (unlikely(rc != X86EMUL_CONTINUE))
-   return rc;
-   fc->end += size;
-   }
-   *dest = fc->data[ctxt->_eip - fc->start];
-   ctxt->_eip++;
+   unsigned long linear;
+
+   struct segmented_address addr = { .seg = VCPU_SREG_CS,
+ .ea  = fc->end };
+   cur_size = fc->end - fc->start;
+   size = min(15UL - cur_size,
+  PAGE_SIZE - offset_in_page(fc->end));
+   if (unlikely(size == 0))
+   return X86EMUL_UNHANDLEABLE;
+   rc = __linearize(ctxt, addr, size, false, true, &linear);
+   if (unlikely(rc != X86EMUL_CONTINUE))
+   return rc;
+   rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size,
+ size, &ctxt->exception);
+   if (unlikely(rc != X86EMUL_CONTINUE))
+   return rc;
+   fc->end += size;
return X86EMUL_CONTINUE;
 }
 
 static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
-void *dest, unsigned size)
+void *__dest, unsigned size)
 {
int rc;
+   struct fetch_cache *fc = &ctxt->fetch;
+   u8 *dest = __dest;
+   u8 *src = &fc->data[ctxt->_eip - fc->start];
 
-   /* x86 instructions are limited to 15 bytes. */
-   if (unlikely(ctxt->_eip + size - ctxt->eip > 15))
-   return X86EMUL_UNHANDLEABLE;
while (size--) {
-   rc = do_insn_fetch_byte(ctxt, dest++);
-   if (rc != X86EMUL_CONTINUE)
-   return rc;
+   if (unlikely(ctxt->_eip == fc->end)) {
+   rc = do_insn_fetch_bytes(ctxt);
+   if (rc != X86EMUL_CONTINUE)
+   return rc;
+   }
+   *dest++ = *src++;
+   ctxt->_eip++;
+   continue;
}
return X86EMUL_CONTINUE;
 }
@@ -4272,6 +4272,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
ctxt->opcode_len = 1;
if (insn_len > 0)
memcpy(ctxt->fetch.data, insn, insn_len);
+   else {
+   rc = do_insn_fetch_bytes(ctxt);
+   if (rc != X86EMUL_CONTINUE)
+   return rc;
+   }
 
switch (mode) {
case X86EMUL_MODE_REAL:
-- 
1.8.3.1
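
To summarize the structural change without the diff markers, here is a condensed, self-contained sketch of the hoisting pattern (hypothetical types, and a fake refill() standing in for ctxt->ops->fetch(); this is illustrative, not the kernel code). Before the patch, every byte paid for a call into do_insn_fetch_byte() with the end-of-cache check inside it; after, the check lives in the caller's loop, annotated unlikely, and the common case is a plain byte copy:

#include <stdio.h>
#include <string.h>

typedef unsigned char u8;

#define unlikely(x)	__builtin_expect(!!(x), 0)

/* Hypothetical stand-ins for struct fetch_cache and ctxt->ops->fetch() */
struct cache {
	unsigned long start, end;	/* linear range held in data[] */
	u8 data[15];			/* x86 insn length limit */
};

static int refill(struct cache *fc)
{
	unsigned long cur = fc->end - fc->start;

	if (cur >= sizeof(fc->data))
		return -1;		/* 15-byte limit exceeded */
	/* pretend the guest's code page is full of NOPs */
	memset(fc->data + cur, 0x90, sizeof(fc->data) - cur);
	fc->end = fc->start + sizeof(fc->data);
	return 0;
}

/* After the patch: the refill check is hoisted out of the per-byte
 * helper into the caller's loop and marked unlikely; the common case
 * is a straight copy the compiler can optimize well. */
static int fetch(struct cache *fc, unsigned long *eip, u8 *dest, unsigned size)
{
	u8 *src = &fc->data[*eip - fc->start];

	while (size--) {
		if (unlikely(*eip == fc->end)) {  /* rare: page crossing */
			if (refill(fc) != 0)
				return -1;
		}
		*dest++ = *src++;
		(*eip)++;
	}
	return 0;
}

int main(void)
{
	struct cache fc = { .start = 0x100, .end = 0x100 };
	unsigned long eip = 0x100;
	u8 insn[4];

	if (fetch(&fc, &eip, insn, sizeof(insn)) == 0)
		printf("fetched %zu bytes, eip now %#lx\n",
		       sizeof(insn), eip);
	return 0;
}

In this sketch the first loop iteration still triggers the refill; the actual patch avoids even that by priming the cache in x86_decode_insn before decoding begins, which is what keeps the branch genuinely unlikely on the hot path.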

