MMIO emulation reads the last instruction executed by the guest
and then emulates. If the guest is running in Little Endian mode,
the instruction needs to be byte-swapped before being emulated.

This patch stores the last instruction in the endian order of the
host, doing a byte-swap beforehand if needed. The common code
which fetches last_inst uses a helper routine kvmppc_need_byteswap(),
and the exit paths for the Book3S PR and HV guests use their own
version in assembly.

kvmppc_emulate_instruction() also uses kvmppc_need_byteswap() to
define in which endian order the mmio needs to be done.

The patch is based on Alex Graf's kvm-ppc-queue branch and it
has been tested on Big Endian and Little Endian HV guests and
Big Endian PR guests.

Signed-off-by: Cédric Le Goater <c...@fr.ibm.com>
---

Here are some comments/questions : 

 * the host is assumed to be running in Big Endian. When Little Endian
   hosts are supported in the future, we will use the cpu features to
   fix kvmppc_need_byteswap()

 * the 'is_bigendian' parameter of the routines kvmppc_handle_load()
   and kvmppc_handle_store() seems redundant but the *BRX opcodes 
   make the improvements unclear. We could eventually rename the
   parameter to byteswap and the attribute vcpu->arch.mmio_is_bigendian
   to vcpu->arch.mmio_need_byteswap. Anyhow, the current naming sucks
   and I would be happy to have some directions to fix it.



 arch/powerpc/include/asm/kvm_book3s.h   |   15 ++++++-
 arch/powerpc/kvm/book3s_64_mmu_hv.c     |    4 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |   14 +++++-
 arch/powerpc/kvm/book3s_segment.S       |   14 +++++-
 arch/powerpc/kvm/emulate.c              |   71 +++++++++++++++++--------------
 5 files changed, 83 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 0ec00f4..36c5573 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -270,14 +270,22 @@ static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
        return vcpu->arch.pc;
 }
 
+static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.shared->msr & MSR_LE;
+}
+
 static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
 {
        ulong pc = kvmppc_get_pc(vcpu);
 
        /* Load the instruction manually if it failed to do so in the
         * exit path */
-       if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
+       if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) {
                kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
+               if (kvmppc_need_byteswap(vcpu))
+                       vcpu->arch.last_inst = swab32(vcpu->arch.last_inst);
+       }
 
        return vcpu->arch.last_inst;
 }
@@ -293,8 +301,11 @@ static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
 
        /* Load the instruction manually if it failed to do so in the
         * exit path */
-       if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
+       if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) {
                kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
+               if (kvmppc_need_byteswap(vcpu))
+                       vcpu->arch.last_inst = swab32(vcpu->arch.last_inst);
+       }
 
        return vcpu->arch.last_inst;
 }
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 3a89b85..28130c7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -547,6 +547,10 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
                ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
                if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED)
                        return RESUME_GUEST;
+
+               if (kvmppc_need_byteswap(vcpu))
+                       last_inst = swab32(last_inst);
+
                vcpu->arch.last_inst = last_inst;
        }
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index dd80953..1d3ee40 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1393,14 +1393,26 @@ fast_interrupt_c_return:
        lwz     r8, 0(r10)
        mtmsrd  r3
 
+       ld      r0, VCPU_MSR(r9)
+
+       /* r10 = vcpu->arch.msr & MSR_LE */
+       rldicl  r10, r0, 0, 63
+       cmpdi   r10, 0
+       bne     2f
+
        /* Store the result */
        stw     r8, VCPU_LAST_INST(r9)
 
        /* Unset guest mode. */
-       li      r0, KVM_GUEST_MODE_NONE
+1:     li      r0, KVM_GUEST_MODE_NONE
        stb     r0, HSTATE_IN_GUEST(r13)
        b       guest_exit_cont
 
+       /* Swap and store the result */
+2:     addi    r11, r9, VCPU_LAST_INST
+       stwbrx  r8, 0, r11
+       b       1b
+
 /*
  * Similarly for an HISI, reflect it to the guest as an ISI unless
  * it is an HPTE not found fault for a page that we have paged out.
diff --git a/arch/powerpc/kvm/book3s_segment.S 
b/arch/powerpc/kvm/book3s_segment.S
index 1abe478..bf20b45 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -287,7 +287,19 @@ ld_last_inst:
        sync
 
 #endif
-       stw     r0, SVCPU_LAST_INST(r13)
+       ld      r8, SVCPU_SHADOW_SRR1(r13)
+
+       /* r10 = vcpu->arch.msr & MSR_LE */
+       rldicl  r10, r8, 0, 63
+       cmpdi   r10, 0
+       beq     1f
+
+       /* swap and store the result */
+       addi    r11, r13, SVCPU_LAST_INST
+       stwbrx  r0, 0, r11
+       b       no_ld_last_inst
+
+1:     stw     r0, SVCPU_LAST_INST(r13)
 
 no_ld_last_inst:
 
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 751cd45..20529ca 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -232,6 +232,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct 
kvm_vcpu *vcpu)
        int sprn = get_sprn(inst);
        enum emulation_result emulated = EMULATE_DONE;
        int advance = 1;
+       int dont_byteswap = !kvmppc_need_byteswap(vcpu);
 
        /* this default type might be overwritten by subcategories */
        kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
@@ -266,47 +267,53 @@ int kvmppc_emulate_instruction(struct kvm_run *run, 
struct kvm_vcpu *vcpu)
                        advance = 0;
                        break;
                case OP_31_XOP_LWZX:
-                       emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+                       emulated = kvmppc_handle_load(run, vcpu, rt, 4,
+                                                     dont_byteswap);
                        break;
 
                case OP_31_XOP_LBZX:
-                       emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+                       emulated = kvmppc_handle_load(run, vcpu, rt, 1,
+                                                     dont_byteswap);
                        break;
 
                case OP_31_XOP_LBZUX:
-                       emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+                       emulated = kvmppc_handle_load(run, vcpu, rt, 1,
+                                                     dont_byteswap);
                        kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                        break;
 
                case OP_31_XOP_STWX:
                        emulated = kvmppc_handle_store(run, vcpu,
                                                       kvmppc_get_gpr(vcpu, rs),
-                                                      4, 1);
+                                                      4, dont_byteswap);
                        break;
 
                case OP_31_XOP_STBX:
                        emulated = kvmppc_handle_store(run, vcpu,
                                                       kvmppc_get_gpr(vcpu, rs),
-                                                      1, 1);
+                                                      1, dont_byteswap);
                        break;
 
                case OP_31_XOP_STBUX:
                        emulated = kvmppc_handle_store(run, vcpu,
                                                       kvmppc_get_gpr(vcpu, rs),
-                                                      1, 1);
+                                                      1, dont_byteswap);
                        kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                        break;
 
                case OP_31_XOP_LHAX:
-                       emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+                       emulated = kvmppc_handle_loads(run, vcpu, rt, 2,
+                                                      dont_byteswap);
                        break;
 
                case OP_31_XOP_LHZX:
-                       emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+                       emulated = kvmppc_handle_load(run, vcpu, rt, 2,
+                                                     dont_byteswap);
                        break;
 
                case OP_31_XOP_LHZUX:
-                       emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+                       emulated = kvmppc_handle_load(run, vcpu, rt, 2,
+                                                     dont_byteswap);
                        kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                        break;
 
@@ -317,13 +324,13 @@ int kvmppc_emulate_instruction(struct kvm_run *run, 
struct kvm_vcpu *vcpu)
                case OP_31_XOP_STHX:
                        emulated = kvmppc_handle_store(run, vcpu,
                                                       kvmppc_get_gpr(vcpu, rs),
-                                                      2, 1);
+                                                      2, dont_byteswap);
                        break;
 
                case OP_31_XOP_STHUX:
                        emulated = kvmppc_handle_store(run, vcpu,
                                                       kvmppc_get_gpr(vcpu, rs),
-                                                      2, 1);
+                                                      2, dont_byteswap);
                        kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                        break;
 
@@ -342,7 +349,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct 
kvm_vcpu *vcpu)
                        break;
 
                case OP_31_XOP_LWBRX:
-                       emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
+                       emulated = kvmppc_handle_load(run, vcpu, rt, 4,
+                                                     !dont_byteswap);
                        break;
 
                case OP_31_XOP_TLBSYNC:
@@ -351,17 +359,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, 
struct kvm_vcpu *vcpu)
                case OP_31_XOP_STWBRX:
                        emulated = kvmppc_handle_store(run, vcpu,
                                                       kvmppc_get_gpr(vcpu, rs),
-                                                      4, 0);
+                                                      4, !dont_byteswap);
                        break;
 
                case OP_31_XOP_LHBRX:
-                       emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
+                       emulated = kvmppc_handle_load(run, vcpu, rt, 2,
+                                                     !dont_byteswap);
                        break;
 
                case OP_31_XOP_STHBRX:
                        emulated = kvmppc_handle_store(run, vcpu,
                                                       kvmppc_get_gpr(vcpu, rs),
-                                                      2, 0);
+                                                      2, !dont_byteswap);
                        break;
 
                default:
@@ -371,33 +380,33 @@ int kvmppc_emulate_instruction(struct kvm_run *run, 
struct kvm_vcpu *vcpu)
                break;
 
        case OP_LWZ:
-               emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+               emulated = kvmppc_handle_load(run, vcpu, rt, 4, dont_byteswap);
                break;
 
        /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx 
etc. */
        case OP_LD:
                rt = get_rt(inst);
-               emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
+               emulated = kvmppc_handle_load(run, vcpu, rt, 8, dont_byteswap);
                break;
 
        case OP_LWZU:
-               emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+               emulated = kvmppc_handle_load(run, vcpu, rt, 4, dont_byteswap);
                kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                break;
 
        case OP_LBZ:
-               emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+               emulated = kvmppc_handle_load(run, vcpu, rt, 1, dont_byteswap);
                break;
 
        case OP_LBZU:
-               emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+               emulated = kvmppc_handle_load(run, vcpu, rt, 1, dont_byteswap);
                kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                break;
 
        case OP_STW:
                emulated = kvmppc_handle_store(run, vcpu,
                                               kvmppc_get_gpr(vcpu, rs),
-                                              4, 1);
+                                              4, dont_byteswap);
                break;
 
        /* TBD: Add support for other 64 bit store variants like stdu, stdux, 
stdx etc. */
@@ -405,57 +414,57 @@ int kvmppc_emulate_instruction(struct kvm_run *run, 
struct kvm_vcpu *vcpu)
                rs = get_rs(inst);
                emulated = kvmppc_handle_store(run, vcpu,
                                               kvmppc_get_gpr(vcpu, rs),
-                                              8, 1);
+                                              8, dont_byteswap);
                break;
 
        case OP_STWU:
                emulated = kvmppc_handle_store(run, vcpu,
                                               kvmppc_get_gpr(vcpu, rs),
-                                              4, 1);
+                                              4, dont_byteswap);
                kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                break;
 
        case OP_STB:
                emulated = kvmppc_handle_store(run, vcpu,
                                               kvmppc_get_gpr(vcpu, rs),
-                                              1, 1);
+                                              1, dont_byteswap);
                break;
 
        case OP_STBU:
                emulated = kvmppc_handle_store(run, vcpu,
                                               kvmppc_get_gpr(vcpu, rs),
-                                              1, 1);
+                                              1, dont_byteswap);
                kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                break;
 
        case OP_LHZ:
-               emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+               emulated = kvmppc_handle_load(run, vcpu, rt, 2, dont_byteswap);
                break;
 
        case OP_LHZU:
-               emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+               emulated = kvmppc_handle_load(run, vcpu, rt, 2, dont_byteswap);
                kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                break;
 
        case OP_LHA:
-               emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+               emulated = kvmppc_handle_loads(run, vcpu, rt, 2, dont_byteswap);
                break;
 
        case OP_LHAU:
-               emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+               emulated = kvmppc_handle_loads(run, vcpu, rt, 2, dont_byteswap);
                kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                break;
 
        case OP_STH:
                emulated = kvmppc_handle_store(run, vcpu,
                                               kvmppc_get_gpr(vcpu, rs),
-                                              2, 1);
+                                              2, dont_byteswap);
                break;
 
        case OP_STHU:
                emulated = kvmppc_handle_store(run, vcpu,
                                               kvmppc_get_gpr(vcpu, rs),
-                                              2, 1);
+                                              2, dont_byteswap);
                kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                break;
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to