Add emulate_step() changes to support VSX vector paired storage access
instructions, which provide octword (32-byte) operand loads/stores
between storage and the set of 64 Vector Scalar Registers (VSRs).

Signed-off-by: Balamuruhan S <bal...@linux.ibm.com>
---
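Note for reviewers (not part of the commit message): a minimal sketch of
how the new 32-byte path could be exercised, e.g. from the emulate_step()
selftests. The test_lxvp() helper below is hypothetical -- it is not an
existing ppc-opcode.h macro -- and assumes the ISA 3.1 DQ-form layout for
lxvp (major opcode 6, Tp in bits 6-9, TX in bit 10, RA in bits 11-15, DQ
in bits 16-27, XO = 0 selects the load form), as well as an emulate_step()
entry point that takes a raw u32 instruction word.

	/*
	 * Hypothetical encoder for lxvp XTp,DQ(RA). The pair number is
	 * XTp = 32*TX + 2*Tp, so XTp is always even and the instruction
	 * names two consecutive VSRs.
	 */
	static inline u32 test_lxvp(int xtp, int ra, int disp)
	{
		u32 tp = (xtp & 0x1f) >> 1;
		u32 tx = (xtp >> 5) & 0x1;

		return (6u << 26) | (tp << 22) | (tx << 21) |
		       ((ra & 0x1f) << 16) | (disp & 0xfff0);
	}

	/* load the 32 bytes at the address in GPR3 into the pair vs20:vs21 */
	regs->gpr[3] = (unsigned long)buffer;
	stepped = emulate_step(regs, test_lxvp(20, 3, 0));

A return value of 1 from emulate_step() means the access was emulated and
the even-odd pair vs20:vs21 should hold the same octword the hardware
instruction would have loaded.
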
 arch/powerpc/include/asm/sstep.h |  2 +-
 arch/powerpc/lib/sstep.c         | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 57 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index 3b01c69a44aa..a6c0b299bcc9 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -126,7 +126,7 @@ union vsx_reg {
        unsigned long d[2];
        float   fp[4];
        double  dp[2];
-       __vector128 v;
+       __vector128 v[2];
 };
 
 /*
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index c92890e71ca7..74c730cae7d8 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -279,6 +279,19 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
                up[1] = tmp;
                break;
        }
+       case 32: {
+               unsigned long *up = (unsigned long *)ptr;
+               unsigned long tmp;
+
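+               /* swap doublewords 0<->3 and 1<->2, byte-reversing each */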
+               tmp = byterev_8(up[0]);
+               up[0] = byterev_8(up[3]);
+               up[3] = tmp;
+               tmp = byterev_8(up[2]);
+               up[2] = byterev_8(up[1]);
+               up[1] = tmp;
+               break;
+       }
 #endif
        default:
                WARN_ON_ONCE(1);
@@ -709,6 +722,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
        reg->d[0] = reg->d[1] = 0;
 
        switch (op->element_size) {
+       case 32:
+               /* [p]lxvp[x]: shares the whole-vector path below */
        case 16:
                /* whole vector; lxv[x] or lxvl[l] */
                if (size == 0)
@@ -717,7 +732,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
                if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
                        rev = !rev;
                if (rev)
-                       do_byte_reverse(reg, 16);
+                       do_byte_reverse(reg, size);
                break;
        case 8:
                /* scalar loads, lxvd2x, lxvdsx */
@@ -793,6 +808,22 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
        size = GETSIZE(op->type);
 
        switch (op->element_size) {
+       case 32:
+               /* [p]stxvp[x] */
+               if (size == 0)
+                       break;
+               if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
+                       rev = !rev;
+               memcpy(mem, reg, size);
+               if (rev) {
+                       /*
+                        * Reverse all 32 bytes in place; union vsx_reg only
+                        * declares d[2], so don't index d[2]/d[3] here and
+                        * reuse the 32-byte do_byte_reverse() case instead.
+                        */
+                       do_byte_reverse(mem, size);
+               }
+               break;
        case 16:
                /* stxv, stxvx, stxvl, stxvll */
                if (size == 0)
@@ -861,28 +892,34 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op,
                                       bool cross_endian)
 {
        int reg = op->reg;
-       u8 mem[16];
+       int i, nr_vsx_regs;
+       u8 mem[32];
        union vsx_reg buf;
        int size = GETSIZE(op->type);
 
        if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
                return -EFAULT;
 
+       nr_vsx_regs = size / sizeof(__vector128);
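+       /* 2 for the 32-byte paired forms, 1 for all other VSX accesses */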
        emulate_vsx_load(op, &buf, mem, cross_endian);
        preempt_disable();
        if (reg < 32) {
                /* FP regs + extensions */
                if (regs->msr & MSR_FP) {
-                       load_vsrn(reg, &buf);
+                       for (i = 0; i < nr_vsx_regs; i++)
+                               load_vsrn(reg + i, &buf.v[i]);
                } else {
                        current->thread.fp_state.fpr[reg][0] = buf.d[0];
                        current->thread.fp_state.fpr[reg][1] = buf.d[1];
                }
        } else {
-               if (regs->msr & MSR_VEC)
-                       load_vsrn(reg, &buf);
-               else
-                       current->thread.vr_state.vr[reg - 32] = buf.v;
+               if (regs->msr & MSR_VEC) {
+                       for (i = 0; i < nr_vsx_regs; i++)
+                               load_vsrn(reg + i, &buf.v[i]);
+               } else {
+                       current->thread.vr_state.vr[reg - 32] = buf.v[0];
+               }
        }
        preempt_enable();
        return 0;
@@ -893,27 +930,33 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op,
                                        bool cross_endian)
 {
        int reg = op->reg;
-       u8 mem[16];
+       int i, nr_vsx_regs;
+       u8 mem[32];
        union vsx_reg buf;
        int size = GETSIZE(op->type);
 
        if (!address_ok(regs, ea, size))
                return -EFAULT;
 
+       nr_vsx_regs = size / sizeof(__vector128);
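+       /* 2 for the 32-byte paired forms, 1 for all other VSX accesses */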
        preempt_disable();
        if (reg < 32) {
                /* FP regs + extensions */
                if (regs->msr & MSR_FP) {
-                       store_vsrn(reg, &buf);
+                       for (i = 0; i < nr_vsx_regs; i++)
+                               store_vsrn(reg + i, &buf.v[i]);
                } else {
                        buf.d[0] = current->thread.fp_state.fpr[reg][0];
                        buf.d[1] = current->thread.fp_state.fpr[reg][1];
                }
        } else {
-               if (regs->msr & MSR_VEC)
-                       store_vsrn(reg, &buf);
-               else
-                       buf.v = current->thread.vr_state.vr[reg - 32];
+               if (regs->msr & MSR_VEC) {
+                       for (i = 0; i < nr_vsx_regs; i++)
+                               store_vsrn(reg + i, &buf.v[i]);
+               } else {
+                       buf.v[0] = current->thread.vr_state.vr[reg - 32];
+               }
        }
        preempt_enable();
        emulate_vsx_store(op, &buf, mem, cross_endian);
-- 
2.24.1
