Add emulate_step() changes to support VSX vector paired storage access instructions, which provide octword operand loads/stores between storage and a set of 64 Vector-Scalar Registers (VSRs).
Signed-off-by: Balamuruhan S <bal...@linux.ibm.com> --- arch/powerpc/include/asm/sstep.h | 2 +- arch/powerpc/lib/sstep.c | 58 +++++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 3b01c69a44aa..a6c0b299bcc9 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -126,7 +126,7 @@ union vsx_reg { unsigned long d[2]; float fp[4]; double dp[2]; - __vector128 v; + __vector128 v[2]; }; /* diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c92890e71ca7..74c730cae7d8 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -279,6 +279,19 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb) up[1] = tmp; break; } + case 32: { + unsigned long *up = (unsigned long *)ptr; + unsigned long tmp; + + tmp = byterev_8(up[0]); + up[0] = byterev_8(up[3]); + up[3] = tmp; + tmp = byterev_8(up[2]); + up[2] = byterev_8(up[1]); + up[1] = tmp; + break; + } + #endif default: WARN_ON_ONCE(1); @@ -709,6 +722,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, reg->d[0] = reg->d[1] = 0; switch (op->element_size) { + case 32: + /* [p]lxvp[x] or [p]stxvp[x] */ case 16: /* whole vector; lxv[x] or lxvl[l] */ if (size == 0) @@ -717,7 +732,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) rev = !rev; if (rev) - do_byte_reverse(reg, 16); + do_byte_reverse(reg, size); break; case 8: /* scalar loads, lxvd2x, lxvdsx */ @@ -793,6 +808,22 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, size = GETSIZE(op->type); switch (op->element_size) { + case 32: + /* [p]lxvp[x] or [p]stxvp[x] */ + if (size == 0) + break; + if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) + rev = !rev; + if (rev) { + /* reverse 32 bytes */ + buf.d[0] = byterev_8(reg->d[3]); + buf.d[1] = byterev_8(reg->d[2]); + buf.d[2] = 
byterev_8(reg->d[1]); + buf.d[3] = byterev_8(reg->d[0]); + reg = &buf; + } + memcpy(mem, reg, size); + break; case 16: /* stxv, stxvx, stxvl, stxvll */ if (size == 0) @@ -861,28 +892,33 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op, bool cross_endian) { int reg = op->reg; - u8 mem[16]; + int i, nr_vsx_regs; + u8 mem[32]; union vsx_reg buf; int size = GETSIZE(op->type); if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs)) return -EFAULT; + nr_vsx_regs = size / sizeof(__vector128); emulate_vsx_load(op, &buf, mem, cross_endian); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ if (regs->msr & MSR_FP) { - load_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) + load_vsrn(reg + i, &buf.v[i]); } else { current->thread.fp_state.fpr[reg][0] = buf.d[0]; current->thread.fp_state.fpr[reg][1] = buf.d[1]; } } else { if (regs->msr & MSR_VEC) - load_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) + load_vsrn(reg + i, &buf.v[i]); + else - current->thread.vr_state.vr[reg - 32] = buf.v; + current->thread.vr_state.vr[reg - 32] = buf.v[0]; } preempt_enable(); return 0; @@ -893,27 +929,31 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, bool cross_endian) { int reg = op->reg; - u8 mem[16]; + int i, nr_vsx_regs; + u8 mem[32]; union vsx_reg buf; int size = GETSIZE(op->type); if (!address_ok(regs, ea, size)) return -EFAULT; + nr_vsx_regs = size / sizeof(__vector128); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ if (regs->msr & MSR_FP) { - store_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) + store_vsrn(reg + i, &buf.v[i]); } else { buf.d[0] = current->thread.fp_state.fpr[reg][0]; buf.d[1] = current->thread.fp_state.fpr[reg][1]; } } else { if (regs->msr & MSR_VEC) - store_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) + store_vsrn(reg + i, &buf.v[i]); else - buf.v = current->thread.vr_state.vr[reg - 32]; + buf.v[0] = current->thread.vr_state.vr[reg - 32]; } preempt_enable(); 
emulate_vsx_store(op, &buf, mem, cross_endian); -- 2.24.1