Re: [PATCH] target/ppc: Fix regression in Radix MMU

2022-10-28 Thread Víctor Colombo

On 28/10/2022 15:36, Leandro Lupori wrote:

Commit 47e83d9107 ended up unintentionally changing the control flow
of ppc_radix64_process_scoped_xlate(). When guest_visible is false,
it must not raise an exception, even if the radix configuration is
not valid.

This regression prevented Linux boot in a nested environment with
L1 using TCG and emulating KVM (cap-nested-hv=on) and L2 using
KVM. L2 would hang on Linux's futex_init(), when it tested how a
futex_atomic_cmpxchg_inatomic() handled a fault, because L1 would
start a loop of trying to perform partition scoped translations
and raising exceptions.

Fixes: 47e83d9107 ("target/ppc: Improve Radix xlate level validation")
Reported-by: Victor Colombo 
Signed-off-by: Leandro Lupori 


It now reaches the login screen on L2

Tested-by: Víctor Colombo 

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>



[RFC PATCH v2 5/5] target/ppc: Enable hardfpu for Power

2022-10-26 Thread Víctor Colombo
Change the build conditional in softfloat.c, allowing TARGET_PPC
to use hardfpu. For PPC, this is only implemented in linux-user.

Signed-off-by: Víctor Colombo 
---
 fpu/softfloat.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index c7454c3eb1..f395096275 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -220,11 +220,13 @@ GEN_INPUT_FLUSH3(float64_input_flush3, float64)
  * the use of hardfloat, since hardfloat relies on the inexact flag being
  * already set.
  */
-#if defined(TARGET_PPC) || defined(__FAST_MATH__)
-# if defined(__FAST_MATH__)
-#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact 
\
+#if defined(__FAST_MATH__)
+# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 IEEE implementation
-# endif
+# define QEMU_NO_HARDFLOAT 1
+# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
+#elif defined(TARGET_PPC) && (!defined(CONFIG_USER_ONLY) || 
!defined(CONFIG_LINUX_USER))
+/* In PPC hardfloat only works for linux-user */
 # define QEMU_NO_HARDFLOAT 1
 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 #else
-- 
2.25.1




[RFC PATCH v2 1/5] target/ppc: prepare instructions to work with caching last FP insn

2022-10-26 Thread Víctor Colombo
When enabling hardfpu for Power and adding the instruction caching
feature, it will be necessary to uncache when the instruction
is guaranteed to be executed in softfloat. If the cache is not cleared
in this situation, it could lead to a previous instruction being
reexecuted and yield a different result than when only softfloat
was present.

This patch introduces the base code to allow for the implementation of
FP instructions caching, while also adding calls to a macro that clears
the cached instruction for every one that has not been 'migrated' to
hardfpu-compliance yet.

In the future, it will be necessary to implement the necessary code
for each FP instruction that wants to use hardfpu.

This implementation only works in linux-user. No test or effort
was done in this patch to make it work for softmmu. Future work
will be required to make it work correctly in this scenario.

Signed-off-by: Víctor Colombo 
---
 target/ppc/cpu.h   |  6 +++
 target/ppc/excp_helper.c   |  2 +
 target/ppc/fpu_helper.c| 71 ++
 target/ppc/helper.h|  1 +
 target/ppc/translate/fp-impl.c.inc |  1 +
 5 files changed, 81 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index cca6c4e51c..116ee639ff 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1080,6 +1080,10 @@ struct ppc_radix_page_info {
 #define PPC_CPU_OPCODES_LEN  0x40
 #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
 
+enum {
+CACHED_FN_TYPE_NONE,
+};
+
 struct CPUArchState {
 /* Most commonly used resources during translated code execution first */
 target_ulong gpr[32];  /* general purpose registers */
@@ -1157,6 +1161,8 @@ struct CPUArchState {
 float_status fp_status; /* Floating point execution context */
 target_ulong fpscr; /* Floating point status and control register */
 
+int cached_fn_type;
+
 /* Internal devices resources */
 ppc_tb_t *tb_env;  /* Time base and decrementer */
 ppc_dcr_t *dcr_env;/* Device control registers */
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 43f2480e94..6de8c369b8 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -1910,6 +1910,8 @@ void raise_exception_err_ra(CPUPPCState *env, uint32_t 
exception,
 {
 CPUState *cs = env_cpu(env);
 
+helper_execute_fp_cached(env);
+
 cs->exception_index = exception;
 env->error_code = error_code;
 cpu_loop_exit_restore(cs, raddr);
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index ae25f32d6e..34b242c025 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -23,6 +23,17 @@
 #include "internal.h"
 #include "fpu/softfloat.h"
 
+#if defined(CONFIG_USER_ONLY) && defined(CONFIG_LINUX_USER)
+#define CACHE_FN_NONE(env)\
+do {  \
+assert(!(env->fp_status.float_exception_flags &   \
+ float_flag_inexact));\
+env->cached_fn_type = CACHED_FN_TYPE_NONE;\
+} while (0)
+#else
+#define CACHE_FN_NONE(env)
+#endif
+
 static inline float128 float128_snan_to_qnan(float128 x)
 {
 float128 r;
@@ -514,6 +525,24 @@ void helper_reset_fpstatus(CPUPPCState *env)
 set_float_exception_flags(0, >fp_status);
 }
 
+void helper_execute_fp_cached(CPUPPCState *env)
+{
+#if defined(CONFIG_USER_ONLY) && defined(CONFIG_LINUX_USER)
+switch (env->cached_fn_type) {
+case CACHED_FN_TYPE_NONE:
+/*
+ * the last fp instruction was executed in softfloat
+ * so no need to execute it again
+ */
+break;
+default:
+g_assert_not_reached();
+}
+
+env->cached_fn_type = CACHED_FN_TYPE_NONE;
+#endif
+}
+
 static void float_invalid_op_addsub(CPUPPCState *env, int flags,
 bool set_fpcc, uintptr_t retaddr)
 {
@@ -527,6 +556,7 @@ static void float_invalid_op_addsub(CPUPPCState *env, int 
flags,
 /* fadd - fadd. */
 float64 helper_fadd(CPUPPCState *env, float64 arg1, float64 arg2)
 {
+CACHE_FN_NONE(env);
 float64 ret = float64_add(arg1, arg2, >fp_status);
 int flags = get_float_exception_flags(>fp_status);
 
@@ -540,6 +570,7 @@ float64 helper_fadd(CPUPPCState *env, float64 arg1, float64 
arg2)
 /* fadds - fadds. */
 float64 helper_fadds(CPUPPCState *env, float64 arg1, float64 arg2)
 {
+CACHE_FN_NONE(env);
 float64 ret = float64r32_add(arg1, arg2, >fp_status);
 int flags = get_float_exception_flags(>fp_status);
 
@@ -552,6 +583,7 @@ float64 helper_fadds(CPUPPCState *env, float64 arg1, 
float64 arg2)
 /* fsub - fsub. */
 float64 helper_fsub(CPUPPCState *env, float64 arg1, float64 arg2)
 {
+CACHE_FN_NONE(env);
 float64 ret = float64_sub(arg1, arg2, >fp_

[RFC PATCH v2 3/5] target/ppc: Implement instruction caching for muladd

2022-10-26 Thread Víctor Colombo
This patch adds the code necessary to cache muladd instructions
for usage with hardfpu in Power.

muladd is an instruction that receives five arguments, three f64, one int
(flags) and one status, and returns f64. This info will be cached inside the
union in env, which grows when other instructions with other
signatures are added.

Hardfpu in QEMU only works when the inexact flag is already set. So,
CACHE_FN_5 will check if FP_XX is set, and set float_flag_inexact
to enable the hardfpu behavior. When the instruction is later
reexecuted, it will be with float_flag_inexact cleared, forcing
softfloat and correctly updating the relevant flags, as is today.

This implementation only works in linux-user. No test or effort
was done in this patch to make it work for softmmu. Future work
will be required to make it work correctly in this scenario.

Signed-off-by: Víctor Colombo 
---
 target/ppc/cpu.h| 11 +++
 target/ppc/fpu_helper.c | 35 +--
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index e55c10b0db..f6803bf37b 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1083,6 +1083,7 @@ struct ppc_radix_page_info {
 enum {
 CACHED_FN_TYPE_NONE,
 CACHED_FN_TYPE_F64_F64_FSTATUS,
+CACHED_FN_TYPE_F64_F64_F64_F64_I_FSTATUS,
 
 };
 
@@ -1092,6 +1093,15 @@ struct cached_fn_f64_f64_fstatus {
 float_status arg2;
 };
 
+struct cached_fn_f64_f64_f64_f64_i_fstatus {
+float64 (*fn)(float64, float64, float64, int, float_status*);
+float64 arg1;
+float64 arg2;
+float64 arg3;
+int arg4;
+float_status arg5;
+};
+
 struct CPUArchState {
 /* Most commonly used resources during translated code execution first */
 target_ulong gpr[32];  /* general purpose registers */
@@ -1172,6 +1182,7 @@ struct CPUArchState {
 int cached_fn_type;
 union {
 struct cached_fn_f64_f64_fstatus f64_f64_fstatus;
+struct cached_fn_f64_f64_f64_f64_i_fstatus f64_f64_f64_f64_i_fstatus;
 } cached_fn;
 
 /* Internal devices resources */
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 1756719664..a152c018b2 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -45,9 +45,27 @@
 env->cached_fn_type = CACHED_FN_TYPE_NONE;\
 } \
 } while (0)
+
+#define CACHE_FN_5(env, FN, ARG1, ARG2, ARG3, ARG4, FIELD, TYPE)  \
+do {  \
+if (env->fpscr & FP_XX) { \
+env->cached_fn_type = TYPE;   \
+env->cached_fn.FIELD.fn = FN; \
+env->cached_fn.FIELD.arg1 = ARG1; \
+env->cached_fn.FIELD.arg2 = ARG2; \
+env->cached_fn.FIELD.arg3 = ARG3; \
+env->cached_fn.FIELD.arg4 = ARG4; \
+env->fp_status.float_exception_flags |= float_flag_inexact;   \
+} else {  \
+assert(!(env->fp_status.float_exception_flags &   \
+ float_flag_inexact));\
+env->cached_fn_type = CACHED_FN_TYPE_NONE;\
+} \
+} while (0)
 #else
 #define CACHE_FN_NONE(env)
 #define CACHE_FN_3(env, FN, ARG1, ARG2, FIELD, TYPE)
+#define CACHE_FN_5(env, FN, ARG1, ARG2, ARG3, ARG4, FIELD, TYPE)
 #endif
 
 static inline float128 float128_snan_to_qnan(float128 x)
@@ -572,6 +590,17 @@ void helper_execute_fp_cached(CPUPPCState *env)
 env->fpscr |= FP_FI | FP_XX;
 }
 break;
+case CACHED_FN_TYPE_F64_F64_F64_F64_I_FSTATUS:
+; /* hack to allow declaration below */
+struct cached_fn_f64_f64_f64_f64_i_fstatus args =
+env->cached_fn.f64_f64_f64_f64_i_fstatus;
+assert(!(args.arg5.float_exception_flags & float_flag_inexact));
+args.fn(args.arg1, args.arg2, args.arg3, args.arg4, );
+env->fpscr &= ~FP_FI;
+if (args.arg5.float_exception_flags & float_flag_inexact) {
+env->fpscr |= FP_FI | FP_XX;
+}
+break;
 default:
 g_assert_not_reached();
 }
@@ -843,7 +872,8 @@ static void float_invalid_op_madd(CPUPPCState *env, int 
flags,
 static float64 do_fmadd(CPUPPCState *env, float64 a, float64 b,
  float64 c, int madd_flags, uintptr_t retaddr)
 {
-CACHE_FN_NONE(env);
+CACHE_FN_5(env, float64_muladd, a, b, c, madd_flags,
+f6

[RFC PATCH v2 4/5] target/ppc: Implement instruction caching for add/sub/mul/div

2022-10-26 Thread Víctor Colombo
This patch adds the code necessary to cache add/sub/mul/div
instructions for usage with hardfpu in Power.

These instructions receive three arguments, two f64 and
one status, and returns f64. This info will be cached inside the
union in env, which grows when other instructions with other
signatures are added.

Hardfpu in QEMU only works when the inexact flag is already set. So,
CACHE_FN_4 will check if FP_XX is set, and set float_flag_inexact
to enable the hardfpu behavior. When the instruction is later
reexecuted, it will be with float_flag_inexact cleared, forcing
softfloat and correctly updating the relevant flags, as is today.

This implementation only works in linux-user. No test or effort
was done in this patch to make it work for softmmu. Future work
will be required to make it work correctly in this scenario.

Signed-off-by: Víctor Colombo 
---
 target/ppc/cpu.h|  9 +++
 target/ppc/fpu_helper.c | 56 +
 2 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index f6803bf37b..a25787d939 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1083,6 +1083,7 @@ struct ppc_radix_page_info {
 enum {
 CACHED_FN_TYPE_NONE,
 CACHED_FN_TYPE_F64_F64_FSTATUS,
+CACHED_FN_TYPE_F64_F64_F64_FSTATUS,
 CACHED_FN_TYPE_F64_F64_F64_F64_I_FSTATUS,
 
 };
@@ -1093,6 +1094,13 @@ struct cached_fn_f64_f64_fstatus {
 float_status arg2;
 };
 
+struct cached_fn_f64_f64_f64_fstatus {
+float64 (*fn)(float64, float64, float_status*);
+float64 arg1;
+float64 arg2;
+float_status arg3;
+};
+
 struct cached_fn_f64_f64_f64_f64_i_fstatus {
 float64 (*fn)(float64, float64, float64, int, float_status*);
 float64 arg1;
@@ -1182,6 +1190,7 @@ struct CPUArchState {
 int cached_fn_type;
 union {
 struct cached_fn_f64_f64_fstatus f64_f64_fstatus;
+struct cached_fn_f64_f64_f64_fstatus f64_f64_f64_fstatus;
 struct cached_fn_f64_f64_f64_f64_i_fstatus f64_f64_f64_f64_i_fstatus;
 } cached_fn;
 
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index a152c018b2..0bea9df361 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -46,6 +46,22 @@
 } \
 } while (0)
 
+#define CACHE_FN_4(env, FN, ARG1, ARG2, ARG3, FIELD, TYPE)\
+do {  \
+if (env->fpscr & FP_XX) { \
+env->cached_fn_type = TYPE;   \
+env->cached_fn.FIELD.fn = FN; \
+env->cached_fn.FIELD.arg1 = ARG1; \
+env->cached_fn.FIELD.arg2 = ARG2; \
+env->cached_fn.FIELD.arg3 = ARG3; \
+env->fp_status.float_exception_flags |= float_flag_inexact;   \
+} else {  \
+assert(!(env->fp_status.float_exception_flags &   \
+ float_flag_inexact));\
+env->cached_fn_type = CACHED_FN_TYPE_NONE;\
+} \
+} while (0)
+
 #define CACHE_FN_5(env, FN, ARG1, ARG2, ARG3, ARG4, FIELD, TYPE)  \
 do {  \
 if (env->fpscr & FP_XX) { \
@@ -65,6 +81,7 @@
 #else
 #define CACHE_FN_NONE(env)
 #define CACHE_FN_3(env, FN, ARG1, ARG2, FIELD, TYPE)
+#define CACHE_FN_4(env, FN, ARG1, ARG2, ARG3, FIELD, TYPE)
 #define CACHE_FN_5(env, FN, ARG1, ARG2, ARG3, ARG4, FIELD, TYPE)
 #endif
 
@@ -590,6 +607,24 @@ void helper_execute_fp_cached(CPUPPCState *env)
 env->fpscr |= FP_FI | FP_XX;
 }
 break;
+case CACHED_FN_TYPE_F64_F64_F64_FSTATUS:
+assert((env->cached_fn.f64_f64_f64_fstatus.arg3.float_exception_flags &
+   float_flag_inexact) == 0);
+env->cached_fn.f64_f64_f64_fstatus.fn(
+env->cached_fn.f64_f64_f64_fstatus.arg1,
+env->cached_fn.f64_f64_f64_fstatus.arg2,
+>cached_fn.f64_f64_f64_fstatus.arg3);
+
+env->fpscr &= ~FP_FI;
+/*
+ * if the cached instruction resulted in FI being set
+ * then we update fpscr with this value
+ */
+if (env->cached_fn.f64_f64_f64_fstatus.arg3.float_exception_flags &
+float_flag_inexact) {
+env->fpscr |= FP_FI | FP_XX;
+}
+break;
 case CACHED_FN_TYPE_F64_F64_F64_F64_I_FSTATUS:
 ; /* hack to al

[RFC PATCH v2 2/5] target/ppc: Implement instruction caching for fsqrt

2022-10-26 Thread Víctor Colombo
This patch adds the code necessary to cache fsqrt for usage
with hardfpu in Power. It is also the first instruction to
use the new cache instruction system.

fsqrt is an instruction that receives two arguments, one f64 and
one status, and returns f64. This info will be cached inside a new
union in env, which will grow when other instructions with other
signatures are added.

Hardfpu in QEMU only works when the inexact flag is already set. So,
CACHE_FN_3 will check if FP_XX is set, and set float_flag_inexact
to enable the hardfpu behavior. When the instruction is later
reexecuted, it will be with float_flag_inexact cleared, forcing
softfloat and correctly updating the relevant flags, as is today.

This implementation only works in linux-user. No test or effort
was done in this patch to make it work for softmmu. Future work
will be required to make it work correctly in this scenario.

Signed-off-by: Víctor Colombo 
---
 target/ppc/cpu.h| 11 +++
 target/ppc/fpu_helper.c | 40 +++-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 116ee639ff..e55c10b0db 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1082,6 +1082,14 @@ struct ppc_radix_page_info {
 
 enum {
 CACHED_FN_TYPE_NONE,
+CACHED_FN_TYPE_F64_F64_FSTATUS,
+
+};
+
+struct cached_fn_f64_f64_fstatus {
+float64 (*fn)(float64, float_status*);
+float64 arg1;
+float_status arg2;
 };
 
 struct CPUArchState {
@@ -1162,6 +1170,9 @@ struct CPUArchState {
 target_ulong fpscr; /* Floating point status and control register */
 
 int cached_fn_type;
+union {
+struct cached_fn_f64_f64_fstatus f64_f64_fstatus;
+} cached_fn;
 
 /* Internal devices resources */
 ppc_tb_t *tb_env;  /* Time base and decrementer */
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 34b242c025..1756719664 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -30,8 +30,24 @@
  float_flag_inexact));\
 env->cached_fn_type = CACHED_FN_TYPE_NONE;\
 } while (0)
+
+#define CACHE_FN_3(env, FN, ARG1, ARG2, FIELD, TYPE)  \
+do {  \
+if (env->fpscr & FP_XX) { \
+env->cached_fn_type = TYPE;   \
+env->cached_fn.FIELD.fn = FN; \
+env->cached_fn.FIELD.arg1 = ARG1; \
+env->cached_fn.FIELD.arg2 = ARG2; \
+env->fp_status.float_exception_flags |= float_flag_inexact;   \
+} else {  \
+assert(!(env->fp_status.float_exception_flags &   \
+ float_flag_inexact));\
+env->cached_fn_type = CACHED_FN_TYPE_NONE;\
+} \
+} while (0)
 #else
 #define CACHE_FN_NONE(env)
+#define CACHE_FN_3(env, FN, ARG1, ARG2, FIELD, TYPE)
 #endif
 
 static inline float128 float128_snan_to_qnan(float128 x)
@@ -535,6 +551,27 @@ void helper_execute_fp_cached(CPUPPCState *env)
  * so no need to execute it again
  */
 break;
+case CACHED_FN_TYPE_F64_F64_FSTATUS:
+/*
+ * execute the cached insn. At this point, float_exception_flags
+ * should have FI not set, otherwise the result will not be correct
+ */
+assert((env->cached_fn.f64_f64_fstatus.arg2.float_exception_flags &
+   float_flag_inexact) == 0);
+env->cached_fn.f64_f64_fstatus.fn(
+env->cached_fn.f64_f64_fstatus.arg1,
+>cached_fn.f64_f64_fstatus.arg2);
+
+env->fpscr &= ~FP_FI;
+/*
+ * if the cached instruction resulted in FI being set
+ * then we update fpscr with this value
+ */
+if (env->cached_fn.f64_f64_fstatus.arg2.float_exception_flags &
+float_flag_inexact) {
+env->fpscr |= FP_FI | FP_XX;
+}
+break;
 default:
 g_assert_not_reached();
 }
@@ -878,7 +915,8 @@ static void float_invalid_op_sqrt(CPUPPCState *env, int 
flags,
 #define FPU_FSQRT(name, op)   \
 float64 helper_##name(CPUPPCState *env, float64 arg)  \
 { \
-CACHE_FN_NONE(env);   \
+CACHE_FN_3(env, op, arg, env->fp_status, f64

[RFC PATCH v2 0/5] Idea for using hardfloat in PPC

2022-10-26 Thread Víctor Colombo
oracolombo/f0d8b7c9f1cb63dac6ff0221209ec4ff
[7] https://gist.github.com/vcoracolombo/4b592644517c0efb3854872a4b30f6cc

Víctor Colombo (5):
  target/ppc: prepare instructions to work with caching last FP insn
  target/ppc: Implement instruction caching for fsqrt
  target/ppc: Implement instruction caching for muladd
  target/ppc: Implement instruction caching for add/sub/mul/div
  target/ppc: Enable hardfpu for Power

 fpu/softfloat.c|  10 +-
 target/ppc/cpu.h   |  37 ++
 target/ppc/excp_helper.c   |   2 +
 target/ppc/fpu_helper.c| 186 +
 target/ppc/helper.h|   1 +
 target/ppc/translate/fp-impl.c.inc |   1 +
 6 files changed, 233 insertions(+), 4 deletions(-)

-- 
2.25.1




[RFC PATCH 2/4] target/ppc: Implement instruction caching for fsqrt

2022-10-05 Thread Víctor Colombo
This patch adds the code necessary to cache fsqrt for usage
with hardfpu in Power. It is also the first instruction to
use the new cache instruction system.

fsqrt is an instruction that receives two arguments, one f64 and
one status, and returns f64. This info will be cached inside a new
union in env, which will grow when other instructions with other
signatures are added.

Hardfpu in QEMU only works when the inexact flag is already set. So,
CACHE_FN_3 will check if FP_XX is set, and set float_flag_inexact
to enable the hardfpu behavior. When the instruction is later
reexecuted, it will be with float_flag_inexact cleared, forcing
softfloat and correctly updating the relevant flags, as is today.

Signed-off-by: Víctor Colombo 
---
 target/ppc/cpu.h| 11 +++
 target/ppc/fpu_helper.c | 39 ++-
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 1132d60162..b423e33a0c 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1082,6 +1082,14 @@ struct ppc_radix_page_info {
 
 enum {
 CACHED_FN_TYPE_NONE,
+CACHED_FN_TYPE_F64_F64_FSTATUS,
+
+};
+
+struct cached_fn_f64_f64_fstatus {
+float64 (*fn)(float64, float_status*);
+float64 arg1;
+float_status arg2;
 };
 
 struct CPUArchState {
@@ -1162,6 +1170,9 @@ struct CPUArchState {
 target_ulong fpscr; /* Floating point status and control register */
 
 int cached_fn_type;
+union {
+struct cached_fn_f64_f64_fstatus f64_f64_fstatus;
+} cached_fn;
 
 /* Internal devices resources */
 ppc_tb_t *tb_env;  /* Time base and decrementer */
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 6aaee37619..b68f12a1a9 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -30,6 +30,21 @@
 env->cached_fn_type = CACHED_FN_TYPE_NONE;\
 } while (0)
 
+#define CACHE_FN_3(env, FN, ARG1, ARG2, FIELD, TYPE)  \
+do {  \
+if (env->fpscr & FP_XX) { \
+env->cached_fn_type = TYPE;   \
+env->cached_fn.FIELD.fn = FN; \
+env->cached_fn.FIELD.arg1 = ARG1; \
+env->cached_fn.FIELD.arg2 = ARG2; \
+env->fp_status.float_exception_flags |= float_flag_inexact;   \
+} else {  \
+assert(!(env->fp_status.float_exception_flags &   \
+ float_flag_inexact));\
+env->cached_fn_type = CACHED_FN_TYPE_NONE;\
+} \
+} while (0)
+
 static inline float128 float128_snan_to_qnan(float128 x)
 {
 float128 r;
@@ -530,6 +545,27 @@ void helper_execute_fp_cached(CPUPPCState *env)
  * so no need to execute it again
  */
 break;
+case CACHED_FN_TYPE_F64_F64_FSTATUS:
+/*
+ * execute the cached insn. At this point, float_exception_flags
+ * should have FI not set, otherwise the result will not be correct
+ */
+assert((env->cached_fn.f64_f64_fstatus.arg2.float_exception_flags &
+   float_flag_inexact) == 0);
+env->cached_fn.f64_f64_fstatus.fn(
+env->cached_fn.f64_f64_fstatus.arg1,
+>cached_fn.f64_f64_fstatus.arg2);
+
+env->fpscr &= ~FP_FI;
+/*
+ * if the cached instruction resulted in FI being set
+ * then we update fpscr with this value
+ */
+if (env->cached_fn.f64_f64_fstatus.arg2.float_exception_flags &
+float_flag_inexact) {
+env->fpscr |= FP_FI | FP_XX;
+}
+break;
 default:
 g_assert_not_reached();
 }
@@ -872,7 +908,8 @@ static void float_invalid_op_sqrt(CPUPPCState *env, int 
flags,
 #define FPU_FSQRT(name, op)   \
 float64 helper_##name(CPUPPCState *env, float64 arg)  \
 { \
-CACHE_FN_NONE(env);   \
+CACHE_FN_3(env, op, arg, env->fp_status, f64_f64_fstatus, \
+CACHED_FN_TYPE_F64_F64_FSTATUS);  \
 float64 ret = op(arg, >fp_status);   \
 int flags = get_float_exception_flags(>fp_status);   \
   \
-- 
2.25.1




[RFC PATCH 0/4] Idea for using hardfloat in PPC

2022-10-05 Thread Víctor Colombo
As can be seen in the mailing thread that added hardfloat support in
QEMU [1], a requirement for it to work is to have float_flag_inexact
set when entering the API in softfloat.c. However, in the same thread,
it was explained that PPC target would not work by default with this
implementation.
The problem is that PPC has a non-sticky inexact bit (there is a
discussion about it in [2]), meaning that we can't just set the flag
and call the API in softfloat.c, as it would return the same flag set
to 1, and we wouldn't know if it is supposed to be updated on FPSCR or
not.
Over the last couple years, there were attempts to enable hardfpu
for Power, like [3]. But nothing got to master.
[5] shows a suggestion by Yonggang Luo and comments by Richard and
Zoltan, about caching the last FP instruction and reexecuting it when
necessary.

This patch set is a proposition on the idea to cache the last FP insn,
to be reexecuted later when the value of FPSCR is to be read by a
program. When executed in hardfloat, the instruction "context" is saved
inside `env`, and is expected to be reexecuted later, in softfloat,
to calculate the correct value of the inexact flag in FPSCR.
The instruction to be cached is the last instruction that changes FI.
If the instruction does not change FI, it keeps the cache intact.
If it changes FI, it caches itself and tries to execute in hardfpu.
It might or might not use hardfloat, but as the inexact flag was
artificially set, it will need to be reexecuted later. 'Later'
means when FPSCR is to be read, like during a call to MFFS, or when
a signal occurs. There are probably other places, e.g. other mffs-like
instructions, but this RFC only addresses these two scenarios.
This is supposed to be more efficient because programs very seldom
read FPSCR, meaning the amount of reexecutions will be low.

For now, this was implemented and tested for linux-user, no softmmu
work or analysis was done.
I implemented the base code to keep all instructions working with
this new behavior (patch 1), and also implemented two instructions
as an example of what would be necessary to do for every instruction
to use hardfpu (patches 2 and 3).

My tests with risu and other manual tests showed the behavior seems to
be correct. I tested mainly if FPSCR is the same after using softfloat
or hardfloat.

However, the impact on performance was not as expected. In x86_64 I
had a small 3% improvement, while in a Power9 machine there was a small
performance loss, as can be seen below (100 executions).

|        | min [s] | max [s] | avg [s] |
| before | 122.309 | 123.459 | 122.747 |
| after  | 123.906 | 125.016 | 124.373 |

The test code can be found in [4].

The issue is most likely all the overhead with the caching, which is
negating the improvement from hardfpu execution.

With all that said, could you kindly take a look at my implementation
and see if it can be improved to result in better performance? Is there
any chance to save this idea?

Thank you very much!

[1] 
https://patchwork.kernel.org/project/qemu-devel/patch/20181124235553.17371-8-c...@braap.org/
[2] https://lists.nongnu.org/archive/html/qemu-ppc/2022-05/msg00246.html
[3] 
https://patchwork.kernel.org/project/qemu-devel/patch/20200218171702.979f0746...@zero.eik.bme.hu/
[4] https://gist.github.com/vcoracolombo/6ad884a402f1bba531e2e3da7e196656
[5] https://lists.gnu.org/archive/html/qemu-devel/2020-05/msg00064.html

Víctor Colombo (4):
  target/ppc: prepare instructions to work with caching last FP insn
  target/ppc: Implement instruction caching for fsqrt
  target/ppc: Implement instruction caching for muladd
  fpu/softfloat: Enable hardfpu for ppc target

 fpu/softfloat.c|   6 +-
 target/ppc/cpu.h   |  28 ++
 target/ppc/excp_helper.c   |   2 +
 target/ppc/fpu_helper.c| 132 +
 target/ppc/helper.h|   1 +
 target/ppc/translate/fp-impl.c.inc |   1 +
 6 files changed, 166 insertions(+), 4 deletions(-)

-- 
2.25.1




[RFC PATCH 3/4] target/ppc: Implement instruction caching for muladd

2022-10-05 Thread Víctor Colombo
This patch adds the code necessary to cache muladd instructions
for usage with hardfpu in Power.

muladd is an instruction that receives five arguments, three f64, one int
(flags) and one status, and returns f64. This info will be cached inside the
union in env, which grows when other instructions with other
signatures are added.

Hardfpu in QEMU only works when the inexact flag is already set. So,
CACHE_FN_5 will check if FP_XX is set, and set float_flag_inexact
to enable the hardfpu behavior. When the instruction is later
reexecuted, it will be with float_flag_inexact cleared, forcing
softfloat and correctly updating the relevant flags, as is today.

Signed-off-by: Víctor Colombo 
---
 target/ppc/cpu.h| 11 +++
 target/ppc/fpu_helper.c | 34 --
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index b423e33a0c..87183de484 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1083,6 +1083,7 @@ struct ppc_radix_page_info {
 enum {
 CACHED_FN_TYPE_NONE,
 CACHED_FN_TYPE_F64_F64_FSTATUS,
+CACHED_FN_TYPE_F64_F64_F64_F64_I_FSTATUS,
 
 };
 
@@ -1092,6 +1093,15 @@ struct cached_fn_f64_f64_fstatus {
 float_status arg2;
 };
 
+struct cached_fn_f64_f64_f64_f64_i_fstatus {
+float64 (*fn)(float64, float64, float64, int, float_status*);
+float64 arg1;
+float64 arg2;
+float64 arg3;
+int arg4;
+float_status arg5;
+};
+
 struct CPUArchState {
 /* Most commonly used resources during translated code execution first */
 target_ulong gpr[32];  /* general purpose registers */
@@ -1172,6 +1182,7 @@ struct CPUArchState {
 int cached_fn_type;
 union {
 struct cached_fn_f64_f64_fstatus f64_f64_fstatus;
+struct cached_fn_f64_f64_f64_f64_i_fstatus f64_f64_f64_f64_i_fstatus;
 } cached_fn;
 
 /* Internal devices resources */
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index b68f12a1a9..3d06a0fc1a 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -45,6 +45,23 @@
 } \
 } while (0)
 
+#define CACHE_FN_5(env, FN, ARG1, ARG2, ARG3, ARG4, FIELD, TYPE)  \
+do {  \
+if (env->fpscr & FP_XX) { \
+env->cached_fn_type = TYPE;   \
+env->cached_fn.FIELD.fn = FN; \
+env->cached_fn.FIELD.arg1 = ARG1; \
+env->cached_fn.FIELD.arg2 = ARG2; \
+env->cached_fn.FIELD.arg3 = ARG3; \
+env->cached_fn.FIELD.arg4 = ARG4; \
+env->fp_status.float_exception_flags |= float_flag_inexact;   \
+} else {  \
+assert(!(env->fp_status.float_exception_flags &   \
+ float_flag_inexact));\
+env->cached_fn_type = CACHED_FN_TYPE_NONE;\
+} \
+} while (0)
+
 static inline float128 float128_snan_to_qnan(float128 x)
 {
 float128 r;
@@ -566,6 +583,17 @@ void helper_execute_fp_cached(CPUPPCState *env)
 env->fpscr |= FP_FI | FP_XX;
 }
 break;
+case CACHED_FN_TYPE_F64_F64_F64_F64_I_FSTATUS:
+; /* hack to allow declaration below */
+struct cached_fn_f64_f64_f64_f64_i_fstatus args =
+env->cached_fn.f64_f64_f64_f64_i_fstatus;
+assert(!(args.arg5.float_exception_flags & float_flag_inexact));
+args.fn(args.arg1, args.arg2, args.arg3, args.arg4, );
+env->fpscr &= ~FP_FI;
+if (args.arg5.float_exception_flags & float_flag_inexact) {
+env->fpscr |= FP_FI | FP_XX;
+}
+break;
 default:
 g_assert_not_reached();
 }
@@ -836,7 +864,8 @@ static void float_invalid_op_madd(CPUPPCState *env, int 
flags,
 static float64 do_fmadd(CPUPPCState *env, float64 a, float64 b,
  float64 c, int madd_flags, uintptr_t retaddr)
 {
-CACHE_FN_NONE(env);
+CACHE_FN_5(env, float64_muladd, a, b, c, madd_flags,
+f64_f64_f64_f64_i_fstatus, CACHED_FN_TYPE_F64_F64_F64_F64_I_FSTATUS);
 float64 ret = float64_muladd(a, b, c, madd_flags, >fp_status);
 int flags = get_float_exception_flags(>fp_status);
 
@@ -849,7 +878,8 @@ static float64 do_fmadd(CPUPPCState *env, float64 a, 
float64 b,
 static uint64_t do_fmadds(CPUPPCState *env, float64 a, float64 b,
   float64 c, int madd_flags, uintptr_t retaddr)
 {
-

[RFC PATCH 4/4] target/ppc: Enable hardfpu for Power

2022-10-05 Thread Víctor Colombo
Remove the build conditional from softfloat.c, allowing TARGET_PPC
to use hardfpu.

Signed-off-by: Víctor Colombo 
---
 fpu/softfloat.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index c7454c3eb1..de94732f6a 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -220,11 +220,9 @@ GEN_INPUT_FLUSH3(float64_input_flush3, float64)
  * the use of hardfloat, since hardfloat relies on the inexact flag being
  * already set.
  */
-#if defined(TARGET_PPC) || defined(__FAST_MATH__)
-# if defined(__FAST_MATH__)
-#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact 
\
+#if defined(__FAST_MATH__)
+# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 IEEE implementation
-# endif
 # define QEMU_NO_HARDFLOAT 1
 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 #else
-- 
2.25.1




[RFC PATCH 1/4] target/ppc: prepare instructions to work with caching last FP insn

2022-10-05 Thread Víctor Colombo
When enabling hardfpu for Power and adding the instruction caching
feature, it will be necessary to uncache when the instruction
is guaranteed to be executed in softfloat. If the cache is not cleared
in this situation, it could lead to a previous instruction being
reexecuted and yield a different result than when only softfloat
was present.

This patch introduces the base code to allow for the implementation of
FP instructions caching, while also adding calls to a macro that clears
the cached instruction for every one that has not been 'migrated' to
hardfpu-compliance yet.

In the future, it will be necessary to implement the necessary code
for each FP instruction that wants to use hardfpu.

Signed-off-by: Víctor Colombo 
---
 target/ppc/cpu.h   |  6 +++
 target/ppc/excp_helper.c   |  2 +
 target/ppc/fpu_helper.c| 65 ++
 target/ppc/helper.h|  1 +
 target/ppc/translate/fp-impl.c.inc |  1 +
 5 files changed, 75 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 7f73e2ac81..1132d60162 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1080,6 +1080,10 @@ struct ppc_radix_page_info {
 #define PPC_CPU_OPCODES_LEN  0x40
 #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
 
+enum {
+CACHED_FN_TYPE_NONE,
+};
+
 struct CPUArchState {
 /* Most commonly used resources during translated code execution first */
 target_ulong gpr[32];  /* general purpose registers */
@@ -1157,6 +1161,8 @@ struct CPUArchState {
 float_status fp_status; /* Floating point execution context */
 target_ulong fpscr; /* Floating point status and control register */
 
+int cached_fn_type;
+
 /* Internal devices resources */
 ppc_tb_t *tb_env;  /* Time base and decrementer */
 ppc_dcr_t *dcr_env;/* Device control registers */
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 214acf5ac4..4671b15386 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -1904,6 +1904,8 @@ void raise_exception_err_ra(CPUPPCState *env, uint32_t 
exception,
 {
 CPUState *cs = env_cpu(env);
 
+helper_execute_fp_cached(env);
+
 cs->exception_index = exception;
 env->error_code = error_code;
 cpu_loop_exit_restore(cs, raddr);
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index ae25f32d6e..6aaee37619 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -23,6 +23,13 @@
 #include "internal.h"
 #include "fpu/softfloat.h"
 
+#define CACHE_FN_NONE(env)\
+do {  \
+assert(!(env->fp_status.float_exception_flags &   \
+ float_flag_inexact));\
+env->cached_fn_type = CACHED_FN_TYPE_NONE;\
+} while (0)
+
 static inline float128 float128_snan_to_qnan(float128 x)
 {
 float128 r;
@@ -514,6 +521,22 @@ void helper_reset_fpstatus(CPUPPCState *env)
 set_float_exception_flags(0, &env->fp_status);
 }
 
+void helper_execute_fp_cached(CPUPPCState *env)
+{
+switch (env->cached_fn_type) {
+case CACHED_FN_TYPE_NONE:
+/*
+ * the last fp instruction was executed in softfloat
+ * so no need to execute it again
+ */
+break;
+default:
+g_assert_not_reached();
+}
+
+env->cached_fn_type = CACHED_FN_TYPE_NONE;
+}
+
 static void float_invalid_op_addsub(CPUPPCState *env, int flags,
 bool set_fpcc, uintptr_t retaddr)
 {
@@ -527,6 +550,7 @@ static void float_invalid_op_addsub(CPUPPCState *env, int 
flags,
 /* fadd - fadd. */
 float64 helper_fadd(CPUPPCState *env, float64 arg1, float64 arg2)
 {
+CACHE_FN_NONE(env);
 float64 ret = float64_add(arg1, arg2, &env->fp_status);
 int flags = get_float_exception_flags(&env->fp_status);
 
@@ -540,6 +564,7 @@ float64 helper_fadd(CPUPPCState *env, float64 arg1, float64 
arg2)
 /* fadds - fadds. */
 float64 helper_fadds(CPUPPCState *env, float64 arg1, float64 arg2)
 {
+CACHE_FN_NONE(env);
 float64 ret = float64r32_add(arg1, arg2, &env->fp_status);
 int flags = get_float_exception_flags(&env->fp_status);
 
@@ -552,6 +577,7 @@ float64 helper_fadds(CPUPPCState *env, float64 arg1, 
float64 arg2)
 /* fsub - fsub. */
 float64 helper_fsub(CPUPPCState *env, float64 arg1, float64 arg2)
 {
+CACHE_FN_NONE(env);
 float64 ret = float64_sub(arg1, arg2, &env->fp_status);
 int flags = get_float_exception_flags(&env->fp_status);
 
@@ -565,6 +591,7 @@ float64 helper_fsub(CPUPPCState *env, float64 arg1, float64 
arg2)
 /* fsubs - fsubs. */
 float64 helper_fsubs(CPUPPCState *env, float64 arg1, float64 arg2)
 {
+CACHE_FN_NONE(env);
 float64 ret = float64r32_sub(arg1, arg2, &env->fp_status);
 int flags = get_fl

[PATCH] target/ppc: Fix xvcmp* clearing FI bit

2022-10-05 Thread Víctor Colombo
Vector instructions in general are not supposed to change the FI bit.
However, xvcmp* instructions are calling gen_helper_float_check_status,
which is leading to a cleared FI flag where it should be kept
unchanged.
As helper_float_check_status only affects inexact, overflow and
underflow, and the xvcmp* instructions don't change these flags, this
issue can be fixed by removing the call to helper_float_check_status.
By doing this, the FI bit in FPSCR will be preserved as expected.

Fixes: 00084a25adf ("target/ppc: introduce separate VSX_CMP macro for xvcmp* instructions")
Signed-off-by: Víctor Colombo 
---
 target/ppc/translate/vsx-impl.c.inc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/target/ppc/translate/vsx-impl.c.inc 
b/target/ppc/translate/vsx-impl.c.inc
index 7acdbceec4..e6e5c45ffd 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -810,7 +810,6 @@ static void gen_##name(DisasContext *ctx)   
  \
 gen_helper_##name(ignored, cpu_env, xt, xa, xb);  \
 tcg_temp_free_i32(ignored);   \
 } \
-gen_helper_float_check_status(cpu_env);   \
 tcg_temp_free_ptr(xt);\
 tcg_temp_free_ptr(xa);\
 tcg_temp_free_ptr(xb);\
-- 
2.25.1




Re: [PATCH 3/3] target/ppc: Merge fsqrt and fsqrts helpers

2022-09-06 Thread Víctor Colombo

On 05/09/2022 14:20, Richard Henderson wrote:
Well, there would of course be no separate call, but 


I didn't understand what you meant here with 'no separate call'...


do_float_check_status would:

     int status = get_float_exception_flags(&env->fp_status);

     set_float_exception_flags(0, &env->fp_status);

straight away.  No extra call overhead, and the steady-state of softfp 
exception flags

outside of an in-progress fp operation is 0.



Right, makes sense. And what about when an invalid operation occurs,
with the corresponding exception enabled bit set?
float_invalid_op_* would stop the execution and do_float_check_status
would not be called, right? So it would require to call
set_float_exception_flags there too, correct?
If that's all that's necessary, I might be able to take a look at it and
come up with a possible patch.

Anything that can raise a softfp exception and doesn't do something with 
it, either
immediately within the same helper, or just afterward with 
helper_float_check_status, is

buggy.  With those fixed, helper_reset_fpstatus may be removed entirely.



Oh, that makes sense. It's easier to implement the idea using this
assumption.


--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer 


[PATCH v2 6/8] target/ppc: Set OV32 when OV is set

2022-09-06 Thread Víctor Colombo
According to PowerISA: "OV32 is set whenever OV is implicitly set, and
is set to the same value that OV is defined to be set to in 32-bit
mode".

This patch changes helper_update_ov_legacy to set/clear ov32 when
applicable.

Signed-off-by: Víctor Colombo 
Reviewed-by: Daniel Henrique Barboza 
---
 target/ppc/int_helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index d905f07d02..696096100b 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -37,9 +37,9 @@
 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
 {
 if (unlikely(ov)) {
-env->so = env->ov = 1;
+env->so = env->ov = env->ov32 = 1;
 } else {
-env->ov = 0;
+env->ov = env->ov32 = 0;
 }
 }
 
-- 
2.25.1




[PATCH v2 3/8] target/ppc: Zero second doubleword in DFP instructions

2022-09-06 Thread Víctor Colombo
Starting at PowerISA v3.1, the second doubleword of the registers
used to store results in DFP instructions are supposed to be zeroed.

From the ISA, chapter 7.2.1.1 Floating-Point Registers:
"""
Chapter 4. Floating-Point Facility provides 32 64-bit
FPRs. Chapter 5. Decimal Floating-Point also employs
FPRs in decimal floating-point (DFP) operations. When
VSX is implemented, the 32 FPRs are mapped to
doubleword 0 of VSRs 0-31. (...)
All instructions that operate on an FPR are redefined
to operate on doubleword element 0 of the
corresponding VSR. (...)
and the contents of doubleword element 1 of the
VSR corresponding to the target FPR or FPR pair for these
instructions are set to 0.
"""

Before, the result stored at doubleword 1 was said to be undefined.

With that, this patch changes the DFP facility to zero doubleword 1
when using set_dfp64 and set_dfp128. This fixes the behavior for ISA
3.1 while keeping the behavior correct for previous ones.

Signed-off-by: Víctor Colombo 
Reviewed-by: Daniel Henrique Barboza 
---
 target/ppc/dfp_helper.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/target/ppc/dfp_helper.c b/target/ppc/dfp_helper.c
index 5ba74b2124..be7aa5357a 100644
--- a/target/ppc/dfp_helper.c
+++ b/target/ppc/dfp_helper.c
@@ -42,13 +42,16 @@ static void get_dfp128(ppc_vsr_t *dst, ppc_fprp_t *dfp)
 
 static void set_dfp64(ppc_fprp_t *dfp, ppc_vsr_t *src)
 {
-dfp->VsrD(0) = src->VsrD(1);
+dfp[0].VsrD(0) = src->VsrD(1);
+dfp[0].VsrD(1) = 0ULL;
 }
 
 static void set_dfp128(ppc_fprp_t *dfp, ppc_vsr_t *src)
 {
 dfp[0].VsrD(0) = src->VsrD(0);
 dfp[1].VsrD(0) = src->VsrD(1);
+dfp[0].VsrD(1) = 0ULL;
+dfp[1].VsrD(1) = 0ULL;
 }
 
 static void set_dfp128_to_avr(ppc_avr_t *dst, ppc_vsr_t *src)
-- 
2.25.1




[PATCH v2 2/8] target/ppc: Remove unused xer_* macros

2022-09-06 Thread Víctor Colombo
The macros xer_ov, xer_ca, xer_ov32, and xer_ca32 are both unused and
hiding the usage of env. Remove them.

Signed-off-by: Víctor Colombo 
Reviewed-by: Daniel Henrique Barboza 
---
 target/ppc/cpu.h | 4 
 1 file changed, 4 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 985ff86f55..6481f48087 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1506,10 +1506,6 @@ void ppc_compat_add_property(Object *obj, const char 
*name,
 #define XER_CMP  8
 #define XER_BC   0
 #define xer_so  (env->so)
-#define xer_ov  (env->ov)
-#define xer_ca  (env->ca)
-#define xer_ov32  (env->ov)
-#define xer_ca32  (env->ca)
 #define xer_cmp ((env->xer >> XER_CMP) & 0xFF)
 #define xer_bc  ((env->xer >> XER_BC)  & 0x7F)
 
-- 
2.25.1




[PATCH v2 1/8] target/ppc: Remove extra space from s128 field in ppc_vsr_t

2022-09-06 Thread Víctor Colombo
Very trivial rogue space removal. There are two spaces between Int128
and s128 in ppc_vsr_t struct, where it should be only one.

Signed-off-by: Víctor Colombo 
Reviewed-by: Daniel Henrique Barboza 
---
 target/ppc/cpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index a4c893cfad..985ff86f55 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -246,7 +246,7 @@ typedef union _ppc_vsr_t {
 #ifdef CONFIG_INT128
 __uint128_t u128;
 #endif
-Int128  s128;
+Int128 s128;
 } ppc_vsr_t;
 
 typedef ppc_vsr_t ppc_avr_t;
-- 
2.25.1




[PATCH v2 4/8] target/ppc: Set result to QNaN for DENBCD when VXCVI occurs

2022-09-06 Thread Víctor Colombo
According to the ISA, for instruction DENBCD:
"If an invalid BCD digit or sign code is detected in the source
operand, an invalid-operation exception (VXCVI) occurs."

In the Invalid Operation Exception section, there is the situation:
"When Invalid Operation Exception is disabled (VE=0) and Invalid
Operation occurs (...) If the operation is an (...) or format the
target FPR is set to a Quiet NaN". This was not being done in
QEMU.

This patch sets the result to QNaN when the instruction DENBCD causes
an Invalid Operation Exception.

Signed-off-by: Víctor Colombo 
Reviewed-by: Daniel Henrique Barboza 
---
 target/ppc/dfp_helper.c | 26 --
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/target/ppc/dfp_helper.c b/target/ppc/dfp_helper.c
index be7aa5357a..cc024316d5 100644
--- a/target/ppc/dfp_helper.c
+++ b/target/ppc/dfp_helper.c
@@ -1147,6 +1147,26 @@ static inline uint8_t dfp_get_bcd_digit_128(ppc_vsr_t 
*t, unsigned n)
 return t->VsrD((n & 0x10) ? 0 : 1) >> ((n << 2) & 63) & 15;
 }
 
+static inline void dfp_invalid_op_vxcvi_64(struct PPC_DFP *dfp)
+{
+/* TODO: fpscr is incorrectly not being saved to env */
+dfp_set_FPSCR_flag(dfp, FP_VX | FP_VXCVI, FPSCR_VE);
+if ((dfp->env->fpscr & FP_VE) == 0) {
+dfp->vt.VsrD(1) = 0x7c00; /* QNaN */
+}
+}
+
+
+static inline void dfp_invalid_op_vxcvi_128(struct PPC_DFP *dfp)
+{
+/* TODO: fpscr is incorrectly not being saved to env */
+dfp_set_FPSCR_flag(dfp, FP_VX | FP_VXCVI, FPSCR_VE);
+if ((dfp->env->fpscr & FP_VE) == 0) {
+dfp->vt.VsrD(0) = 0x7c00; /* QNaN */
+dfp->vt.VsrD(1) = 0x0;
+}
+}
+
 #define DFP_HELPER_ENBCD(op, size)   \
 void helper_##op(CPUPPCState *env, ppc_fprp_t *t, ppc_fprp_t *b, \
  uint32_t s) \
@@ -1173,7 +1193,8 @@ void helper_##op(CPUPPCState *env, ppc_fprp_t *t, 
ppc_fprp_t *b, \
 sgn = 0; \
 break;   \
 default: \
-dfp_set_FPSCR_flag(, FP_VX | FP_VXCVI, FPSCR_VE);\
+dfp_invalid_op_vxcvi_##size();   \
+set_dfp##size(t, );   \
 return;  \
 }\
 }\
@@ -1183,7 +1204,8 @@ void helper_##op(CPUPPCState *env, ppc_fprp_t *t, 
ppc_fprp_t *b, \
 digits[(size) / 4 - n] = dfp_get_bcd_digit_##size(,   \
   offset++); \
 if (digits[(size) / 4 - n] > 10) {   \
-dfp_set_FPSCR_flag(, FP_VX | FP_VXCVI, FPSCR_VE);\
+dfp_invalid_op_vxcvi_##size();   \
+set_dfp##size(t, );   \
 return;  \
 } else { \
 nonzero |= (digits[(size) / 4 - n] > 0); \
-- 
2.25.1




[PATCH v2 8/8] target/ppc: Clear fpstatus flags on helpers missing it

2022-09-06 Thread Víctor Colombo
In ppc emulation, exception flags are not cleared at the end of an
instruction. Instead, the next instruction is responsible for clearing
them before its emulation. However, some helpers are not doing so,
causing an issue where the previously set exception flags are being
used and leading to incorrect values being set in FPSCR.
Fix this by clearing fp_status before doing the instruction 'real' work
for the following helpers that were missing this behavior:

- VSX_CVT_INT_TO_FP_VECTOR
- VSX_CVT_FP_TO_FP
- VSX_CVT_FP_TO_INT_VECTOR
- VSX_CVT_FP_TO_INT2
- VSX_CVT_FP_TO_INT
- VSX_CVT_FP_TO_FP_HP
- VSX_CVT_FP_TO_FP_VECTOR
- VSX_CMP
- VSX_ROUND
- xscvqpdp
- xscvdpsp[n]

Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 37 ++---
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 95b22d99b3..331361234f 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -2637,6 +2637,8 @@ uint32_t helper_##op(CPUPPCState *env, ppc_vsr_t *xt, 
\
 int all_true = 1; \
 int all_false = 1;\
   \
+helper_reset_fpstatus(env);   \
+  \
 for (i = 0; i < nels; i++) {  \
 if (unlikely(tp##_is_any_nan(xa->fld) ||  \
  tp##_is_any_nan(xb->fld))) { \
@@ -2690,6 +2692,8 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, 
ppc_vsr_t *xb)   \
 ppc_vsr_t t = { }; \
 int i; \
\
+helper_reset_fpstatus(env);\
+   \
 for (i = 0; i < nels; i++) {   \
 t.tfld = stp##_to_##ttp(xb->sfld, >fp_status);\
 if (unlikely(stp##_is_signaling_nan(xb->sfld,  \
@@ -2715,6 +2719,8 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, 
ppc_vsr_t *xb)  \
 ppc_vsr_t t = { };\
 int i;\
   \
+helper_reset_fpstatus(env);   \
+  \
 for (i = 0; i < nels; i++) {  \
 t.VsrW(2 * i) = stp##_to_##ttp(xb->VsrD(i), >fp_status); \
 if (unlikely(stp##_is_signaling_nan(xb->VsrD(i),  \
@@ -2752,6 +2758,8 @@ void helper_##op(CPUPPCState *env, uint32_t opcode,   
  \
 ppc_vsr_t t = *xt;  \
 int i;  \
 \
+helper_reset_fpstatus(env); \
+\
 for (i = 0; i < nels; i++) {\
 t.tfld = stp##_to_##ttp(xb->sfld, >fp_status); \
 if (unlikely(stp##_is_signaling_nan(xb->sfld,   \
@@ -2787,6 +2795,8 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, 
ppc_vsr_t *xb)   \
 ppc_vsr_t t = { }; \
 int i; \
\
+helper_reset_fpstatus(env);\
+   \
 for (i = 0; i < nels; i++) {   \
 t.tfld = stp##_to_##ttp(xb->sfld, 1, >fp_status); \
 if (unlikely(stp##_is_signaling_nan(xb->sfld,  \
@@ -2834,6 +2844,8 @@ void helper_XSCVQPDP(CPUPPCState *env, uint32_t ro, 
ppc_vsr_t *xt,
 ppc_vsr_t t = { };
 float_status tstat;
 
+helper_reset_fpstatus(env);
+
 tstat = env->fp_status;
 if (ro != 0) {
 tstat.float_rounding_mode = float_round_to_odd;
@@ -2855,6 +2867,7 @@ uint64_t helper_xscvdpspn(CPUPPCState *env, uint64_t xb)
 {
 uint64_t result, sign, exp, frac;
 
+helper_reset_fpstatus(env);
 float_status tstat = env->fp_status;
 set_float_exception_flags(0, );
 
@@ -2910,22 +2923,20 @@ uint64_t helper_XSCVSPDPN(uint64_

[PATCH v2 7/8] target/ppc: Zero second doubleword of VSR registers for FPR insns

2022-09-06 Thread Víctor Colombo
FPR registers are mapped to the first doubleword of the VSR registers.
Since PowerISA v3.1, the second doubleword of the target register
must be zeroed for FP instructions.

This patch does it by writing 0 to the second dw every time the
first dw is written using set_fpr.

Signed-off-by: Víctor Colombo 
Reviewed-by: Daniel Henrique Barboza 
---
 target/ppc/translate.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 000b1e518d..5e433315e1 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6443,6 +6443,14 @@ static inline void get_fpr(TCGv_i64 dst, int regno)
 static inline void set_fpr(int regno, TCGv_i64 src)
 {
 tcg_gen_st_i64(src, cpu_env, fpr_offset(regno));
+/*
+ * Before PowerISA v3.1 the result of doubleword 1 of the VSR
+ * corresponding to the target FPR was undefined. However,
+ * most (if not all) real hardware were setting the result to 0.
+ * Starting at ISA v3.1, the result for doubleword 1 is now defined
+ * to be 0.
+ */
+tcg_gen_st_i64(tcg_constant_i64(0), cpu_env, vsr64_offset(regno, false));
 }
 
 static inline void get_avr64(TCGv_i64 dst, int regno, bool high)
-- 
2.25.1




[PATCH v2 5/8] target/ppc: Zero second doubleword for VSX madd instructions

2022-09-06 Thread Víctor Colombo
In 205eb5a89e we updated most VSX instructions to zero the
second doubleword, as is requested by PowerISA since v3.1.
However, VSX_MADD helper was left behind unchanged, while it
is also affected and should be fixed as well.

This patch applies the fix for MADD instructions.

Fixes: 205eb5a89e ("target/ppc: Change VSX instructions behavior to fill with zeros")
Signed-off-by: Víctor Colombo 
Reviewed-by: Daniel Henrique Barboza 
---
 target/ppc/fpu_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 0f045b70f8..95b22d99b3 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -2176,7 +2176,7 @@ VSX_TSQRT(xvtsqrtsp, 4, float32, VsrW(i), -126, 23)
 void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \
  ppc_vsr_t *s1, ppc_vsr_t *s2, ppc_vsr_t *s3) \
 { \
-ppc_vsr_t t = *xt;\
+ppc_vsr_t t = { };\
 int i;\
   \
 helper_reset_fpstatus(env);   \
-- 
2.25.1




[PATCH v2 0/8] Multiple ppc instructions fixes

2022-09-06 Thread Víctor Colombo
This patch set fixes multiple instructions for PPC targets that were
producing incorrect results, or setting the wrong bits in FPSCR.

Patch 1 is just a style fix, trivial.
Patch 8 adds helper_reset_fpstatus() calls to instructions
that have an issue where the exception flags are being kept from
the previous instruction, causing incorrect bits to be set,
especially the non-sticky FI bit.
The other patches fix other specific situations.

v1->v2:
- Squash patches 8 through 19 and write a better commit message to it.
- Dropped Daniel's R-b in the squashed patches, as the squash merged
both reviewed and non-reviewed patches. Now require a new, single
R-b.

Víctor Colombo (8):
  target/ppc: Remove extra space from s128 field in ppc_vsr_t
  target/ppc: Remove unused xer_* macros
  target/ppc: Zero second doubleword in DFP instructions
  target/ppc: Set result to QNaN for DENBCD when VXCVI occurs
  target/ppc: Zero second doubleword for VSX madd instructions
  target/ppc: Set OV32 when OV is set
  target/ppc: Zero second doubleword of VSR registers for FPR insns
  target/ppc: Clear fpstatus flags on helpers missing it

 target/ppc/cpu.h|  6 +-
 target/ppc/dfp_helper.c | 31 ---
 target/ppc/fpu_helper.c | 39 +++
 target/ppc/int_helper.c |  4 ++--
 target/ppc/translate.c  |  8 
 5 files changed, 66 insertions(+), 22 deletions(-)

-- 
2.25.1




Re: [PATCH 18/19] target/ppc: Clear fpstatus flags on VSX_CMP

2022-09-05 Thread Víctor Colombo

On 05/09/2022 15:41, Daniel Henrique Barboza wrote:

On 9/1/22 10:17, Víctor Colombo wrote:

Signed-off-by: Víctor Colombo 
---


What I mentioned in patch 10 also applies to all patches from 11 to 18
it seems. All changes made in patches 09-18 are based on the explanation
given in patch 08.

The problem with this is that it'll be annoying if/when something goes
wrong. Let's say that the change made in patch 15 caused a side-effect.
Bisect will point it to patch 15, which doesn't have an explanation of
why you made the change, and then one will need to trace it back to the
mailing list to understand it. It's not a given that one will look at
all the recent changes and understand that the logic used in patch 08
is also being used in the subsequent patches.

I don't mind if you just copy/paste the commit message from patch 08 and
just change the instruction name being fixed. What's important is to
provide some context for each individual change.


Thanks,


Daniel



Hello Daniel. Thank you very much for the reviews.

I'll take your recommendation and make the necessary changes.

Best regards,

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>


Re: [PATCH 3/3] target/ppc: Merge fsqrt and fsqrts helpers

2022-09-05 Thread Víctor Colombo

On 05/09/2022 13:21, Richard Henderson wrote:

On 9/5/22 17:19, Víctor Colombo wrote:

Existing bug, but this is missing to clear fp status to start.

Reviewed-by: Richard Henderson 

r~



Hello Richard, thanks for your review!
gen_reset_fpstatus() is called by the inline implementation in
do_helper_fsqrt() before calling the helper (patch 1).


Oops, ok.



It's probably better to move the call to inside the helper.


I did write about a scheme by which all of these calls should go away.  
I guess it has

been a while...


r~


I have a message bookmarked here ([1]), but I don't know if there is a
previous one with a more in depth scheme.
Anyway, I was also analyzing recently the idea of removing all these
reset_fpstatus() calls from instructions helpers. I think this would
require to actually call it from the end of the (previous) instructions 
instead of the beginning? Like adding the call to

do_float_check_status() and float_invalid_op_*() as a focal point to
'hide' the calls to reset_fpstatus(). However there are also insns
helpers that don't call these auxiliary functions, which I think would
cause the refactor to not be worthwhile overall.
Did you have another idea that could be simpler?

[1] https://lists.gnu.org/archive/html/qemu-devel/2020-05/msg00064.html


--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>


Re: [PATCH 3/3] target/ppc: Merge fsqrt and fsqrts helpers

2022-09-05 Thread Víctor Colombo

On 05/09/2022 12:56, Richard Henderson wrote:

On 9/5/22 13:37, Víctor Colombo wrote:

These two helpers are almost identical, differing only by the softfloat
operation they call. Merge them into one using a macro.
Also, take this opportunity to capitalize the helper name as we moved
the instruction to decodetree in a previous patch.

Signed-off-by: Víctor Colombo 
---
  target/ppc/fpu_helper.c    | 35 +++---
  target/ppc/helper.h    |  4 ++--
  target/ppc/translate/fp-impl.c.inc |  4 ++--
  3 files changed, 17 insertions(+), 26 deletions(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 0f045b70f8..32995179b5 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -830,30 +830,21 @@ static void float_invalid_op_sqrt(CPUPPCState 
*env, int flags,

  }
  }

-/* fsqrt - fsqrt. */
-float64 helper_fsqrt(CPUPPCState *env, float64 arg)
-{
-    float64 ret = float64_sqrt(arg, >fp_status);
-    int flags = get_float_exception_flags(>fp_status);
-
-    if (unlikely(flags & float_flag_invalid)) {
-    float_invalid_op_sqrt(env, flags, 1, GETPC());
-    }
-
-    return ret;
+#define FPU_FSQRT(name, 
op)   \
+float64 helper_##name(CPUPPCState *env, float64 
arg)  \
+{ 
\
+    float64 ret = op(arg, 
>fp_status);   \
+    int flags = 
get_float_exception_flags(>fp_status);   \
+  
\
+    if (unlikely(flags & float_flag_invalid)) 
{   \
+    float_invalid_op_sqrt(env, flags, 1, 
GETPC());    \
+
} 
\


Existing bug, but this is missing to clear fp status to start.

Reviewed-by: Richard Henderson 

r~



Hello Richard, thanks for your review!
gen_reset_fpstatus() is called by the inline implementation in
do_helper_fsqrt() before calling the helper (patch 1).
It's probably better to move the call to inside the helper.

+  
\
+    return 
ret;   \

  }

-/* fsqrts - fsqrts. */
-float64 helper_fsqrts(CPUPPCState *env, float64 arg)
-{
-    float64 ret = float64r32_sqrt(arg, >fp_status);
-    int flags = get_float_exception_flags(>fp_status);
-
-    if (unlikely(flags & float_flag_invalid)) {
-    float_invalid_op_sqrt(env, flags, 1, GETPC());
-    }
-    return ret;
-}
+FPU_FSQRT(FSQRT, float64_sqrt)
+FPU_FSQRT(FSQRTS, float64r32_sqrt)

  /* fre - fre. */
  float64 helper_fre(CPUPPCState *env, float64 arg)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 159b352f6e..68610896b8 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -116,8 +116,8 @@ DEF_HELPER_4(fmadds, i64, env, i64, i64, i64)
  DEF_HELPER_4(fmsubs, i64, env, i64, i64, i64)
  DEF_HELPER_4(fnmadds, i64, env, i64, i64, i64)
  DEF_HELPER_4(fnmsubs, i64, env, i64, i64, i64)
-DEF_HELPER_2(fsqrt, f64, env, f64)
-DEF_HELPER_2(fsqrts, f64, env, f64)
+DEF_HELPER_2(FSQRT, f64, env, f64)
+DEF_HELPER_2(FSQRTS, f64, env, f64)
  DEF_HELPER_2(fre, i64, env, i64)
  DEF_HELPER_2(fres, i64, env, i64)
  DEF_HELPER_2(frsqrte, i64, env, i64)
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc

index 7a90c0e350..8d5cf0f982 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -280,8 +280,8 @@ static bool do_helper_fsqrt(DisasContext *ctx, 
arg_A_tb *a,

  return true;
  }

-TRANS(FSQRT, do_helper_fsqrt, gen_helper_fsqrt);
-TRANS(FSQRTS, do_helper_fsqrt, gen_helper_fsqrts);
+TRANS(FSQRT, do_helper_fsqrt, gen_helper_FSQRT);
+TRANS(FSQRTS, do_helper_fsqrt, gen_helper_FSQRTS);

  /*** Floating-Point 
multiply-and-add   ***/

  /* fmadd - fmadds */





--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>


[PATCH 1/3] target/ppc: Move fsqrt to decodetree

2022-09-05 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/insn32.decode   |  7 +++
 target/ppc/translate/fp-impl.c.inc | 29 +
 target/ppc/translate/fp-ops.c.inc  |  1 -
 3 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index eb41efc100..b55d1550f3 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -20,6 +20,9 @@
   frt fra frb frc rc:bool
 @A  .. frt:5 fra:5 frb:5 frc:5 . rc:1   
 
+_tb   frt frb rc:bool
+@A_tb   .. frt:5 . frb:5 . . rc:1   _tb
+
   rt ra si:int64_t
 @D  .. rt:5 ra:5 si:s16 
 
@@ -353,6 +356,10 @@ STFDU   110111 . .. ... @D
 STFDX   01 . ..  1011010111 -   @X
 STFDUX  01 . ..  100111 -   @X
 
+### Floating-Point Arithmetic Instructions
+
+FSQRT   11 . - . - 10110 .  @A_tb
+
 ### Floating-Point Select Instruction
 
 FSEL11 . . . . 10111 .  @A
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index 0e893eafa7..e8359af005 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -254,29 +254,34 @@ static bool trans_FSEL(DisasContext *ctx, arg_A *a)
 GEN_FLOAT_AB(sub, 0x14, 0x07C0, 1, PPC_FLOAT);
 /* Optional: */
 
-/* fsqrt */
-static void gen_fsqrt(DisasContext *ctx)
+static bool do_helper_fsqrt(DisasContext *ctx, arg_A_tb *a,
+void (*helper)(TCGv_i64, TCGv_ptr, TCGv_i64))
 {
-TCGv_i64 t0;
-TCGv_i64 t1;
-if (unlikely(!ctx->fpu_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_FPU);
-return;
-}
+TCGv_i64 t0, t1;
+
+REQUIRE_INSNS_FLAGS(ctx, FLOAT_FSQRT);
+REQUIRE_FPU(ctx);
+
 t0 = tcg_temp_new_i64();
 t1 = tcg_temp_new_i64();
+
 gen_reset_fpstatus();
-get_fpr(t0, rB(ctx->opcode));
-gen_helper_fsqrt(t1, cpu_env, t0);
-set_fpr(rD(ctx->opcode), t1);
+get_fpr(t0, a->frb);
+helper(t1, cpu_env, t0);
+set_fpr(a->frt, t1);
 gen_compute_fprf_float64(t1);
-if (unlikely(Rc(ctx->opcode) != 0)) {
+if (unlikely(a->rc != 0)) {
 gen_set_cr1_from_fpscr(ctx);
 }
+
 tcg_temp_free_i64(t0);
 tcg_temp_free_i64(t1);
+
+return true;
 }
 
+TRANS(FSQRT, do_helper_fsqrt, gen_helper_fsqrt);
+
 static void gen_fsqrts(DisasContext *ctx)
 {
 TCGv_i64 t0;
diff --git a/target/ppc/translate/fp-ops.c.inc 
b/target/ppc/translate/fp-ops.c.inc
index 1b65f5ab73..38759f5939 100644
--- a/target/ppc/translate/fp-ops.c.inc
+++ b/target/ppc/translate/fp-ops.c.inc
@@ -62,7 +62,6 @@ GEN_HANDLER_E(stfdepx, 0x1F, 0x1F, 0x16, 0x0001, 
PPC_NONE, PPC2_BOOKE206),
 GEN_HANDLER_E(stfdpx, 0x1F, 0x17, 0x1C, 0x0021, PPC_NONE, PPC2_ISA205),
 
 GEN_HANDLER(frsqrtes, 0x3B, 0x1A, 0xFF, 0x001F07C0, PPC_FLOAT_FRSQRTES),
-GEN_HANDLER(fsqrt, 0x3F, 0x16, 0xFF, 0x001F07C0, PPC_FLOAT_FSQRT),
 GEN_HANDLER(fsqrts, 0x3B, 0x16, 0xFF, 0x001F07C0, PPC_FLOAT_FSQRT),
 GEN_HANDLER(fcmpo, 0x3F, 0x00, 0x01, 0x0061, PPC_FLOAT),
 GEN_HANDLER(fcmpu, 0x3F, 0x00, 0x00, 0x0061, PPC_FLOAT),
-- 
2.25.1




[PATCH 3/3] target/ppc: Merge fsqrt and fsqrts helpers

2022-09-05 Thread Víctor Colombo
These two helpers are almost identical, differing only by the softfloat
operation they call. Merge them into one using a macro.
Also, take this opportunity to capitalize the helper name as we moved
the instruction to decodetree in a previous patch.

Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c| 35 +++---
 target/ppc/helper.h|  4 ++--
 target/ppc/translate/fp-impl.c.inc |  4 ++--
 3 files changed, 17 insertions(+), 26 deletions(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 0f045b70f8..32995179b5 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -830,30 +830,21 @@ static void float_invalid_op_sqrt(CPUPPCState *env, int 
flags,
 }
 }
 
-/* fsqrt - fsqrt. */
-float64 helper_fsqrt(CPUPPCState *env, float64 arg)
-{
-float64 ret = float64_sqrt(arg, >fp_status);
-int flags = get_float_exception_flags(>fp_status);
-
-if (unlikely(flags & float_flag_invalid)) {
-float_invalid_op_sqrt(env, flags, 1, GETPC());
-}
-
-return ret;
+#define FPU_FSQRT(name, op)   \
+float64 helper_##name(CPUPPCState *env, float64 arg)  \
+{ \
+float64 ret = op(arg, >fp_status);   \
+int flags = get_float_exception_flags(>fp_status);   \
+  \
+if (unlikely(flags & float_flag_invalid)) {   \
+float_invalid_op_sqrt(env, flags, 1, GETPC());\
+} \
+  \
+return ret;   \
 }
 
-/* fsqrts - fsqrts. */
-float64 helper_fsqrts(CPUPPCState *env, float64 arg)
-{
-float64 ret = float64r32_sqrt(arg, >fp_status);
-int flags = get_float_exception_flags(>fp_status);
-
-if (unlikely(flags & float_flag_invalid)) {
-float_invalid_op_sqrt(env, flags, 1, GETPC());
-}
-return ret;
-}
+FPU_FSQRT(FSQRT, float64_sqrt)
+FPU_FSQRT(FSQRTS, float64r32_sqrt)
 
 /* fre - fre. */
 float64 helper_fre(CPUPPCState *env, float64 arg)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 159b352f6e..68610896b8 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -116,8 +116,8 @@ DEF_HELPER_4(fmadds, i64, env, i64, i64, i64)
 DEF_HELPER_4(fmsubs, i64, env, i64, i64, i64)
 DEF_HELPER_4(fnmadds, i64, env, i64, i64, i64)
 DEF_HELPER_4(fnmsubs, i64, env, i64, i64, i64)
-DEF_HELPER_2(fsqrt, f64, env, f64)
-DEF_HELPER_2(fsqrts, f64, env, f64)
+DEF_HELPER_2(FSQRT, f64, env, f64)
+DEF_HELPER_2(FSQRTS, f64, env, f64)
 DEF_HELPER_2(fre, i64, env, i64)
 DEF_HELPER_2(fres, i64, env, i64)
 DEF_HELPER_2(frsqrte, i64, env, i64)
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index 7a90c0e350..8d5cf0f982 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -280,8 +280,8 @@ static bool do_helper_fsqrt(DisasContext *ctx, arg_A_tb *a,
 return true;
 }
 
-TRANS(FSQRT, do_helper_fsqrt, gen_helper_fsqrt);
-TRANS(FSQRTS, do_helper_fsqrt, gen_helper_fsqrts);
+TRANS(FSQRT, do_helper_fsqrt, gen_helper_FSQRT);
+TRANS(FSQRTS, do_helper_fsqrt, gen_helper_FSQRTS);
 
 /*** Floating-Point multiply-and-add   ***/
 /* fmadd - fmadds */
-- 
2.25.1




[PATCH 0/3] target/ppc: Move fsqrt[s] to decodetree

2022-09-05 Thread Víctor Colombo
Move fsqrt and fsqrts instructions from decode legacy to decodetree.

Víctor Colombo (3):
  target/ppc: Move fsqrt to decodetree
  target/ppc: Move fsqrts to decodetree
  target/ppc: Merge fsqrt and fsqrts helpers

 target/ppc/fpu_helper.c| 35 -
 target/ppc/helper.h|  4 +--
 target/ppc/insn32.decode   |  8 +
 target/ppc/translate/fp-impl.c.inc | 50 ++
 target/ppc/translate/fp-ops.c.inc  |  2 --
 5 files changed, 40 insertions(+), 59 deletions(-)

-- 
2.25.1




[PATCH 2/3] target/ppc: Move fsqrts to decodetree

2022-09-05 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/insn32.decode   |  1 +
 target/ppc/translate/fp-impl.c.inc | 23 +--
 target/ppc/translate/fp-ops.c.inc  |  1 -
 3 files changed, 2 insertions(+), 23 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index b55d1550f3..7eb808a43f 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -359,6 +359,7 @@ STFDUX  01 . ..  100111 -   @X
 ### Floating-Point Arithmetic Instructions
 
 FSQRT   11 . - . - 10110 .  @A_tb
+FSQRTS  111011 . - . - 10110 .  @A_tb
 
 ### Floating-Point Select Instruction
 
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index e8359af005..7a90c0e350 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -281,28 +281,7 @@ static bool do_helper_fsqrt(DisasContext *ctx, arg_A_tb *a,
 }
 
 TRANS(FSQRT, do_helper_fsqrt, gen_helper_fsqrt);
-
-static void gen_fsqrts(DisasContext *ctx)
-{
-TCGv_i64 t0;
-TCGv_i64 t1;
-if (unlikely(!ctx->fpu_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_FPU);
-return;
-}
-t0 = tcg_temp_new_i64();
-t1 = tcg_temp_new_i64();
-gen_reset_fpstatus();
-get_fpr(t0, rB(ctx->opcode));
-gen_helper_fsqrts(t1, cpu_env, t0);
-set_fpr(rD(ctx->opcode), t1);
-gen_compute_fprf_float64(t1);
-if (unlikely(Rc(ctx->opcode) != 0)) {
-gen_set_cr1_from_fpscr(ctx);
-}
-tcg_temp_free_i64(t0);
-tcg_temp_free_i64(t1);
-}
+TRANS(FSQRTS, do_helper_fsqrt, gen_helper_fsqrts);
 
 /*** Floating-Point multiply-and-add   ***/
 /* fmadd - fmadds */
diff --git a/target/ppc/translate/fp-ops.c.inc 
b/target/ppc/translate/fp-ops.c.inc
index 38759f5939..d4c6c4bed1 100644
--- a/target/ppc/translate/fp-ops.c.inc
+++ b/target/ppc/translate/fp-ops.c.inc
@@ -62,7 +62,6 @@ GEN_HANDLER_E(stfdepx, 0x1F, 0x1F, 0x16, 0x0001, 
PPC_NONE, PPC2_BOOKE206),
 GEN_HANDLER_E(stfdpx, 0x1F, 0x17, 0x1C, 0x0021, PPC_NONE, PPC2_ISA205),
 
 GEN_HANDLER(frsqrtes, 0x3B, 0x1A, 0xFF, 0x001F07C0, PPC_FLOAT_FRSQRTES),
-GEN_HANDLER(fsqrts, 0x3B, 0x16, 0xFF, 0x001F07C0, PPC_FLOAT_FSQRT),
 GEN_HANDLER(fcmpo, 0x3F, 0x00, 0x01, 0x0061, PPC_FLOAT),
 GEN_HANDLER(fcmpu, 0x3F, 0x00, 0x00, 0x0061, PPC_FLOAT),
 GEN_HANDLER(fabs, 0x3F, 0x08, 0x08, 0x001F, PPC_FLOAT),
-- 
2.25.1




[PATCH 19/19] target/ppc: Clear fpstatus flags on VSX_ROUND

2022-09-01 Thread Víctor Colombo
Fix xsrdpic and other instructions not clearing the flags and saving
incorrect values to FPSCR.

Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index fd3a966371..be47d73b14 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -3172,6 +3172,8 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, 
ppc_vsr_t *xb)   \
 int i; \
 FloatRoundMode curr_rounding_mode; \
\
+helper_reset_fpstatus(env);\
+   \
 if (rmode != FLOAT_ROUND_CURRENT) {\
 curr_rounding_mode = get_float_rounding_mode(&env->fp_status); \
 set_float_rounding_mode(rmode, &env->fp_status);   \
-- 
2.25.1




[PATCH 18/19] target/ppc: Clear fpstatus flags on VSX_CMP

2022-09-01 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 5f7f52ab5b..fd3a966371 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -2639,6 +2639,8 @@ uint32_t helper_##op(CPUPPCState *env, ppc_vsr_t *xt, 
\
 int all_true = 1; \
 int all_false = 1;\
   \
+helper_reset_fpstatus(env);   \
+  \
 for (i = 0; i < nels; i++) {  \
 if (unlikely(tp##_is_any_nan(xa->fld) ||  \
  tp##_is_any_nan(xb->fld))) { \
-- 
2.25.1




[PATCH 17/19] target/ppc: Clear fpstatus flags for xscvdpsp[n]

2022-09-01 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 51142bd7d5..5f7f52ab5b 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -2719,6 +2719,8 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, 
ppc_vsr_t *xb)  \
 ppc_vsr_t t = { };\
 int i;\
   \
+helper_reset_fpstatus(env);   \
+  \
 for (i = 0; i < nels; i++) {  \
 t.VsrW(2 * i) = stp##_to_##ttp(xb->VsrD(i), &env->fp_status); \
 if (unlikely(stp##_is_signaling_nan(xb->VsrD(i),  \
@@ -2865,6 +2867,7 @@ uint64_t helper_xscvdpspn(CPUPPCState *env, uint64_t xb)
 {
 uint64_t result, sign, exp, frac;
 
+helper_reset_fpstatus(env);
 float_status tstat = env->fp_status;
 set_float_exception_flags(0, &tstat);
 
-- 
2.25.1




[PATCH 15/19] target/ppc: Clear fpstatus flags on VSX_CVT_FP_TO_FP_VECTOR

2022-09-01 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 39f0ec7431..627166672c 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -2756,6 +2756,8 @@ void helper_##op(CPUPPCState *env, uint32_t opcode,   
  \
 ppc_vsr_t t = *xt;  \
 int i;  \
 \
+helper_reset_fpstatus(env); \
+\
 for (i = 0; i < nels; i++) {\
 t.tfld = stp##_to_##ttp(xb->sfld, &env->fp_status); \
 if (unlikely(stp##_is_signaling_nan(xb->sfld,   \
-- 
2.25.1




[PATCH 16/19] target/ppc: Clear fpstatus flags for xscvqpdp

2022-09-01 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 627166672c..51142bd7d5 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -2842,6 +2842,8 @@ void helper_XSCVQPDP(CPUPPCState *env, uint32_t ro, 
ppc_vsr_t *xt,
 ppc_vsr_t t = { };
 float_status tstat;
 
+helper_reset_fpstatus(env);
+
 tstat = env->fp_status;
 if (ro != 0) {
 tstat.float_rounding_mode = float_round_to_odd;
-- 
2.25.1




[PATCH 12/19] target/ppc: Clear fpstatus flags on VSX_CVT_FP_TO_INT2

2022-09-01 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index c6dc14d653..6f9a1a755e 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -2981,12 +2981,12 @@ VSX_CVT_FP_TO_INT128(XSCVQPSQZ, int128, 
0x8000ULL);
 #define VSX_CVT_FP_TO_INT2(op, nels, stp, ttp, sfi, rnan)\
 void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \
 {\
-int all_flags = env->fp_status.float_exception_flags, flags; \
 ppc_vsr_t t = { };   \
-int i;   \
+int i, flags;\
+ \
+helper_reset_fpstatus(env);  \
  \
 for (i = 0; i < nels; i++) { \
-env->fp_status.float_exception_flags = 0;\
 t.VsrW(2 * i) = stp##_to_##ttp##_round_to_zero(xb->VsrD(i),  \
 &env->fp_status); \
 flags = env->fp_status.float_exception_flags;\
@@ -2995,11 +2995,9 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, 
ppc_vsr_t *xb) \
   rnan, 0, GETPC()); \
 }\
 t.VsrW(2 * i + 1) = t.VsrW(2 * i);   \
-all_flags |= flags;  \
 }\
  \
 *xt = t; \
-env->fp_status.float_exception_flags = all_flags;\
 do_float_check_status(env, sfi, GETPC());\
 }
 
-- 
2.25.1




[PATCH 14/19] target/ppc: Clear fpstatus flags on VSX_CVT_FP_TO_FP_HP

2022-09-01 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index eb16fb20a9..39f0ec7431 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -2791,6 +2791,8 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, 
ppc_vsr_t *xb)   \
 ppc_vsr_t t = { }; \
 int i; \
\
+helper_reset_fpstatus(env);\
+   \
 for (i = 0; i < nels; i++) {   \
 t.tfld = stp##_to_##ttp(xb->sfld, 1, &env->fp_status); \
 if (unlikely(stp##_is_signaling_nan(xb->sfld,  \
-- 
2.25.1




[PATCH 10/19] target/ppc: Clear fpstatus flags on VSX_CVT_FP_TO_FP

2022-09-01 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index ceb70ed36e..8a20413a78 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -2692,6 +2692,8 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, 
ppc_vsr_t *xb)   \
 ppc_vsr_t t = { }; \
 int i; \
\
+helper_reset_fpstatus(env);\
+   \
 for (i = 0; i < nels; i++) {   \
 t.tfld = stp##_to_##ttp(xb->sfld, &env->fp_status);\
 if (unlikely(stp##_is_signaling_nan(xb->sfld,  \
-- 
2.25.1




[PATCH 09/19] target/ppc: Clear fpstatus flags on VSX_CVT_INT_TO_FP

2022-09-01 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 94029883c7..ceb70ed36e 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -3057,6 +3057,8 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, 
ppc_vsr_t *xb)\
 ppc_vsr_t t = { };  \
 int i;  \
 \
+helper_reset_fpstatus(env); \
+\
 for (i = 0; i < nels; i++) {\
 t.tfld = stp##_to_##ttp(xb->sfld, &env->fp_status); \
 if (r2sp) { \
-- 
2.25.1




[PATCH 13/19] target/ppc: Clear fpstatus flags on VSX_CVT_FP_TO_INT

2022-09-01 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 6f9a1a755e..eb16fb20a9 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -2914,22 +2914,20 @@ uint64_t helper_XSCVSPDPN(uint64_t xb)
 #define VSX_CVT_FP_TO_INT(op, nels, stp, ttp, sfld, tfld, sfi, rnan) \
 void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \
 {\
-int all_flags = env->fp_status.float_exception_flags, flags; \
 ppc_vsr_t t = { };   \
-int i;   \
+int i, flags;\
+ \
+helper_reset_fpstatus(env);  \
  \
 for (i = 0; i < nels; i++) { \
-env->fp_status.float_exception_flags = 0;\
 t.tfld = stp##_to_##ttp##_round_to_zero(xb->sfld, &env->fp_status);  \
 flags = env->fp_status.float_exception_flags;\
 if (unlikely(flags & float_flag_invalid)) {  \
 t.tfld = float_invalid_cvt(env, flags, t.tfld, rnan, 0, GETPC());\
 }\
-all_flags |= flags;  \
 }\
  \
 *xt = t; \
-env->fp_status.float_exception_flags = all_flags;\
 do_float_check_status(env, sfi, GETPC());\
 }
 
-- 
2.25.1




[PATCH 07/19] target/ppc: Zero second doubleword of VSR registers for FPR insns

2022-09-01 Thread Víctor Colombo
FPR registers are mapped to the first doubleword of the VSR registers.
Since PowerISA v3.1, the second doubleword of the target register
must be zeroed for FP instructions.

This patch does it by writing 0 to the second dw every time the
first dw is being written using set_fpr.

Signed-off-by: Víctor Colombo 
---
 target/ppc/translate.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 388337f81b..a0fa419a1f 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6443,6 +6443,14 @@ static inline void get_fpr(TCGv_i64 dst, int regno)
 static inline void set_fpr(int regno, TCGv_i64 src)
 {
 tcg_gen_st_i64(src, cpu_env, fpr_offset(regno));
+/*
+ * Before PowerISA v3.1 the result of doubleword 1 of the VSR
+ * corresponding to the target FPR was undefined. However,
+ * most (if not all) real hardware were setting the result to 0.
+ * Starting at ISA v3.1, the result for doubleword 1 is now defined
+ * to be 0.
+ */
+tcg_gen_st_i64(tcg_constant_i64(0), cpu_env, vsr64_offset(regno, false));
 }
 
 static inline void get_avr64(TCGv_i64 dst, int regno, bool high)
-- 
2.25.1




[PATCH 08/19] target/ppc: Clear fpstatus flags on VSX_CVT_INT_TO_FP_VECTOR

2022-09-01 Thread Víctor Colombo
Fix xscvsdqp incorrectly not clearing the FI bit.
Power ISA states that xscvsdqp should always set FPSCR.FI=0.
Right now, QEMU sometimes incorrectly sets the flag to 1.

Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index da79c64eca..94029883c7 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -3126,6 +3126,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode,   
  \
 {   \
 ppc_vsr_t t = *xt;  \
 \
+helper_reset_fpstatus(env); \
 t.tfld = stp##_to_##ttp(xb->sfld, &env->fp_status); \
 helper_compute_fprf_##ttp(env, t.tfld); \
 \
-- 
2.25.1




[PATCH 11/19] target/ppc: Clear fpstatus flags on VSX_CVT_FP_TO_INT_VECTOR

2022-09-01 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 8a20413a78..c6dc14d653 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -3024,6 +3024,8 @@ void helper_##op(CPUPPCState *env, uint32_t opcode,   
   \
 ppc_vsr_t t = { };   \
 int flags;   \
  \
+helper_reset_fpstatus(env);  \
+ \
 t.tfld = stp##_to_##ttp##_round_to_zero(xb->sfld, &env->fp_status);  \
 flags = get_float_exception_flags(&env->fp_status);  \
 if (flags & float_flag_invalid) {\
@@ -3036,7 +3038,6 @@ void helper_##op(CPUPPCState *env, uint32_t opcode,   
   \
 
 VSX_CVT_FP_TO_INT_VECTOR(xscvqpsdz, float128, int64, f128, VsrD(0),  \
   0x8000ULL)
-
 VSX_CVT_FP_TO_INT_VECTOR(xscvqpswz, float128, int32, f128, VsrD(0),  \
   0x8000ULL)
 VSX_CVT_FP_TO_INT_VECTOR(xscvqpudz, float128, uint64, f128, VsrD(0), 0x0ULL)
-- 
2.25.1




[PATCH 06/19] target/ppc: Set OV32 when OV is set

2022-09-01 Thread Víctor Colombo
According to PowerISA: "OV32 is set whenever OV is implicitly set, and
is set to the same value that OV is defined to be set to in 32-bit
mode".

This patch changes helper_update_ov_legacy to set/clear ov32 when
applicable.

Signed-off-by: Víctor Colombo 
---
 target/ppc/int_helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index d905f07d02..696096100b 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -37,9 +37,9 @@
 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
 {
 if (unlikely(ov)) {
-env->so = env->ov = 1;
+env->so = env->ov = env->ov32 = 1;
 } else {
-env->ov = 0;
+env->ov = env->ov32 = 0;
 }
 }
 
-- 
2.25.1




[PATCH 05/19] target/ppc: Zero second doubleword for VSX madd instructions

2022-09-01 Thread Víctor Colombo
In 205eb5a89e we updated most VSX instructions to zero the
second doubleword, as is requested by PowerISA since v3.1.
However, VSX_MADD helper was left behind unchanged, while it
is also affected and should be fixed as well.

This patch applies the fix for MADD instructions.

Signed-off-by: Víctor Colombo 
---
 target/ppc/fpu_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 7ab6beadad..da79c64eca 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -2178,7 +2178,7 @@ VSX_TSQRT(xvtsqrtsp, 4, float32, VsrW(i), -126, 23)
 void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \
  ppc_vsr_t *s1, ppc_vsr_t *s2, ppc_vsr_t *s3) \
 { \
-ppc_vsr_t t = *xt;\
+ppc_vsr_t t = { };\
 int i;\
   \
 helper_reset_fpstatus(env);   \
-- 
2.25.1




[PATCH 01/19] target/ppc: Remove extra space from s128 field in ppc_vsr_t

2022-09-01 Thread Víctor Colombo
Very trivial rogue space removal. There are two spaces between Int128
and s128 in ppc_vsr_t struct, where it should be only one.

Signed-off-by: Víctor Colombo 
---
 target/ppc/cpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index a4c893cfad..985ff86f55 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -246,7 +246,7 @@ typedef union _ppc_vsr_t {
 #ifdef CONFIG_INT128
 __uint128_t u128;
 #endif
-Int128  s128;
+Int128 s128;
 } ppc_vsr_t;
 
 typedef ppc_vsr_t ppc_avr_t;
-- 
2.25.1




[PATCH 04/19] target/ppc: Set result to QNaN for DENBCD when VXCVI occurs

2022-09-01 Thread Víctor Colombo
According to the ISA, for instruction DENBCD:
"If an invalid BCD digit or sign code is detected in the source
operand, an invalid-operation exception (VXCVI) occurs."

In the Invalid Operation Exception section, there is the situation:
"When Invalid Operation Exception is disabled (VE=0) and Invalid
Operation occurs (...) If the operation is an (...) or format the
target FPR is set to a Quiet NaN". This was not being done in
QEMU.

This patch sets the result to QNaN when the instruction DENBCD causes
an Invalid Operation Exception.

Signed-off-by: Víctor Colombo 
---
 target/ppc/dfp_helper.c | 26 --
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/target/ppc/dfp_helper.c b/target/ppc/dfp_helper.c
index be7aa5357a..cc024316d5 100644
--- a/target/ppc/dfp_helper.c
+++ b/target/ppc/dfp_helper.c
@@ -1147,6 +1147,26 @@ static inline uint8_t dfp_get_bcd_digit_128(ppc_vsr_t 
*t, unsigned n)
 return t->VsrD((n & 0x10) ? 0 : 1) >> ((n << 2) & 63) & 15;
 }
 
+static inline void dfp_invalid_op_vxcvi_64(struct PPC_DFP *dfp)
+{
+/* TODO: fpscr is incorrectly not being saved to env */
+dfp_set_FPSCR_flag(dfp, FP_VX | FP_VXCVI, FPSCR_VE);
+if ((dfp->env->fpscr & FP_VE) == 0) {
+dfp->vt.VsrD(1) = 0x7c00000000000000; /* QNaN */
+}
+}
+
+
+static inline void dfp_invalid_op_vxcvi_128(struct PPC_DFP *dfp)
+{
+/* TODO: fpscr is incorrectly not being saved to env */
+dfp_set_FPSCR_flag(dfp, FP_VX | FP_VXCVI, FPSCR_VE);
+if ((dfp->env->fpscr & FP_VE) == 0) {
+dfp->vt.VsrD(0) = 0x7c00000000000000; /* QNaN */
+dfp->vt.VsrD(1) = 0x0;
+}
+}
+
 #define DFP_HELPER_ENBCD(op, size)   \
 void helper_##op(CPUPPCState *env, ppc_fprp_t *t, ppc_fprp_t *b, \
  uint32_t s) \
@@ -1173,7 +1193,8 @@ void helper_##op(CPUPPCState *env, ppc_fprp_t *t, 
ppc_fprp_t *b, \
 sgn = 0; \
 break;   \
 default: \
-dfp_set_FPSCR_flag(&dfp, FP_VX | FP_VXCVI, FPSCR_VE);\
+dfp_invalid_op_vxcvi_##size(&dfp);   \
+set_dfp##size(t, &dfp.vt);   \
 return;  \
 }\
 }\
@@ -1183,7 +1204,8 @@ void helper_##op(CPUPPCState *env, ppc_fprp_t *t, 
ppc_fprp_t *b, \
 digits[(size) / 4 - n] = dfp_get_bcd_digit_##size(&dfp.vb,   \
   offset++); \
 if (digits[(size) / 4 - n] > 10) {   \
-dfp_set_FPSCR_flag(&dfp, FP_VX | FP_VXCVI, FPSCR_VE);\
+dfp_invalid_op_vxcvi_##size(&dfp);   \
+set_dfp##size(t, &dfp.vt);   \
 return;  \
 } else { \
 nonzero |= (digits[(size) / 4 - n] > 0); \
-- 
2.25.1




[PATCH 02/19] target/ppc: Remove unused xer_* macros

2022-09-01 Thread Víctor Colombo
The macros xer_ov, xer_ca, xer_ov32, and xer_ca32 are both unused and
hiding the usage of env. Remove them.

Signed-off-by: Víctor Colombo 
---
 target/ppc/cpu.h | 4 
 1 file changed, 4 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 985ff86f55..6481f48087 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1506,10 +1506,6 @@ void ppc_compat_add_property(Object *obj, const char 
*name,
 #define XER_CMP  8
 #define XER_BC   0
 #define xer_so  (env->so)
-#define xer_ov  (env->ov)
-#define xer_ca  (env->ca)
-#define xer_ov32  (env->ov)
-#define xer_ca32  (env->ca)
 #define xer_cmp ((env->xer >> XER_CMP) & 0xFF)
 #define xer_bc  ((env->xer >> XER_BC)  & 0x7F)
 
-- 
2.25.1




[PATCH 03/19] target/ppc: Zero second doubleword in DFP instructions

2022-09-01 Thread Víctor Colombo
Starting at PowerISA v3.1, the second doubleword of the registers
used to store results in DFP instructions is supposed to be zeroed.

From the ISA, chapter 7.2.1.1 Floating-Point Registers:
"""
Chapter 4. Floating-Point Facility provides 32 64-bit
FPRs. Chapter 5. Decimal Floating-Point also employs
FPRs in decimal floating-point (DFP) operations. When
VSX is implemented, the 32 FPRs are mapped to
doubleword 0 of VSRs 0-31. (...)
All instructions that operate on an FPR are redefined
to operate on doubleword element 0 of the
corresponding VSR. (...)
and the contents of doubleword element 1 of the
VSR corresponding to the target FPR or FPR pair for these
instructions are set to 0.
"""

Before, the result stored at doubleword 1 was said to be undefined.

With that, this patch changes the DFP facility to zero doubleword 1
when using set_dfp64 and set_dfp128. This fixes the behavior for ISA
3.1 while keeping the behavior correct for previous ones.

Signed-off-by: Víctor Colombo 
---
 target/ppc/dfp_helper.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/target/ppc/dfp_helper.c b/target/ppc/dfp_helper.c
index 5ba74b2124..be7aa5357a 100644
--- a/target/ppc/dfp_helper.c
+++ b/target/ppc/dfp_helper.c
@@ -42,13 +42,16 @@ static void get_dfp128(ppc_vsr_t *dst, ppc_fprp_t *dfp)
 
 static void set_dfp64(ppc_fprp_t *dfp, ppc_vsr_t *src)
 {
-dfp->VsrD(0) = src->VsrD(1);
+dfp[0].VsrD(0) = src->VsrD(1);
+dfp[0].VsrD(1) = 0ULL;
 }
 
 static void set_dfp128(ppc_fprp_t *dfp, ppc_vsr_t *src)
 {
 dfp[0].VsrD(0) = src->VsrD(0);
 dfp[1].VsrD(0) = src->VsrD(1);
+dfp[0].VsrD(1) = 0ULL;
+dfp[1].VsrD(1) = 0ULL;
 }
 
 static void set_dfp128_to_avr(ppc_avr_t *dst, ppc_vsr_t *src)
-- 
2.25.1




[PATCH 00/19] Multiple ppc instructions fixes

2022-09-01 Thread Víctor Colombo
This patch set fixes multiple instructions for PPC targets that were
producing incorrect results, or setting the wrong bits in FPSCR.

Patch 1 is just a style fix, trivial.
Patches 8 through 19 add helper_reset_fpstatus() calls to instructions
that have an issue where the exception flags are being kept from
the previous instruction, causing incorrect bits to be set,
especially the non-sticky FI bit.
Other patches fix other specific situations.

Víctor Colombo (19):
  target/ppc: Remove extra space from s128 field in ppc_vsr_t
  target/ppc: Remove unused xer_* macros
  target/ppc: Zero second doubleword in DFP instructions
  target/ppc: Set result to QNaN for DENBCD when VXCVI occurs
  target/ppc: Zero second doubleword for VSX madd instructions
  target/ppc: Set OV32 when OV is set
  target/ppc: Zero second doubleword of VSR registers for FPR insns
  target/ppc: Clear fpstatus flags on VSX_CVT_INT_TO_FP_VECTOR
  target/ppc: Clear fpstatus flags on VSX_CVT_INT_TO_FP
  target/ppc: Clear fpstatus flags on VSX_CVT_FP_TO_FP
  target/ppc: Clear fpstatus flags on VSX_CVT_FP_TO_INT_VECTOR
  target/ppc: Clear fpstatus flags on VSX_CVT_FP_TO_INT2
  target/ppc: Clear fpstatus flags on VSX_CVT_FP_TO_INT
  target/ppc: Clear fpstatus flags on VSX_CVT_FP_TO_FP_HP
  target/ppc: Clear fpstatus flags on VSX_CVT_FP_TO_FP_VECTOR
  target/ppc: Clear fpstatus flags for xscvqpdp
  target/ppc: Clear fpstatus flags for xscvdpsp[n]
  target/ppc: Clear fpstatus flags on VSX_CMP
  target/ppc: Clear fpstatus flags on VSX_ROUND

 target/ppc/cpu.h|  6 +-
 target/ppc/dfp_helper.c | 31 ---
 target/ppc/fpu_helper.c | 39 +++
 target/ppc/int_helper.c |  4 ++--
 target/ppc/translate.c  |  8 
 5 files changed, 66 insertions(+), 22 deletions(-)

-- 
2.25.1




Re: [PATCH v3] target/ppc: Implement new wait variants

2022-07-20 Thread Víctor Colombo
   * wc=1 (waitrsv) waits for an exception or a reservation to be lost.
+ * Reservation-loss may have implementation-specific conditions, so it
+ * can be implemented as a no-op.
+ *
+ * wc=2 waits for an exception or an amount of time to pass. This
+ * amount is implementation-specific so it can be implemented as a
+ * no-op.
+ *
+ * ISA v3.1 allows for execution to resume "in the rare case of
+ * an implementation-dependent event", so in any case software must
+ * not depend on the architected resumption condition to become
+ * true, so no-op implementations should be architecturally correct
+ * (if suboptimal).
+ */
  }

  #if defined(TARGET_PPC64)
@@ -6852,8 +6931,9 @@ GEN_HANDLER2(stdcx_, "stdcx.", 0x1F, 0x16, 0x06, 
0x, PPC_64B),
  GEN_HANDLER_E(stqcx_, 0x1F, 0x16, 0x05, 0, PPC_NONE, PPC2_LSQ_ISA207),
  #endif
  GEN_HANDLER(sync, 0x1F, 0x16, 0x12, 0x039FF801, PPC_MEM_SYNC),
-GEN_HANDLER(wait, 0x1F, 0x1E, 0x01, 0x03FFF801, PPC_WAIT),
-GEN_HANDLER_E(wait, 0x1F, 0x1E, 0x00, 0x039FF801, PPC_NONE, PPC2_ISA300),
+/* ISA v3.0 changed the extended opcode from 62 to 30 */
+GEN_HANDLER(wait, 0x1F, 0x1E, 0x01, 0x039FF801, PPC_WAIT),
+GEN_HANDLER_E(wait, 0x1F, 0x1E, 0x00, 0x039CF801, PPC_NONE, PPC2_ISA300),
  GEN_HANDLER(b, 0x12, 0xFF, 0xFF, 0x, PPC_FLOW),
  GEN_HANDLER(bc, 0x10, 0xFF, 0xFF, 0x, PPC_FLOW),
  GEN_HANDLER(bcctr, 0x13, 0x10, 0x10, 0x, PPC_FLOW),
--
2.35.1



Looks correct with what the ISA says. I reviewed mostly the flow
expected for each ISA, and this v2 looks ok now.
I didn't dive deep on the 'waiting' behavior itself, but assuming the
code is the same as it was before, and the new considerations regarding
noop seem to be correct when compared with what the ISA says, LGTM

Reviewed-by: Víctor Colombo 

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>


Re: [PATCH] target/ppc: fix unreachable code in do_ldst_quad()

2022-07-20 Thread Víctor Colombo

On 20/07/2022 10:57, Daniel Henrique Barboza wrote:

Coverity reports that commit fc34e81acd51 ("target/ppc: add macros to
check privilege level") turned the following code unreachable:

if (!prefixed && !(ctx->insns_flags2 & PPC2_LSQ_ISA207)) {
 /* lq and stq were privileged prior to V. 2.07 */
 REQUIRE_SV(ctx);


 CID 1490757:  Control flow issues  (UNREACHABLE)
 This code cannot be reached: "if (ctx->le_mode) {

 if (ctx->le_mode) {
 gen_align_no_le(ctx);
 return true;
 }
}

This happens because the macro REQUIRE_SV(), in CONFIG_USER_ONLY builds, will
always result in a 'return true' statement.

Fix it by using "#if !defined(CONFIG_USER_ONLY)" to fold the code that
shouldn't be there if we're running in a non-privileged state. This is
also how the REQUIRE_SV() macro is being used in
storage-ctrl-impl.c.inc.

Fixes: Coverity CID 1490757
Fixes: fc34e81acd51 ("target/ppc: add macros to check privilege level")
Cc: Matheus Ferst 
Signed-off-by: Daniel Henrique Barboza 
---
  target/ppc/translate/fixedpoint-impl.c.inc | 4 
  1 file changed, 4 insertions(+)

diff --git a/target/ppc/translate/fixedpoint-impl.c.inc 
b/target/ppc/translate/fixedpoint-impl.c.inc
index db14d3bebc..4a32fac4f3 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -82,10 +82,14 @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool 
store, bool prefixed)
  /* lq and stq were privileged prior to V. 2.07 */
  REQUIRE_SV(ctx);

+#if !defined(CONFIG_USER_ONLY)
  if (ctx->le_mode) {
  gen_align_no_le(ctx);
  return true;
  }
+#else
+qemu_build_not_reached();


nit: I think the indentation here is off by 1 level (missing 4 spaces)?


+#endif
  }

  if (!store && unlikely(a->ra == a->rt)) {
--
2.36.1



Reviewed-by: Víctor Colombo 

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>


Re: [PATCH] target/ppc: Implement new wait variants

2022-07-19 Thread Víctor Colombo

Hello Nicholas,

On 19/07/2022 08:38, Nicholas Piggin wrote:

ISA v2.06 adds new variations of wait, specified by the WC field. These
are not compatible with the wait 0 implementation, because they add
additional conditions that cause the processor to resume, which can
cause software to hang or run very slowly.

ISA v3.0 changed the wait opcode.

ISA v3.1 added new WC values to the new wait opcode, and added a PL
field.

This implements the new wait encoding and supports WC variants with
no-op implementations, which provides basic correctness as explained.

Signed-off-by: Nicholas Piggin 
---
  target/ppc/translate.c | 84 ++
  1 file changed, 76 insertions(+), 8 deletions(-)

diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 1d6daa4608..ce4aa84f1d 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -4066,12 +4066,79 @@ static void gen_sync(DisasContext *ctx)
  /* wait */
  static void gen_wait(DisasContext *ctx)
  {
-TCGv_i32 t0 = tcg_const_i32(1);
-tcg_gen_st_i32(t0, cpu_env,
-   -offsetof(PowerPCCPU, env) + offsetof(CPUState, halted));
-tcg_temp_free_i32(t0);
-/* Stop translation, as the CPU is supposed to sleep from now */
-gen_exception_nip(ctx, EXCP_HLT, ctx->base.pc_next);
+uint32_t wc = (ctx->opcode >> 21) & 3;
+uint32_t pl = (ctx->opcode >> 16) & 3;


I think the best here would be to move this instruction to decodetree.
However, this can be a bit of extra work and out of the scope you thought
of for this patch. What do you think about adding an EXTRACT_HELPER to
target/ppc/internal.h?


+
+/* v3.0 and later use the ISA flag for wait rather than a PM flag */
+if (!(ctx->insns_flags2 & PPC2_PM_ISA206) &&
+!(ctx->insns_flags2 & PPC2_ISA300)) {
+/* wc field was introduced in ISA v2.06 */
+if (wc) {
+gen_invalid(ctx);
+return;
+}
+}
+
+if (!(ctx->insns_flags2 & PPC2_ISA310)) {
+/* pl field was introduced in ISA v3.1 */
+if (pl) {
+gen_invalid(ctx);
+return;
+}


IIUC the ISA says that "Reserved fields in instructions are ignored by
the processor". So this check is incorrect, I guess, as we should allow
the instruction to continue.


+
+if (ctx->insns_flags2 & PPC2_ISA300) {
+/* wc > 0 is reserved in v3.0 */
+if (wc > 0) {


This however is correct


+gen_invalid(ctx);
+return;
+}
+}
+}
+
+/* wc=3 is reserved and pl=1-3 are reserved in v3.1. */
+if (wc == 3 || pl > 0) {


This can cause a situation where the field is reserved in a previous ISA
and should be ignored. I think the best option is to put these checks
inside a conditional for each different ISA. Otherwise it's getting a
bit hard to follow what should happen in each situation.


+gen_invalid(ctx);
+return;
+}
+
+/* wait 0 waits for an exception to occur. */
+if (wc == 0) {
+TCGv_i32 t0 = tcg_const_i32(1);
+tcg_gen_st_i32(t0, cpu_env,
+   -offsetof(PowerPCCPU, env) + offsetof(CPUState, 
halted));
+tcg_temp_free_i32(t0);
+/* Stop translation, as the CPU is supposed to sleep from now */
+gen_exception_nip(ctx, EXCP_HLT, ctx->base.pc_next);
+}
+
+/*
+ * Other wait types must not just wait until an exception occurs because
+ * ignoring their other wake-up conditions could cause a hang.
+ *
+ * For v2.06 and 2.07, wc=1,2 are architected but may be implemented as
+ * no-ops.
+ *
+ * wc=1 (waitrsv) waits for an exception or a reservation to be lost.
+ * Reservation-loss may have implementation-specific conditions, so it
+ * can be implemented as a no-op.
+ *
+ * wc=2 waits for an implementation-specific condition which could be
+ * always true, so it can be implemented as a no-op.
+ *
+ * For v3.1, wc=1,2 are architected but may be implemented as no-ops.
+ *
+ * wc=1 similarly to v2.06 and v2.07.
+ *
+ * wc=2 waits for an exception or an amount of time to pass. This
+ * amount is implementation-specific so it can be implemented as a
+ * no-op.
+ *
+ * ISA v3.1 does allow for execution to resume "in the rare case of
+ * an implementation-dependent event", so in any case software must
+ * not depend on the architected resumption condition to become
+ * true, so no-op implementations should be architecturally correct
+ * (if suboptimal).
+ */
  }

  #if defined(TARGET_PPC64)
@@ -6852,8 +6919,9 @@ GEN_HANDLER2(stdcx_, "stdcx.", 0x1F, 0x16, 0x06, 
0x00000000, PPC_64B),
  GEN_HANDLER_E(stqcx_, 0x1F, 0x16, 0x05, 0, PPC_NONE, PPC2_LSQ_ISA207),
  #endif
  GEN_HANDLER(sync, 0x1F, 0x16, 0x12, 0x039FF801, PPC_MEM_SYNC),
-GEN_HANDLER(wait, 0x1F, 0x1E, 0x01, 0x03FFF801, PPC_WAIT),
-GEN_HANDLER_E(wait, 0x1F, 0x1E, 0x00, 0x039FF801, 

[PATCH v4 3/3] target/ppc: Implement hashstp and hashchkp

2022-07-15 Thread Víctor Colombo
Implementation for instructions hashstp and hashchkp, the privileged
versions of hashst and hashchk, which were added in Power ISA 3.1B.

Signed-off-by: Víctor Colombo 
---
 target/ppc/excp_helper.c   | 2 ++
 target/ppc/helper.h| 2 ++
 target/ppc/insn32.decode   | 2 ++
 target/ppc/translate/fixedpoint-impl.c.inc | 2 ++
 4 files changed, 8 insertions(+)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index fa5a737e22..847eff9213 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2255,6 +2255,8 @@ void helper_##op(CPUPPCState *env, target_ulong ea, 
target_ulong ra,  \
 
 HELPER_HASH(HASHST, env->spr[SPR_HASHKEYR], true)
 HELPER_HASH(HASHCHK, env->spr[SPR_HASHKEYR], false)
+HELPER_HASH(HASHSTP, env->spr[SPR_HASHPKEYR], true)
+HELPER_HASH(HASHCHKP, env->spr[SPR_HASHPKEYR], false)
 
 #if !defined(CONFIG_USER_ONLY)
 
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 5817af632b..122b2e9359 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -6,6 +6,8 @@ DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32)
 #endif
 DEF_HELPER_4(HASHST, void, env, tl, tl, tl)
 DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl)
+DEF_HELPER_4(HASHSTP, void, env, tl, tl, tl)
+DEF_HELPER_4(HASHCHKP, void, env, tl, tl, tl)
 #if !defined(CONFIG_USER_ONLY)
 DEF_HELPER_2(store_msr, void, env, tl)
 DEF_HELPER_1(rfi, void, env)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 544514565c..da08960fca 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -330,6 +330,8 @@ PEXTD           011111 ..... ..... ..... 0010111100 -   @X
 
 HASHST          011111 ..... ..... ..... 1011010010 .   @X_DW
 HASHCHK         011111 ..... ..... ..... 1011110010 .   @X_DW
+HASHSTP         011111 ..... ..... ..... 1010010010 .   @X_DW
+HASHCHKP        011111 ..... ..... ..... 1010110010 .   @X_DW
 
 ## BCD Assist
 
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc 
b/target/ppc/translate/fixedpoint-impl.c.inc
index 41c06de8a2..1ba56cbed5 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -572,3 +572,5 @@ static bool do_hash(DisasContext *ctx, arg_X *a, bool priv,
 
 TRANS(HASHST, do_hash, false, gen_helper_HASHST)
 TRANS(HASHCHK, do_hash, false, gen_helper_HASHCHK)
+TRANS(HASHSTP, do_hash, true, gen_helper_HASHSTP)
+TRANS(HASHCHKP, do_hash, true, gen_helper_HASHCHKP)
-- 
2.25.1




[PATCH v4 1/3] target/ppc: Add HASHKEYR and HASHPKEYR SPRs

2022-07-15 Thread Víctor Colombo
Add the Special Purpose Registers HASHKEYR and HASHPKEYR, which were
introduced by the Power ISA 3.1B. They are used by the new instructions
hashchk(p) and hashst(p).

The ISA states that the Operating System should generate the value for
these registers when creating a process, so it is the OS's responsibility
to do so. We initialize them with 0 for qemu-softmmu, and set a random
64-bit value for linux-user.

Signed-off-by: Víctor Colombo 
---

Is the way I did the random number generation ok?

---
 target/ppc/cpu.h  |  2 ++
 target/ppc/cpu_init.c | 28 
 2 files changed, 30 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index a4c893cfad..4551d81b5f 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1676,6 +1676,8 @@ void ppc_compat_add_property(Object *obj, const char 
*name,
 #define SPR_BOOKE_GIVOR14 (0x1BD)
 #define SPR_TIR   (0x1BE)
 #define SPR_PTCR  (0x1D0)
+#define SPR_HASHKEYR  (0x1D4)
+#define SPR_HASHPKEYR (0x1D5)
 #define SPR_BOOKE_SPEFSCR (0x200)
 #define SPR_Exxx_BBEAR(0x201)
 #define SPR_Exxx_BBTAR(0x202)
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index d1493a660c..29c7752483 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -5700,6 +5700,33 @@ static void register_power9_mmu_sprs(CPUPPCState *env)
 #endif
 }
 
+static void register_power10_hash_sprs(CPUPPCState *env)
+{
+/*
+ * it's the OS responsability to generate a random value for the registers
+ * in each process' context. So, initialize it with 0 here.
+ */
+uint64_t hashkeyr_initial_value = 0, hashpkeyr_initial_value = 0;
+#if defined(CONFIG_USER_ONLY)
+/* in linux-user, setup the hash register with a random value */
+GRand *rand = g_rand_new();
+hashkeyr_initial_value =
+((uint64_t)g_rand_int(rand) << 32) | (uint64_t)g_rand_int(rand);
+hashpkeyr_initial_value =
+((uint64_t)g_rand_int(rand) << 32) | (uint64_t)g_rand_int(rand);
+g_rand_free(rand);
+#endif
+spr_register(env, SPR_HASHKEYR, "HASHKEYR",
+SPR_NOACCESS, SPR_NOACCESS,
+&spr_read_generic, &spr_write_generic,
+hashkeyr_initial_value);
+spr_register_hv(env, SPR_HASHPKEYR, "HASHPKEYR",
+SPR_NOACCESS, SPR_NOACCESS,
+SPR_NOACCESS, SPR_NOACCESS,
+&spr_read_generic, &spr_write_generic,
+hashpkeyr_initial_value);
+}
+
 /*
  * Initialize PMU counter overflow timers for Power8 and
  * newer Power chips when using TCG.
@@ -6484,6 +6511,7 @@ static void init_proc_POWER10(CPUPPCState *env)
 register_power8_book4_sprs(env);
 register_power8_rpr_sprs(env);
 register_power9_mmu_sprs(env);
+register_power10_hash_sprs(env);
 
 /* FIXME: Filter fields properly based on privilege level */
 spr_register_kvm_hv(env, SPR_PSSCR, "PSSCR", NULL, NULL, NULL, NULL,
-- 
2.25.1




[PATCH v4 2/3] target/ppc: Implement hashst and hashchk

2022-07-15 Thread Víctor Colombo
Implementation for instructions hashst and hashchk, which were added
in Power ISA 3.1B.

It was decided to implement the hash algorithm from the ground up in
this patch, exactly as described in the Power ISA.

Signed-off-by: Víctor Colombo 
---
 target/ppc/excp_helper.c   | 82 ++
 target/ppc/helper.h|  2 +
 target/ppc/insn32.decode   |  8 +++
 target/ppc/translate.c |  5 ++
 target/ppc/translate/fixedpoint-impl.c.inc | 32 +
 5 files changed, 129 insertions(+)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index cb752b184a..fa5a737e22 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2174,6 +2174,88 @@ void helper_td(CPUPPCState *env, target_ulong arg1, 
target_ulong arg2,
 #endif
 #endif
 
+static uint32_t helper_SIMON_LIKE_32_64(uint32_t x, uint64_t key, uint32_t 
lane)
+{
+const uint16_t c = 0xfffc;
+const uint64_t z0 = 0xfa2561cdf44ac398ULL;
+uint16_t z = 0, temp;
+uint16_t k[32], eff_k[32], xleft[33], xright[33], fxleft[32];
+
+for (int i = 3; i >= 0; i--) {
+k[i] = key & 0xffff;
+key >>= 16;
+}
+xleft[0] = x & 0xffff;
+xright[0] = (x >> 16) & 0xffff;
+
+for (int i = 0; i < 28; i++) {
+z = (z0 >> (63 - i)) & 1;
+temp = ror16(k[i + 3], 3) ^ k[i + 1];
+k[i + 4] = c ^ z ^ k[i] ^ temp ^ ror16(temp, 1);
+}
+
+for (int i = 0; i < 8; i++) {
+eff_k[4 * i + 0] = k[4 * i + ((0 + lane) % 4)];
+eff_k[4 * i + 1] = k[4 * i + ((1 + lane) % 4)];
+eff_k[4 * i + 2] = k[4 * i + ((2 + lane) % 4)];
+eff_k[4 * i + 3] = k[4 * i + ((3 + lane) % 4)];
+}
+
+for (int i = 0; i < 32; i++) {
+fxleft[i] = (rol16(xleft[i], 1) &
+rol16(xleft[i], 8)) ^ rol16(xleft[i], 2);
+xleft[i + 1] = xright[i] ^ fxleft[i] ^ eff_k[i];
+xright[i + 1] = xleft[i];
+}
+
+return (((uint32_t)xright[32]) << 16) | xleft[32];
+}
+
+static uint64_t hash_digest(uint64_t ra, uint64_t rb, uint64_t key)
+{
+uint64_t stage0_h = 0ULL, stage0_l = 0ULL;
+uint64_t stage1_h, stage1_l;
+
+for (int i = 0; i < 4; i++) {
+stage0_h |= ror64(rb & 0xff, 8 * (2 * i + 1));
+stage0_h |= ((ra >> 32) & 0xff) << (8 * 2 * i);
+stage0_l |= ror64((rb >> 32) & 0xff, 8 * (2 * i + 1));
+stage0_l |= (ra & 0xff) << (8 * 2 * i);
+rb >>= 8;
+ra >>= 8;
+}
+
+stage1_h = (uint64_t)helper_SIMON_LIKE_32_64(stage0_h >> 32, key, 0) << 32;
+stage1_h |= helper_SIMON_LIKE_32_64(stage0_h, key, 1);
+stage1_l = (uint64_t)helper_SIMON_LIKE_32_64(stage0_l >> 32, key, 2) << 32;
+stage1_l |= helper_SIMON_LIKE_32_64(stage0_l, key, 3);
+
+return stage1_h ^ stage1_l;
+}
+
+#include "qemu/guest-random.h"
+
+#define HELPER_HASH(op, key, store)   \
+void helper_##op(CPUPPCState *env, target_ulong ea, target_ulong ra,  \
+ target_ulong rb) \
+{ \
+uint64_t calculated_hash = hash_digest(ra, rb, key), loaded_hash; \
+  \
+if (store) {  \
+cpu_stq_data_ra(env, ea, calculated_hash, GETPC());   \
+} else {  \
+loaded_hash = cpu_ldq_data_ra(env, ea, GETPC());  \
+if (loaded_hash != calculated_hash) { \
+/* hashes don't match, trap */\
+raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, \
+POWERPC_EXCP_TRAP, GETPC());  \
+} \
+} \
+}
+
+HELPER_HASH(HASHST, env->spr[SPR_HASHKEYR], true)
+HELPER_HASH(HASHCHK, env->spr[SPR_HASHKEYR], false)
+
 #if !defined(CONFIG_USER_ONLY)
 
 #ifdef CONFIG_TCG
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 159b352f6e..5817af632b 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -4,6 +4,8 @@ DEF_HELPER_FLAGS_4(tw, TCG_CALL_NO_WG, void, env, tl, tl, i32)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32)
 #endif
+DEF_HELPER_4(HASHST, void, env, tl, tl, tl)
+DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl)
 #if !defined(CONFIG_USER_ONLY)
 DEF_HELPER_2(store_msr, void, env, tl)
 DEF_HELPER_1(rfi, void, env)
diff --git a/target/ppc/insn32.decode b

[PATCH v4 0/3] Implement Power ISA 3.1B hash insns

2022-07-15 Thread Víctor Colombo
This patch series implements the 4 instructions added in Power ISA
3.1B:

- hashchk
- hashst
- hashchkp
- hashstp

It's built on top of ppc-next. Working branch for ease of use can be
found here:
https://github.com/PPC64/qemu/tree/vccolombo-hash-to-send-v4

What do you think about the choice to implement the hash algorithm
from the ground up, following the SIMON-like algorithm presented in
the Power ISA? IIUC, this algorithm is not the same as the original[1].
Other options would be to use another algorithm already implemented
in QEMU, or even to make these instructions a nop for all Power versions.

v1->v2:
- Split the patch in 2
- Rebase to master

v2->v3:
- Split patches in 3
- the new patch (patch 1) is separating the kvm header
  changes [Cornelia]

v3->v4:
- Remove Patch 1 (linux-headers/asm-powerpc/kvm.h:
  Add HASHKEYR and HASHPKEYR in headers)
    - Daniel recommended dropping the KVM part:
      https://lists.nongnu.org/archive/html/qemu-ppc/2022-07/msg00213.html
- Replace Patch 1 with a separate patch setting up the registers
  for TCG only. Also, they are now set up with a random value in
  linux-user.
- Change the register naming:
    - SPR_POWER_HASHKEYR -> SPR_HASHKEYR
- Drop RFC tag

[1] https://eprint.iacr.org/2013/404.pdf

Víctor Colombo (3):
  target/ppc: Add HASHKEYR and HASHPKEYR SPRs
  target/ppc: Implement hashst and hashchk
  target/ppc: Implement hashstp and hashchkp

 target/ppc/cpu.h   |  2 +
 target/ppc/cpu_init.c  | 28 
 target/ppc/excp_helper.c   | 84 ++
 target/ppc/helper.h|  4 ++
 target/ppc/insn32.decode   | 10 +++
 target/ppc/translate.c |  5 ++
 target/ppc/translate/fixedpoint-impl.c.inc | 34 +
 7 files changed, 167 insertions(+)

-- 
2.25.1




Re: [RFC PATCH v3 0/3] Implement Power ISA 3.1B hash insns

2022-07-15 Thread Víctor Colombo

On 15/07/2022 10:23, Daniel Henrique Barboza wrote:

On 7/13/22 13:54, Víctor Colombo wrote:

This patch series implements the 4 instructions added in Power ISA
3.1B:

- hashchk
- hashst
- hashchkp
- hashstp

To build it, you need to apply the following patches on top of master:
<20220701133507.740619-2-lucas.couti...@eldorado.org.br>
<20220701133507.740619-3-lucas.couti...@eldorado.org.br>
<20220712193741.59134-2-leandro.lup...@eldorado.org.br>
<20220712193741.59134-3-leandro.lup...@eldorado.org.br>

Working branch for ease of use can be found here:
https://github.com/PPC64/qemu/tree/vccolombo-hash-to-send-v3

What do you think about the choice to implement the hash algorithm
from the ground up, following the SIMON-like algorithm presented in
the Power ISA? IIUC, this algorithm is not the same as the original[1].
Other options would be to use another algorithm already implemented
in QEMU, or even to make these instructions a nop for all Power versions.

Also, I was thinking about using the call to spr_register_kvm() in
init_proc_POWER10 to initialize the registers with a random value.
I'm not sure what the behavior is here. I would expect it to be the job
of the OS to set the regs, but it looks like KVM is not exporting them,
so they are always 0 (?). Does anyone have any insight on this?


This happens because KVM on POWER10 isn't handling these registers
appropriately. We are probably missing kernel/kvm code to do so.

Since KVM on POWER10 is on an uncertain spot at this moment I wouldn't
worry too much about it. Making the regs read/write work in TCG is good
enough for now.


Daniel


Hello Daniel,

Thanks for taking a look at this. I agree that in this case it is better
to make it work in TCG and drop the KVM part from this patch set.
I'll work on it now.

Thanks!





v1->v2:
- Split the patch in 2
- Rebase to master

v2->v3:
- Split patches in 3
 - the new patch (patch 1) is separating the kvm header
   changes [Cornelia]

[1] https://eprint.iacr.org/2013/404.pdf

Víctor Colombo (3):
   linux-headers/asm-powerpc/kvm.h: Add HASHKEYR and HASHPKEYR in headers
   target/ppc: Implement hashst and hashchk
   target/ppc: Implement hashstp and hashchkp

  linux-headers/asm-powerpc/kvm.h    |  3 +
  target/ppc/cpu.h   |  2 +
  target/ppc/cpu_init.c  |  7 ++
  target/ppc/excp_helper.c   | 82 ++
  target/ppc/helper.h    |  4 ++
  target/ppc/insn32.decode   | 10 +++
  target/ppc/translate.c |  5 ++
  target/ppc/translate/fixedpoint-impl.c.inc | 34 +
  8 files changed, 147 insertions(+)




--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>


[RFC PATCH v3 3/3] target/ppc: Implement hashstp and hashchkp

2022-07-13 Thread Víctor Colombo
Implementation for instructions hashstp and hashchkp, the privileged
versions of hashst and hashchk, which were added in Power ISA 3.1B.

Signed-off-by: Víctor Colombo 
---
 target/ppc/cpu.h   | 1 +
 target/ppc/cpu_init.c  | 3 +++
 target/ppc/excp_helper.c   | 2 ++
 target/ppc/helper.h| 2 ++
 target/ppc/insn32.decode   | 2 ++
 target/ppc/translate/fixedpoint-impl.c.inc | 2 ++
 6 files changed, 12 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index f3f98d7a01..e6fc9c41f0 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1678,6 +1678,7 @@ void ppc_compat_add_property(Object *obj, const char 
*name,
 #define SPR_TIR   (0x1BE)
 #define SPR_PTCR  (0x1D0)
 #define SPR_POWER_HASHKEYR(0x1D4)
+#define SPR_POWER_HASHPKEYR   (0x1D5)
 #define SPR_BOOKE_SPEFSCR (0x200)
 #define SPR_Exxx_BBEAR(0x201)
 #define SPR_Exxx_BBTAR(0x202)
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index a2bbb84d47..3e704304b1 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6493,6 +6493,9 @@ static void init_proc_POWER10(CPUPPCState *env)
 spr_register_kvm(env, SPR_POWER_HASHKEYR, "HASHKEYR",
 SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic,
 KVM_REG_PPC_HASHKEYR, 0x0);
+spr_register_kvm(env, SPR_POWER_HASHPKEYR, "HASHPKEYR",
+SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic,
+KVM_REG_PPC_HASHPKEYR, 0x0);
 
 /* env variables */
 env->dcache_line_size = 128;
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 34893bdf9f..0998e8374e 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2253,6 +2253,8 @@ void helper_##op(CPUPPCState *env, target_ulong ea, 
target_ulong ra,  \
 
 HELPER_HASH(HASHST, env->spr[SPR_POWER_HASHKEYR], true)
 HELPER_HASH(HASHCHK, env->spr[SPR_POWER_HASHKEYR], false)
+HELPER_HASH(HASHSTP, env->spr[SPR_POWER_HASHPKEYR], true)
+HELPER_HASH(HASHCHKP, env->spr[SPR_POWER_HASHPKEYR], false)
 
 #if !defined(CONFIG_USER_ONLY)
 
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index d455b9d97a..cf68ba458d 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -6,6 +6,8 @@ DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32)
 #endif
 DEF_HELPER_4(HASHST, void, env, tl, tl, tl)
 DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl)
+DEF_HELPER_4(HASHSTP, void, env, tl, tl, tl)
+DEF_HELPER_4(HASHCHKP, void, env, tl, tl, tl)
 #if !defined(CONFIG_USER_ONLY)
 DEF_HELPER_2(store_msr, void, env, tl)
 DEF_HELPER_1(rfi, void, env)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 37ec6b2681..64f92a0524 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -321,6 +321,8 @@ PEXTD           011111 ..... ..... ..... 0010111100 -   @X
 
 HASHST          011111 ..... ..... ..... 1011010010 .   @X_DW
 HASHCHK         011111 ..... ..... ..... 1011110010 .   @X_DW
+HASHSTP         011111 ..... ..... ..... 1010010010 .   @X_DW
+HASHCHKP        011111 ..... ..... ..... 1010110010 .   @X_DW
 
 ## BCD Assist
 
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc 
b/target/ppc/translate/fixedpoint-impl.c.inc
index 41c06de8a2..1ba56cbed5 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -572,3 +572,5 @@ static bool do_hash(DisasContext *ctx, arg_X *a, bool priv,
 
 TRANS(HASHST, do_hash, false, gen_helper_HASHST)
 TRANS(HASHCHK, do_hash, false, gen_helper_HASHCHK)
+TRANS(HASHSTP, do_hash, true, gen_helper_HASHSTP)
+TRANS(HASHCHKP, do_hash, true, gen_helper_HASHCHKP)
-- 
2.25.1




[RFC PATCH v3 2/3] target/ppc: Implement hashst and hashchk

2022-07-13 Thread Víctor Colombo
Implementation for instructions hashst and hashchk, which were added
in Power ISA 3.1B.

It was decided to implement the hash algorithm from the ground up in
this patch, exactly as described in the Power ISA.

Signed-off-by: Víctor Colombo 
---
 target/ppc/cpu.h   |  1 +
 target/ppc/cpu_init.c  |  4 ++
 target/ppc/excp_helper.c   | 80 ++
 target/ppc/helper.h|  2 +
 target/ppc/insn32.decode   |  8 +++
 target/ppc/translate.c |  5 ++
 target/ppc/translate/fixedpoint-impl.c.inc | 32 +
 7 files changed, 132 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 7aaff9dcc5..f3f98d7a01 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1677,6 +1677,7 @@ void ppc_compat_add_property(Object *obj, const char 
*name,
 #define SPR_BOOKE_GIVOR14 (0x1BD)
 #define SPR_TIR   (0x1BE)
 #define SPR_PTCR  (0x1D0)
+#define SPR_POWER_HASHKEYR(0x1D4)
 #define SPR_BOOKE_SPEFSCR (0x200)
 #define SPR_Exxx_BBEAR(0x201)
 #define SPR_Exxx_BBTAR(0x202)
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 1da5f1f1d8..a2bbb84d47 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6490,6 +6490,10 @@ static void init_proc_POWER10(CPUPPCState *env)
 spr_read_generic, spr_write_generic,
 KVM_REG_PPC_PSSCR, 0);
 
+spr_register_kvm(env, SPR_POWER_HASHKEYR, "HASHKEYR",
+SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic,
+KVM_REG_PPC_HASHKEYR, 0x0);
+
 /* env variables */
 env->dcache_line_size = 128;
 env->icache_line_size = 128;
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index cb752b184a..34893bdf9f 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2174,6 +2174,86 @@ void helper_td(CPUPPCState *env, target_ulong arg1, 
target_ulong arg2,
 #endif
 #endif
 
+static uint32_t helper_SIMON_LIKE_32_64(uint32_t x, uint64_t key, uint32_t 
lane)
+{
+const uint16_t c = 0xfffc;
+const uint64_t z0 = 0xfa2561cdf44ac398ULL;
+uint16_t z = 0, temp;
+uint16_t k[32], eff_k[32], xleft[33], xright[33], fxleft[32];
+
+for (int i = 3; i >= 0; i--) {
+k[i] = key & 0xffff;
+key >>= 16;
+}
+xleft[0] = x & 0xffff;
+xright[0] = (x >> 16) & 0xffff;
+
+for (int i = 0; i < 28; i++) {
+z = (z0 >> (63 - i)) & 1;
+temp = ror16(k[i + 3], 3) ^ k[i + 1];
+k[i + 4] = c ^ z ^ k[i] ^ temp ^ ror16(temp, 1);
+}
+
+for (int i = 0; i < 8; i++) {
+eff_k[4 * i + 0] = k[4 * i + ((0 + lane) % 4)];
+eff_k[4 * i + 1] = k[4 * i + ((1 + lane) % 4)];
+eff_k[4 * i + 2] = k[4 * i + ((2 + lane) % 4)];
+eff_k[4 * i + 3] = k[4 * i + ((3 + lane) % 4)];
+}
+
+for (int i = 0; i < 32; i++) {
+fxleft[i] = (rol16(xleft[i], 1) &
+rol16(xleft[i], 8)) ^ rol16(xleft[i], 2);
+xleft[i + 1] = xright[i] ^ fxleft[i] ^ eff_k[i];
+xright[i + 1] = xleft[i];
+}
+
+return (((uint32_t)xright[32]) << 16) | xleft[32];
+}
+
+static uint64_t hash_digest(uint64_t ra, uint64_t rb, uint64_t key)
+{
+uint64_t stage0_h = 0ULL, stage0_l = 0ULL;
+uint64_t stage1_h, stage1_l;
+
+for (int i = 0; i < 4; i++) {
+stage0_h |= ror64(rb & 0xff, 8 * (2 * i + 1));
+stage0_h |= ((ra >> 32) & 0xff) << (8 * 2 * i);
+stage0_l |= ror64((rb >> 32) & 0xff, 8 * (2 * i + 1));
+stage0_l |= (ra & 0xff) << (8 * 2 * i);
+rb >>= 8;
+ra >>= 8;
+}
+
+stage1_h = (uint64_t)helper_SIMON_LIKE_32_64(stage0_h >> 32, key, 0) << 32;
+stage1_h |= helper_SIMON_LIKE_32_64(stage0_h, key, 1);
+stage1_l = (uint64_t)helper_SIMON_LIKE_32_64(stage0_l >> 32, key, 2) << 32;
+stage1_l |= helper_SIMON_LIKE_32_64(stage0_l, key, 3);
+
+return stage1_h ^ stage1_l;
+}
+
+#define HELPER_HASH(op, key, store)   \
+void helper_##op(CPUPPCState *env, target_ulong ea, target_ulong ra,  \
+ target_ulong rb) \
+{ \
+uint64_t chash = hash_digest(ra, rb, key), lhash; \
+  \
+if (store) {  \
+cpu_stq_data_ra(env, ea, chash, GETPC()); \
+} else {  \
+lhash = cpu_ldq_data_ra(env, ea, GETPC());\
+if (lhash != chash) {

[RFC PATCH v3 1/3] linux-headers/asm-powerpc/kvm.h: Add HASHKEYR and HASHPKEYR in headers

2022-07-13 Thread Víctor Colombo
Linux KVM currently does not export these registers. Create
placeholders for them to allow implementing hashchk(p) and
hashst(p) instructions from PowerISA 3.1B.

Signed-off-by: Víctor Colombo 
---
 linux-headers/asm-powerpc/kvm.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index 9f18fa090f..4ae4718143 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -646,6 +646,9 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_SIER3  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
 #define KVM_REG_PPC_DAWR1  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
 #define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
+/* FIXME: KVM hasn't exposed these registers yet */
+#define KVM_REG_PPC_HASHKEYR(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x00)
+#define KVM_REG_PPC_HASHPKEYR   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x00)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
-- 
2.25.1




[RFC PATCH v3 0/3] Implement Power ISA 3.1B hash insns

2022-07-13 Thread Víctor Colombo
This patch series implements the 4 instructions added in Power ISA
3.1B:

- hashchk
- hashst
- hashchkp
- hashstp

To build it, you need to apply the following patches on top of master:
<20220701133507.740619-2-lucas.couti...@eldorado.org.br>
<20220701133507.740619-3-lucas.couti...@eldorado.org.br>
<20220712193741.59134-2-leandro.lup...@eldorado.org.br>
<20220712193741.59134-3-leandro.lup...@eldorado.org.br>

Working branch for ease of use can be found here:
https://github.com/PPC64/qemu/tree/vccolombo-hash-to-send-v3

What do you think about the choice to implement the hash algorithm
from the ground up, following the SIMON-like algorithm presented in
the Power ISA? IIUC, this algorithm is not the same as the original[1].
Other options would be to use another algorithm already implemented
in QEMU, or even to make these instructions a nop for all Power versions.

Also, I was thinking about using the call to spr_register_kvm() in
init_proc_POWER10 to initialize the registers with a random value.
I'm not sure what the behavior is here. I would expect it to be the job
of the OS to set the regs, but it looks like KVM is not exporting them,
so they are always 0 (?). Does anyone have any insight on this?

v1->v2:
- Split the patch in 2
- Rebase to master

v2->v3:
- Split patches in 3
- the new patch (patch 1) is separating the kvm header 
  changes [Cornelia]

[1] https://eprint.iacr.org/2013/404.pdf

Víctor Colombo (3):
  linux-headers/asm-powerpc/kvm.h: Add HASHKEYR and HASHPKEYR in headers
  target/ppc: Implement hashst and hashchk
  target/ppc: Implement hashstp and hashchkp

 linux-headers/asm-powerpc/kvm.h|  3 +
 target/ppc/cpu.h   |  2 +
 target/ppc/cpu_init.c  |  7 ++
 target/ppc/excp_helper.c   | 82 ++
 target/ppc/helper.h|  4 ++
 target/ppc/insn32.decode   | 10 +++
 target/ppc/translate.c |  5 ++
 target/ppc/translate/fixedpoint-impl.c.inc | 34 +
 8 files changed, 147 insertions(+)

-- 
2.25.1




Re: [RFC PATCH v2 1/2] target/ppc: Implement hashst and hashchk

2022-07-11 Thread Víctor Colombo

On 11/07/2022 11:18, Cornelia Huck wrote:

On Mon, Jul 11 2022, Víctor Colombo  wrote:


Implementation for instructions hashst and hashchk, which were added
in Power ISA 3.1B.

It was decided to implement the hash algorithm from the ground up in
this patch, exactly as described in the Power ISA.

Signed-off-by: Víctor Colombo 
---
  linux-headers/asm-powerpc/kvm.h|  2 +
  target/ppc/cpu.h   |  1 +
  target/ppc/cpu_init.c  |  4 ++
  target/ppc/excp_helper.c   | 80 ++
  target/ppc/helper.h|  2 +
  target/ppc/insn32.decode   |  8 +++
  target/ppc/translate.c |  5 ++
  target/ppc/translate/fixedpoint-impl.c.inc | 32 +
  8 files changed, 134 insertions(+)

diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index 9f18fa090f..dd58d574bf 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -646,6 +646,8 @@ struct kvm_ppc_cpu_char {
  #define KVM_REG_PPC_SIER3(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
  #define KVM_REG_PPC_DAWR1(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
  #define KVM_REG_PPC_DAWRX1   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
+/* FIXME: KVM hasn't exposed these registers yet */
+#define KVM_REG_PPC_HASHKEYR(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x00)

  /* Transactional Memory checkpointed state:
   * This is all GPRs, all VSX regs and a subset of SPRs



Please split any header updates into a separate patch (a full header
update or, if the changes are not yet included in Linux, a placeholder
patch). Thank you :)




Hello Cornelia
I'll do it. Thanks for your reply

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>


[RFC PATCH v2 2/2] target/ppc: Implement hashstp and hashchkp

2022-07-11 Thread Víctor Colombo
Implementation for instructions hashstp and hashchkp, the privileged
versions of hashst and hashchk, which were added in Power ISA 3.1B.

Signed-off-by: Víctor Colombo 
---
 linux-headers/asm-powerpc/kvm.h| 1 +
 target/ppc/cpu.h   | 1 +
 target/ppc/cpu_init.c  | 3 +++
 target/ppc/excp_helper.c   | 2 ++
 target/ppc/helper.h| 2 ++
 target/ppc/insn32.decode   | 2 ++
 target/ppc/translate/fixedpoint-impl.c.inc | 2 ++
 7 files changed, 13 insertions(+)

diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index dd58d574bf..4ae4718143 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -648,6 +648,7 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
 /* FIXME: KVM hasn't exposed these registers yet */
 #define KVM_REG_PPC_HASHKEYR(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x00)
+#define KVM_REG_PPC_HASHPKEYR   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x00)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index f3f98d7a01..e6fc9c41f0 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1678,6 +1678,7 @@ void ppc_compat_add_property(Object *obj, const char 
*name,
 #define SPR_TIR   (0x1BE)
 #define SPR_PTCR  (0x1D0)
 #define SPR_POWER_HASHKEYR(0x1D4)
+#define SPR_POWER_HASHPKEYR   (0x1D5)
 #define SPR_BOOKE_SPEFSCR (0x200)
 #define SPR_Exxx_BBEAR(0x201)
 #define SPR_Exxx_BBTAR(0x202)
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index db541c531a..ea3197c2be 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6493,6 +6493,9 @@ static void init_proc_POWER10(CPUPPCState *env)
 spr_register_kvm(env, SPR_POWER_HASHKEYR, "HASHKEYR",
 SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic,
 KVM_REG_PPC_HASHKEYR, 0x0);
+spr_register_kvm(env, SPR_POWER_HASHPKEYR, "HASHPKEYR",
+SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic,
+KVM_REG_PPC_HASHPKEYR, 0x0);
 
 /* env variables */
 env->dcache_line_size = 128;
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 34893bdf9f..0998e8374e 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2253,6 +2253,8 @@ void helper_##op(CPUPPCState *env, target_ulong ea, 
target_ulong ra,  \
 
 HELPER_HASH(HASHST, env->spr[SPR_POWER_HASHKEYR], true)
 HELPER_HASH(HASHCHK, env->spr[SPR_POWER_HASHKEYR], false)
+HELPER_HASH(HASHSTP, env->spr[SPR_POWER_HASHPKEYR], true)
+HELPER_HASH(HASHCHKP, env->spr[SPR_POWER_HASHPKEYR], false)
 
 #if !defined(CONFIG_USER_ONLY)
 
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 84447aa3a5..51a5910cb7 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -6,6 +6,8 @@ DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32)
 #endif
 DEF_HELPER_4(HASHST, void, env, tl, tl, tl)
 DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl)
+DEF_HELPER_4(HASHSTP, void, env, tl, tl, tl)
+DEF_HELPER_4(HASHCHKP, void, env, tl, tl, tl)
 #if !defined(CONFIG_USER_ONLY)
 DEF_HELPER_2(store_msr, void, env, tl)
 DEF_HELPER_1(rfi, void, env)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 23e8f56f82..6dfb6d5880 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -321,6 +321,8 @@ PEXTD           011111 ..... ..... ..... 0010111100 -   @X
 
 HASHST          011111 ..... ..... ..... 1011010010 .   @X_DW
 HASHCHK         011111 ..... ..... ..... 1011110010 .   @X_DW
+HASHSTP         011111 ..... ..... ..... 1010010010 .   @X_DW
+HASHCHKP        011111 ..... ..... ..... 1010110010 .   @X_DW
 
 ## BCD Assist
 
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc 
b/target/ppc/translate/fixedpoint-impl.c.inc
index f525144398..c151c1fe46 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -575,3 +575,5 @@ static bool do_hash(DisasContext *ctx, arg_X *a, bool priv,
 
 TRANS(HASHST, do_hash, false, gen_helper_HASHST)
 TRANS(HASHCHK, do_hash, false, gen_helper_HASHCHK)
+TRANS(HASHSTP, do_hash, true, gen_helper_HASHSTP)
+TRANS(HASHCHKP, do_hash, true, gen_helper_HASHCHKP)
-- 
2.25.1




[RFC PATCH v2 1/2] target/ppc: Implement hashst and hashchk

2022-07-11 Thread Víctor Colombo
Implementation for instructions hashst and hashchk, which were added
in Power ISA 3.1B.

It was decided to implement the hash algorithm from the ground up in
this patch, exactly as described in the Power ISA.

Signed-off-by: Víctor Colombo 
---
 linux-headers/asm-powerpc/kvm.h|  2 +
 target/ppc/cpu.h   |  1 +
 target/ppc/cpu_init.c  |  4 ++
 target/ppc/excp_helper.c   | 80 ++
 target/ppc/helper.h|  2 +
 target/ppc/insn32.decode   |  8 +++
 target/ppc/translate.c |  5 ++
 target/ppc/translate/fixedpoint-impl.c.inc | 32 +
 8 files changed, 134 insertions(+)

diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index 9f18fa090f..dd58d574bf 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -646,6 +646,8 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_SIER3  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
 #define KVM_REG_PPC_DAWR1  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
 #define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
+/* FIXME: KVM hasn't exposed these registers yet */
+#define KVM_REG_PPC_HASHKEYR(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x00)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 7aaff9dcc5..f3f98d7a01 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1677,6 +1677,7 @@ void ppc_compat_add_property(Object *obj, const char 
*name,
 #define SPR_BOOKE_GIVOR14 (0x1BD)
 #define SPR_TIR   (0x1BE)
 #define SPR_PTCR  (0x1D0)
+#define SPR_POWER_HASHKEYR(0x1D4)
 #define SPR_BOOKE_SPEFSCR (0x200)
 #define SPR_Exxx_BBEAR(0x201)
 #define SPR_Exxx_BBTAR(0x202)
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 86ad28466a..db541c531a 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6490,6 +6490,10 @@ static void init_proc_POWER10(CPUPPCState *env)
 spr_read_generic, spr_write_generic,
 KVM_REG_PPC_PSSCR, 0);
 
+spr_register_kvm(env, SPR_POWER_HASHKEYR, "HASHKEYR",
+SPR_NOACCESS, SPR_NOACCESS, _read_generic, _write_generic,
+KVM_REG_PPC_HASHKEYR, 0x0);
+
 /* env variables */
 env->dcache_line_size = 128;
 env->icache_line_size = 128;
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index cb752b184a..34893bdf9f 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2174,6 +2174,86 @@ void helper_td(CPUPPCState *env, target_ulong arg1, 
target_ulong arg2,
 #endif
 #endif
 
+static uint32_t helper_SIMON_LIKE_32_64(uint32_t x, uint64_t key, uint32_t 
lane)
+{
+const uint16_t c = 0xfffc;
+const uint64_t z0 = 0xfa2561cdf44ac398ULL;
+uint16_t z = 0, temp;
+uint16_t k[32], eff_k[32], xleft[33], xright[33], fxleft[32];
+
+for (int i = 3; i >= 0; i--) {
+k[i] = key & 0x;
+key >>= 16;
+}
+xleft[0] = x & 0x;
+xright[0] = (x >> 16) & 0x;
+
+for (int i = 0; i < 28; i++) {
+z = (z0 >> (63 - i)) & 1;
+temp = ror16(k[i + 3], 3) ^ k[i + 1];
+k[i + 4] = c ^ z ^ k[i] ^ temp ^ ror16(temp, 1);
+}
+
+for (int i = 0; i < 8; i++) {
+eff_k[4 * i + 0] = k[4 * i + ((0 + lane) % 4)];
+eff_k[4 * i + 1] = k[4 * i + ((1 + lane) % 4)];
+eff_k[4 * i + 2] = k[4 * i + ((2 + lane) % 4)];
+eff_k[4 * i + 3] = k[4 * i + ((3 + lane) % 4)];
+}
+
+for (int i = 0; i < 32; i++) {
+fxleft[i] = (rol16(xleft[i], 1) &
+rol16(xleft[i], 8)) ^ rol16(xleft[i], 2);
+xleft[i + 1] = xright[i] ^ fxleft[i] ^ eff_k[i];
+xright[i + 1] = xleft[i];
+}
+
+return (((uint32_t)xright[32]) << 16) | xleft[32];
+}
+
+static uint64_t hash_digest(uint64_t ra, uint64_t rb, uint64_t key)
+{
+uint64_t stage0_h = 0ULL, stage0_l = 0ULL;
+uint64_t stage1_h, stage1_l;
+
+for (int i = 0; i < 4; i++) {
+stage0_h |= ror64(rb & 0xff, 8 * (2 * i + 1));
+stage0_h |= ((ra >> 32) & 0xff) << (8 * 2 * i);
+stage0_l |= ror64((rb >> 32) & 0xff, 8 * (2 * i + 1));
+stage0_l |= (ra & 0xff) << (8 * 2 * i);
+rb >>= 8;
+ra >>= 8;
+}
+
+stage1_h = (uint64_t)helper_SIMON_LIKE_32_64(stage0_h >> 32, key, 0) << 32;
+stage1_h |= helper_SIMON_LIKE_32_64(stage0_h, key, 1);
+stage1_l = (uint64_t)helper_SIMON_LIKE_32_64(stage0_l >> 32, key, 2) << 32;
+stage1_l |= helper_SIMON_LIKE_32_64(stage0_l, key, 3);
+
+return stage1_h ^ stage1_l;
+}
+
+#define HELPER_HASH(op, key, store)

[RFC PATCH v2 0/2] Implement Power ISA 3.1B hash insns

2022-07-11 Thread Víctor Colombo
This patch series implements the 4 instructions added in Power ISA
3.1B:

- hashchk
- hashst
- hashchkp
- hashstp

What do you think about the choice to implement the hash algorithm
from the ground up, following the SIMON-like algorithm presented in
the Power ISA? IIUC, this algorithm is not the same as the original [1].
Other options would be to use another algorithm already implemented
in QEMU, or even to make these instructions a nop for all Power versions.

Also, I was thinking about using the call to spr_register_kvm() in
init_proc_POWER10 to initialize the registers with a random value.
I'm not sure what the expected behavior is here. I would expect it to be
the job of the OS to set the registers, but it looks like KVM is not
exporting them, so they are always 0 (?). Does anyone have any insight
on this?

v1->v2:
- Split the patch in 2
- Rebase to master

[1] https://eprint.iacr.org/2013/404.pdf

Víctor Colombo (2):
  target/ppc: Implement hashst and hashchk instructions
  target/ppc: Implement hashstp and hashchkp

 linux-headers/asm-powerpc/kvm.h|  3 +
 target/ppc/cpu.h   |  2 +
 target/ppc/cpu_init.c  |  7 ++
 target/ppc/excp_helper.c   | 82 ++
 target/ppc/helper.h|  4 ++
 target/ppc/insn32.decode   | 10 +++
 target/ppc/translate.c |  5 ++
 target/ppc/translate/fixedpoint-impl.c.inc | 34 +
 8 files changed, 147 insertions(+)

-- 
2.25.1




Re: [PATCH 1/2] util/log: Add vu to dump content of vector unit

2022-07-11 Thread Víctor Colombo

On 08/07/2022 05:57, Kito Cheng wrote:

Add a new option, -d vu, to dump the content of the vector unit. Many targets
have vector registers, but there is no easy way to dump their content. We
have used this downstream for a while to help with debugging, and I feel it is
really useful, so I think it would be great to upstream it to save debug time
for other people :)

Signed-off-by: Kito Cheng 
---
  accel/tcg/cpu-exec.c  | 3 +++
  include/hw/core/cpu.h | 2 ++
  include/qemu/log.h| 1 +
  util/log.c| 2 ++
  4 files changed, 8 insertions(+)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index a565a3f8ec..2cbec0a6ed 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -221,6 +221,9 @@ static inline void log_cpu_exec(target_ulong pc, CPUState 
*cpu,
  if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
  flags |= CPU_DUMP_FPU;
  }
+if (qemu_loglevel_mask(CPU_LOG_TB_VU)) {
+flags |= CPU_DUMP_VU;
+}
  #if defined(TARGET_I386)
  flags |= CPU_DUMP_CCOP;
  #endif
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 996f94059f..7a767e17cd 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -535,11 +535,13 @@ GuestPanicInformation *cpu_get_crash_info(CPUState *cpu);
   * @CPU_DUMP_CODE:
   * @CPU_DUMP_FPU: dump FPU register state, not just integer
   * @CPU_DUMP_CCOP: dump info about TCG QEMU's condition code optimization 
state
+ * @CPU_DUMP_VU: dump vector register state
   */
  enum CPUDumpFlags {
  CPU_DUMP_CODE = 0x0001,
  CPU_DUMP_FPU  = 0x0002,
  CPU_DUMP_CCOP = 0x0004,
+CPU_DUMP_VU   = 0x0008,
  };

  /**
diff --git a/include/qemu/log.h b/include/qemu/log.h
index c5643d8dd5..49bd0b0fbc 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -35,6 +35,7 @@ bool qemu_log_separate(void);
  /* LOG_STRACE is used for user-mode strace logging. */
  #define LOG_STRACE (1 << 19)
  #define LOG_PER_THREAD (1 << 20)
+#define CPU_LOG_TB_VU  (1 << 21)

  /* Lock/unlock output. */

diff --git a/util/log.c b/util/log.c
index d6eb0378c3..775d122c2e 100644
--- a/util/log.c
+++ b/util/log.c
@@ -441,6 +441,8 @@ const QEMULogItem qemu_log_items[] = {
  #ifdef CONFIG_PLUGIN
  { CPU_LOG_PLUGIN, "plugin", "output from TCG plugins\n"},
  #endif
+{ CPU_LOG_TB_VU, "vu",
+  "include vector unit registers in the 'cpu' logging" },
  { LOG_STRACE, "strace",
"log every user-mode syscall, its input, and its result" },
  { LOG_PER_THREAD, "tid",
--
2.34.0




I think this looks like a good idea; I can see myself using it.
I see a lot of places in the code where cpu_dump_state() is
called with the CPU_DUMP_FPU flag as a parameter. Do you think
the new CPU_DUMP_VU flag should be added there too?

Best regards,

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>


Re: [RFC 5/8] static-analyzer: Enforce coroutine_fn restrictions on function pointers

2022-07-04 Thread Víctor Colombo

On 04/07/2022 15:04, Alberto Faria wrote:

On Mon, Jul 4, 2022 at 6:46 PM Víctor Colombo
 wrote:

Yes, this line is present at the beginning of the output
Is this caused by problems with the code being analyzed or is it because
libclang is getting confused with something that is outside of our
control?


I think I found the problem: the commands in the compilation database
weren't being parsed properly. I switched to shlex.split() and it
seems to be working now. The WIP v2 is available at [1], if you want
to give it a try.

Thanks for reporting this!

Alberto

[1] https://gitlab.com/albertofaria/qemu/-/tree/static-analysis



I tested the version from the WIP v2 and it seems to be working now.
Thanks!

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>


Re: [RFC 5/8] static-analyzer: Enforce coroutine_fn restrictions on function pointers

2022-07-04 Thread Víctor Colombo

On 04/07/2022 13:57, Alberto Faria wrote:

Hi Víctor,

On Mon, Jul 4, 2022 at 3:18 PM Víctor Colombo
 wrote:

And I receive an exception on the line above saying that node is of type
NoneType. Seems that `node = node.referenced` is setting `node` to None
in this case.

I was unable to understand the root cause of it. Is this an incorrect
usage of the tool from my part? Full error message below


Unfortunately there seem to be a lot of corner cases that libclang can
throw at us. I hadn't come across this one before. I expected that
DECL_REF_EXPR/MEMBER_REF_EXPR would always reference something.

This may be due to some build error -- libclang tries to continue
processing a translation unit by dropping subtrees or nodes that have
problems. Is there a "too many errors emitted, stopping now; this may
lead to false positives and negatives" line at the top of the script's
output?



Yes, this line is present at the beginning of the output.
Is this caused by problems with the code being analyzed, or is it because
libclang is getting confused by something that is outside of our
control?

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>


Re: [RFC 5/8] static-analyzer: Enforce coroutine_fn restrictions on function pointers

2022-07-04 Thread Víctor Colombo

On 02/07/2022 08:33, Alberto Faria wrote:

Alberto, hello. I was testing this patch as follows:

./static-analyzer.py build target/ppc/mmu-hash64.c




@@ -627,9 +744,31 @@ def is_coroutine_fn(node: Cursor) -> bool:
  else:
  break

-return node.kind == CursorKind.FUNCTION_DECL and is_annotated_with(
-node, "coroutine_fn"
-)
+if node.kind in [CursorKind.DECL_REF_EXPR, CursorKind.MEMBER_REF_EXPR]:
+node = node.referenced
+
+# ---
+
+if node.kind == CursorKind.FUNCTION_DECL:



And I receive an exception on the line above saying that node is of type
NoneType. It seems that `node = node.referenced` is setting `node` to None
in this case.

I was unable to understand the root cause of it. Is this incorrect
usage of the tool on my part? The full error message is below:

Traceback (most recent call last):
  File "./static-analyzer.py", line 327, in analyze_translation_unit
checker(tu, context.absolute_path, log)
  File "./static-analyzer.py", line 613, in check_coroutine_pointers
and is_coroutine_fn(right)
  File "./static-analyzer.py", line 781, in is_coroutine_fn
if node.kind == CursorKind.FUNCTION_DECL:
AttributeError: 'NoneType' object has no attribute 'kind'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 48, in mapstar
return list(map(*args))
  File "./static-analyzer.py", line 329, in analyze_translation_unit
raise RuntimeError(f"Error analyzing {relative_path}") from e
RuntimeError: Error analyzing target/ppc/mmu-hash64.c
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "./static-analyzer.py", line 893, in <module>
main()
  File "./static-analyzer.py", line 123, in main
analyze_translation_units(args, contexts)
  File "./static-analyzer.py", line 240, in analyze_translation_units
results = pool.map(analyze_translation_unit, contexts)
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 771, in get
raise self._value
RuntimeError: Error analyzing target/ppc/mmu-hash64.c

> +return is_annotated_with(node, "coroutine_fn")

+
+if node.kind in [
+CursorKind.FIELD_DECL,
+CursorKind.VAR_DECL,
+CursorKind.PARM_DECL,
+]:
+
+if is_annotated_with(node, "coroutine_fn"):
+return True
+
+# TODO: If type is typedef or pointer to typedef, follow typedef.
+
+return False
+
+if node.kind == CursorKind.TYPEDEF_DECL:
+return is_annotated_with(node, "coroutine_fn")
+
+return False

Best regards,

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>


Re: [PATCH v2] target/ppc: Return default CPU for max CPU

2022-06-30 Thread Víctor Colombo

On 28/06/2022 17:55, Murilo Opsfelder Araujo wrote:

All ppc CPUs represent hardware that exists in the real world, i.e.: we
do not have a "max" CPU with all possible emulated features enabled.
Return the default CPU type for the machine because that has greater
chance of being useful as the "max" CPU.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1038
Cc: Cédric Le Goater 
Cc: Daniel Henrique Barboza 
Cc: Daniel P. Berrangé 
Cc: Greg Kurz 
Cc: Matheus K. Ferst 
Cc: Thomas Huth 
Signed-off-by: Murilo Opsfelder Araujo 
Signed-off-by: Fabiano Rosas 
---
v2:
- Return the default CPU of the machine instead of hard-coded alias.

v1: 
https://lore.kernel.org/qemu-devel/20220531172711.94564-1-muri...@linux.ibm.com/

  target/ppc/cpu-models.c |  1 -
  target/ppc/cpu_init.c   | 19 +++
  2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c
index 976be5e0d1..05589eb21d 100644
--- a/target/ppc/cpu-models.c
+++ b/target/ppc/cpu-models.c
@@ -879,7 +879,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
  { "755", "755_v2.8" },
  { "goldfinger", "755_v2.8" },
  { "7400", "7400_v2.9" },
-{ "max", "7400_v2.9" },
  { "g4",  "7400_v2.9" },
  { "7410", "7410_v1.4" },
  { "nitro", "7410_v1.4" },
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index c16cb8dbe7..8ee0b7c785 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -47,6 +47,10 @@
  #include "spr_common.h"
  #include "power8-pmu.h"

+#ifndef CONFIG_USER_ONLY
+#include "hw/boards.h"
+#endif
+
  /* #define PPC_DEBUG_SPR */
  /* #define USE_APPLE_GDB */

@@ -6963,6 +6967,21 @@ static ObjectClass *ppc_cpu_class_by_name(const char 
*name)
  }
  }

+/*
+ * All ppc CPUs represent hardware that exists in the real world, i.e.: we
+ * do not have a "max" CPU with all possible emulated features enabled.
+ * Return the default CPU type for the machine because that has greater
+ * chance of being useful as the "max" CPU.
+ */
+#if !defined(CONFIG_USER_ONLY)
+if (strcmp(name, "max") == 0) {
+MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+if (mc) {
+    return object_class_by_name(mc->default_cpu_type);
+}
+}
+#endif
+
  cpu_model = g_ascii_strdown(name, -1);
  p = ppc_cpu_lookup_alias(cpu_model);
  if (p) {
--
2.36.1




Reviewed-by: Víctor Colombo 

Best regards,

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>



[PATCH v3 09/11] target/ppc: implement addg6s

2022-06-29 Thread Víctor Colombo
From: Matheus Ferst 

Implements the following Power ISA v2.06 instruction:
addg6s: Add and Generate Sixes

Signed-off-by: Matheus Ferst 
Signed-off-by: Víctor Colombo 
Reviewed-by: Víctor Colombo 
---
 target/ppc/insn32.decode   |  4 +++
 target/ppc/translate/fixedpoint-impl.c.inc | 37 ++
 2 files changed, 41 insertions(+)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 400ca41bc6..36db427537 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -311,6 +311,10 @@ CNTTZDM 01 . . . 1000111011 -   @X
 PDEPD   01 . . . 0010011100 -   @X
 PEXTD   01 . . . 001000 -   @X
 
+## BCD Assist
+
+ADDG6S  01 . . . - 001001010 -  @X
+
 ### Float-Point Load Instructions
 
 LFS 11 . .  @D
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc 
b/target/ppc/translate/fixedpoint-impl.c.inc
index 1aab32be03..490e49cfc7 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -492,3 +492,40 @@ static bool trans_PEXTD(DisasContext *ctx, arg_X *a)
 #endif
 return true;
 }
+
+static bool trans_ADDG6S(DisasContext *ctx, arg_X *a)
+{
+const uint64_t carry_bits = 0xULL;
+TCGv t0, t1, carry, zero = tcg_constant_tl(0);
+
+REQUIRE_INSNS_FLAGS2(ctx, BCDA_ISA206);
+
+t0 = tcg_temp_new();
+t1 = tcg_const_tl(0);
+carry = tcg_const_tl(0);
+
+for (int i = 0; i < 16; i++) {
+tcg_gen_shri_tl(t0, cpu_gpr[a->ra], i * 4);
+tcg_gen_andi_tl(t0, t0, 0xf);
+tcg_gen_add_tl(t1, t1, t0);
+
+tcg_gen_shri_tl(t0, cpu_gpr[a->rb], i * 4);
+tcg_gen_andi_tl(t0, t0, 0xf);
+tcg_gen_add_tl(t1, t1, t0);
+
+tcg_gen_andi_tl(t1, t1, 0x10);
+tcg_gen_setcond_tl(TCG_COND_NE, t1, t1, zero);
+
+tcg_gen_shli_tl(t0, t1, i * 4);
+tcg_gen_or_tl(carry, carry, t0);
+}
+
+tcg_gen_xori_tl(carry, carry, (target_long)carry_bits);
+tcg_gen_muli_tl(cpu_gpr[a->rt], carry, 6);
+
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+tcg_temp_free(carry);
+
+return true;
+}
-- 
2.25.1




[PATCH v3 11/11] target/ppc: implement cdtbcd

2022-06-29 Thread Víctor Colombo
From: Matheus Ferst 

Implements the Convert Declets To Binary Coded Decimal instruction.
Since libdecnumber doesn't expose the methods for direct conversion
(decDigitsFromDPD, DPD2BCD, etc), a positive decimal32 with zero
exponent is used as an intermediate value to convert the declets.

Reviewed-by: Richard Henderson 
Signed-off-by: Matheus Ferst 
Signed-off-by: Víctor Colombo 
---
 target/ppc/dfp_helper.c| 26 ++
 target/ppc/helper.h|  1 +
 target/ppc/insn32.decode   |  1 +
 target/ppc/translate/fixedpoint-impl.c.inc |  7 ++
 4 files changed, 35 insertions(+)

diff --git a/target/ppc/dfp_helper.c b/target/ppc/dfp_helper.c
index db9e994c8c..5ba74b2124 100644
--- a/target/ppc/dfp_helper.c
+++ b/target/ppc/dfp_helper.c
@@ -1392,6 +1392,32 @@ DFP_HELPER_SHIFT(DSCLIQ, 128, 1)
 DFP_HELPER_SHIFT(DSCRI, 64, 0)
 DFP_HELPER_SHIFT(DSCRIQ, 128, 0)
 
+target_ulong helper_CDTBCD(target_ulong s)
+{
+uint64_t res = 0;
+uint32_t dec32, declets;
+uint8_t bcd[6];
+int i, w, sh;
+decNumber a;
+
+for (w = 1; w >= 0; w--) {
+res <<= 32;
+declets = extract64(s, 32 * w, 20);
+if (declets) {
+/* decimal32 with zero exponent and word "w" declets */
+dec32 = (0x225ULL << 20) | declets;
+decimal32ToNumber((decimal32 *), );
+decNumberGetBCD(, bcd);
+for (i = 0; i < a.digits; i++) {
+sh = 4 * (a.digits - 1 - i);
+res |= (uint64_t)bcd[i] << sh;
+}
+}
+}
+
+return res;
+}
+
 target_ulong helper_CBCDTD(target_ulong s)
 {
 uint64_t res = 0;
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 643bd69db8..b0fcebf8b5 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -54,6 +54,7 @@ DEF_HELPER_3(sraw, tl, env, tl, tl)
 DEF_HELPER_FLAGS_2(CFUGED, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(PDEPD, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(PEXTD, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 5222d540b1..b673099eaa 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -317,6 +317,7 @@ PEXTD   01 . . . 001000 -   @X
 ## BCD Assist
 
 ADDG6S  01 . . . - 001001010 -  @X
+CDTBCD  01 . . - 0100011010 -   @X_sa
 CBCDTD  01 . . - 0100111010 -   @X_sa
 
 ### Float-Point Load Instructions
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc 
b/target/ppc/translate/fixedpoint-impl.c.inc
index 892c9d2568..cb0097bedb 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -530,6 +530,13 @@ static bool trans_ADDG6S(DisasContext *ctx, arg_X *a)
 return true;
 }
 
+static bool trans_CDTBCD(DisasContext *ctx, arg_X_sa *a)
+{
+REQUIRE_INSNS_FLAGS2(ctx, BCDA_ISA206);
+gen_helper_CDTBCD(cpu_gpr[a->ra], cpu_gpr[a->rs]);
+return true;
+}
+
 static bool trans_CBCDTD(DisasContext *ctx, arg_X_sa *a)
 {
 REQUIRE_INSNS_FLAGS2(ctx, BCDA_ISA206);
-- 
2.25.1




[PATCH v3 07/11] tests/tcg/ppc64: Add mffsce test

2022-06-29 Thread Víctor Colombo
Add mffsce test to check both the return value and the new fpscr
stored in the cpu.

Signed-off-by: Víctor Colombo 
Reviewed-by: Matheus Ferst 
---
 tests/tcg/ppc64/Makefile.target   |  1 +
 tests/tcg/ppc64le/Makefile.target |  1 +
 tests/tcg/ppc64le/mffsce.c| 37 +++
 3 files changed, 39 insertions(+)
 create mode 100644 tests/tcg/ppc64le/mffsce.c

diff --git a/tests/tcg/ppc64/Makefile.target b/tests/tcg/ppc64/Makefile.target
index babd209573..331fae628e 100644
--- a/tests/tcg/ppc64/Makefile.target
+++ b/tests/tcg/ppc64/Makefile.target
@@ -11,6 +11,7 @@ endif
 $(PPC64_TESTS): CFLAGS += -mpower8-vector
 
 PPC64_TESTS += mtfsf
+PPC64_TESTS += mffsce
 
 ifneq ($(CROSS_CC_HAS_POWER10),)
 PPC64_TESTS += byte_reverse sha512-vector
diff --git a/tests/tcg/ppc64le/Makefile.target 
b/tests/tcg/ppc64le/Makefile.target
index 5b0eb5e870..6ca3003f02 100644
--- a/tests/tcg/ppc64le/Makefile.target
+++ b/tests/tcg/ppc64le/Makefile.target
@@ -24,6 +24,7 @@ run-sha512-vector: QEMU_OPTS+=-cpu POWER10
 run-plugin-sha512-vector-with-%: QEMU_OPTS+=-cpu POWER10
 
 PPC64LE_TESTS += mtfsf
+PPC64LE_TESTS += mffsce
 PPC64LE_TESTS += signal_save_restore_xer
 PPC64LE_TESTS += xxspltw
 
diff --git a/tests/tcg/ppc64le/mffsce.c b/tests/tcg/ppc64le/mffsce.c
new file mode 100644
index 00..20d882cb45
--- /dev/null
+++ b/tests/tcg/ppc64le/mffsce.c
@@ -0,0 +1,37 @@
+#include 
+#include 
+#include 
+
+#define MTFSF(FLM, FRB) asm volatile ("mtfsf %0, %1" :: "i" (FLM), "f" (FRB))
+#define MFFS(FRT) asm("mffs %0" : "=f" (FRT))
+#define MFFSCE(FRT) asm("mffsce %0" : "=f" (FRT))
+
+#define PPC_BIT_NR(nr) (63 - (nr))
+
+#define FP_VE  (1ull << PPC_BIT_NR(56))
+#define FP_UE  (1ull << PPC_BIT_NR(58))
+#define FP_ZE  (1ull << PPC_BIT_NR(59))
+#define FP_XE  (1ull << PPC_BIT_NR(60))
+#define FP_NI  (1ull << PPC_BIT_NR(61))
+#define FP_RN1 (1ull << PPC_BIT_NR(63))
+
+int main(void)
+{
+uint64_t frt, fpscr;
+uint64_t test_value = FP_VE | FP_UE | FP_ZE |
+  FP_XE | FP_NI | FP_RN1;
+MTFSF(0b, test_value); /* set test value to cpu fpscr */
+MFFSCE(frt);
+MFFS(fpscr); /* read the value that mffsce stored to cpu fpscr */
+
+/* the returned value should be as the cpu fpscr was before */
+assert((frt & 0xff) == test_value);
+
+/*
+ * the cpu fpscr last 3 bits should be unchanged
+ * and enable bits should be unset
+ */
+assert((fpscr & 0xff) == (test_value & 0x7));
+
+return 0;
+}
-- 
2.25.1




[PATCH v3 08/11] target/ppc: Add flag for ISA v2.06 BCDA instructions

2022-06-29 Thread Víctor Colombo
From: Matheus Ferst 

Adds an insns_flags2 for the BCD assist instructions introduced in
Power ISA 2.06. These instructions are not listed in the manuals for
e5500[1] and e6500[2], so the flag is only added for POWER7/8/9/10
models.

[1] https://www.nxp.com/files-static/32bit/doc/ref_manual/EREF_RM.pdf
[2] https://www.nxp.com/docs/en/reference-manual/E6500RM.pdf

Signed-off-by: Matheus Ferst 
Signed-off-by: Víctor Colombo 
Reviewed-by: Richard Henderson 
---
 target/ppc/cpu.h  |  5 -
 target/ppc/cpu_init.c | 10 ++
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 6d78078f37..642bae311f 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -2277,6 +2277,8 @@ enum {
 PPC2_ISA310= 0x0010ULL,
 /*   lwsync instruction  */
 PPC2_MEM_LWSYNC= 0x0020ULL,
+/* ISA 2.06 BCD assist instructions  */
+PPC2_BCDA_ISA206   = 0x0040ULL,
 
 #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \
 PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \
@@ -2285,7 +2287,8 @@ enum {
 PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
 PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \
 PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206 | \
-PPC2_ISA300 | PPC2_ISA310 | PPC2_MEM_LWSYNC)
+PPC2_ISA300 | PPC2_ISA310 | PPC2_MEM_LWSYNC | \
+PPC2_BCDA_ISA206)
 };
 
 /*/
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index c16cb8dbe7..bdfb1a5c6f 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -5985,7 +5985,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
 PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
 PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
 PPC2_FP_TST_ISA206 | PPC2_FP_CVT_S64 |
-PPC2_PM_ISA206 | PPC2_MEM_LWSYNC;
+PPC2_PM_ISA206 | PPC2_MEM_LWSYNC | PPC2_BCDA_ISA206;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_VR) |
 (1ull << MSR_VSX) |
@@ -6159,7 +6159,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
 PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
-PPC2_TM | PPC2_PM_ISA206 | PPC2_MEM_LWSYNC;
+PPC2_TM | PPC2_PM_ISA206 | PPC2_MEM_LWSYNC |
+PPC2_BCDA_ISA206;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_HV) |
 (1ull << MSR_TM) |
@@ -6379,7 +6380,8 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
 PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
 PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
-PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_MEM_LWSYNC;
+PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_MEM_LWSYNC |
+PPC2_BCDA_ISA206;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_HV) |
 (1ull << MSR_TM) |
@@ -6597,7 +6599,7 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
 PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
 PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_ISA310 |
-PPC2_MEM_LWSYNC;
+PPC2_MEM_LWSYNC | PPC2_BCDA_ISA206;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_HV) |
 (1ull << MSR_TM) |
-- 
2.25.1




[PATCH v3 04/11] target/ppc: Move mffsl to decodetree

2022-06-29 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
Reviewed-by: Matheus Ferst 
---
 target/ppc/insn32.decode   |  1 +
 target/ppc/translate/fp-impl.c.inc | 38 +-
 target/ppc/translate/fp-ops.c.inc  |  2 --
 3 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index b6a7a3a3ff..6d3b98a127 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -345,6 +345,7 @@ SETNBCR 01 . . - 00 -   
@X_bi
 MFFSCE  11 . 1 - 1001000111 -   @X_t
 MFFSCRN 11 . 10110 . 1001000111 -   @X_tb
 MFFSCRNI11 . 10111 ---.. 1001000111 -   @X_imm2
+MFFSL   11 . 11000 - 1001000111 -   @X_t
 
 ### Decimal Floating-Point Arithmetic Instructions
 
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index 64e26b9b42..4f4d57c611 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -633,28 +633,6 @@ static void gen_mffs(DisasContext *ctx)
 tcg_temp_free_i64(t0);
 }
 
-/* mffsl */
-static void gen_mffsl(DisasContext *ctx)
-{
-TCGv_i64 t0;
-
-if (unlikely(!(ctx->insns_flags2 & PPC2_ISA300))) {
-return gen_mffs(ctx);
-}
-
-if (unlikely(!ctx->fpu_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_FPU);
-return;
-}
-t0 = tcg_temp_new_i64();
-gen_reset_fpstatus();
-tcg_gen_extu_tl_i64(t0, cpu_fpscr);
-/* Mask everything except mode, status, and enables.  */
-tcg_gen_andi_i64(t0, t0, FP_DRN | FP_STATUS | FP_ENABLES | FP_RN);
-set_fpr(rD(ctx->opcode), t0);
-tcg_temp_free_i64(t0);
-}
-
 static TCGv_i64 place_from_fpscr(int rt, uint64_t mask)
 {
 TCGv_i64 fpscr = tcg_temp_new_i64();
@@ -739,6 +717,22 @@ static bool trans_MFFSCRNI(DisasContext *ctx, arg_X_imm2 
*a)
 return true;
 }
 
+static bool trans_MFFSL(DisasContext *ctx, arg_X_t *a)
+{
+TCGv_i64 fpscr;
+
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_FPU(ctx);
+
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt,
+FP_DRN | FP_STATUS | FP_ENABLES | FP_NI | FP_RN);
+
+tcg_temp_free_i64(fpscr);
+
+return true;
+}
+
 /* mtfsb0 */
 static void gen_mtfsb0(DisasContext *ctx)
 {
diff --git a/target/ppc/translate/fp-ops.c.inc 
b/target/ppc/translate/fp-ops.c.inc
index a76943b8bf..f8c35124ae 100644
--- a/target/ppc/translate/fp-ops.c.inc
+++ b/target/ppc/translate/fp-ops.c.inc
@@ -75,8 +75,6 @@ GEN_HANDLER_E(fmrgew, 0x3F, 0x06, 0x1E, 0x0001, PPC_NONE, 
PPC2_VSX207),
 GEN_HANDLER_E(fmrgow, 0x3F, 0x06, 0x1A, 0x0001, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER(mcrfs, 0x3F, 0x00, 0x02, 0x0063F801, PPC_FLOAT),
 GEN_HANDLER_E_2(mffs, 0x3F, 0x07, 0x12, 0x00, 0x, PPC_FLOAT, PPC_NONE),
-GEN_HANDLER_E_2(mffsl, 0x3F, 0x07, 0x12, 0x18, 0x, PPC_FLOAT,
-PPC2_ISA300),
 GEN_HANDLER(mtfsb0, 0x3F, 0x06, 0x02, 0x001FF800, PPC_FLOAT),
 GEN_HANDLER(mtfsb1, 0x3F, 0x06, 0x01, 0x001FF800, PPC_FLOAT),
 GEN_HANDLER(mtfsf, 0x3F, 0x07, 0x16, 0x, PPC_FLOAT),
-- 
2.25.1




[PATCH v3 10/11] target/ppc: implement cbcdtd

2022-06-29 Thread Víctor Colombo
From: Matheus Ferst 

Implements the Convert Binary Coded Decimal To Declets instruction.
Since libdecnumber doesn't expose the methods for direct conversion
(decDigitsToDPD, BCD2DPD, etc.), the BCD values are converted to
decimal32 format, from which the declets are extracted.

Where the behavior is undefined, we try to match the result observed in
a POWER9 DD2.3.

Reviewed-by: Richard Henderson 
Signed-off-by: Matheus Ferst 
Signed-off-by: Víctor Colombo 
---
 target/ppc/dfp_helper.c| 39 ++
 target/ppc/helper.h|  1 +
 target/ppc/insn32.decode   |  4 +++
 target/ppc/translate/fixedpoint-impl.c.inc |  7 
 4 files changed, 51 insertions(+)

diff --git a/target/ppc/dfp_helper.c b/target/ppc/dfp_helper.c
index 0d01ac3de0..db9e994c8c 100644
--- a/target/ppc/dfp_helper.c
+++ b/target/ppc/dfp_helper.c
@@ -1391,3 +1391,42 @@ DFP_HELPER_SHIFT(DSCLI, 64, 1)
 DFP_HELPER_SHIFT(DSCLIQ, 128, 1)
 DFP_HELPER_SHIFT(DSCRI, 64, 0)
 DFP_HELPER_SHIFT(DSCRIQ, 128, 0)
+
+target_ulong helper_CBCDTD(target_ulong s)
+{
+uint64_t res = 0;
+uint32_t dec32;
+uint8_t bcd[6];
+int w, i, offs;
+decNumber a;
+decContext context;
+
+decContextDefault(, DEC_INIT_DECIMAL32);
+
+for (w = 1; w >= 0; w--) {
+res <<= 32;
+decNumberZero();
+/* Extract each BCD field of word "w" */
+for (i = 5; i >= 0; i--) {
+offs = 4 * (5 - i) + 32 * w;
+bcd[i] = extract64(s, offs, 4);
+if (bcd[i] > 9) {
+/*
+ * If the field value is greater than 9, the results are
+ * undefined. We could use a fixed value like 0 or 9, but
+ * an and with 9 seems to better match the hardware behavior.
+ */
+bcd[i] &= 9;
+}
+}
+
+/* Create a decNumber with the BCD values and convert to decimal32 */
+decNumberSetBCD(, bcd, 6);
+decimal32FromNumber((decimal32 *), , );
+
+/* Extract the two declets from the decimal32 value */
+res |= dec32 & 0xf;
+}
+
+return res;
+}
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index d627cfe6ed..643bd69db8 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -54,6 +54,7 @@ DEF_HELPER_3(sraw, tl, env, tl, tl)
 DEF_HELPER_FLAGS_2(CFUGED, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(PDEPD, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(PEXTD, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl)
 DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 36db427537..5222d540b1 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -87,6 +87,9 @@
 _rc   rt ra rb rc:bool
 @X_rc   .. rt:5 ra:5 rb:5 .. rc:1   _rc
 
+_sa   rs ra
+@X_sa   .. rs:5 ra:5 . .. . _sa
+
 %x_frtp 22:4 !function=times_2
 %x_frap 17:4 !function=times_2
 %x_frbp 12:4 !function=times_2
@@ -314,6 +317,7 @@ PEXTD   01 . . . 001000 -   @X
 ## BCD Assist
 
 ADDG6S  01 . . . - 001001010 -  @X
+CBCDTD  01 . . - 0100111010 -   @X_sa
 
 ### Float-Point Load Instructions
 
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc 
b/target/ppc/translate/fixedpoint-impl.c.inc
index 490e49cfc7..892c9d2568 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -529,3 +529,10 @@ static bool trans_ADDG6S(DisasContext *ctx, arg_X *a)
 
 return true;
 }
+
+static bool trans_CBCDTD(DisasContext *ctx, arg_X_sa *a)
+{
+REQUIRE_INSNS_FLAGS2(ctx, BCDA_ISA206);
+gen_helper_CBCDTD(cpu_gpr[a->ra], cpu_gpr[a->rs]);
+return true;
+}
-- 
2.25.1




[PATCH v3 06/11] target/ppc: Implement mffscdrn[i] instructions

2022-06-29 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
Reviewed-by: Matheus Ferst 
---
 target/ppc/insn32.decode   |  5 
 target/ppc/translate/fp-impl.c.inc | 41 ++
 2 files changed, 46 insertions(+)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 736a7c6f3f..400ca41bc6 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -133,6 +133,9 @@
 _imm2 rt imm
 @X_imm2 .. rt:5 . ... imm:2 .. ._imm2
 
+_imm3 rt imm
+@X_imm3 .. rt:5 . .. imm:3 .. . _imm3
+
 %x_xt   0:1 21:5
 _imm5 xt imm:uint8_t vrb
 @X_imm5 .. . imm:5 vrb:5 .. .   _imm5 
xt=%x_xt
@@ -348,7 +351,9 @@ SETNBCR 01 . . - 00 -   
@X_bi
 MFFS11 . 0 - 1001000111 .   @X_t_rc
 MFFSCE  11 . 1 - 1001000111 -   @X_t
 MFFSCRN 11 . 10110 . 1001000111 -   @X_tb
+MFFSCDRN11 . 10100 . 1001000111 -   @X_tb
 MFFSCRNI11 . 10111 ---.. 1001000111 -   @X_imm2
+MFFSCDRNI   11 . 10101 --... 1001000111 -   @X_imm3
 MFFSL   11 . 11000 - 1001000111 -   @X_t
 
 ### Decimal Floating-Point Arithmetic Instructions
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index d6231358f8..319513d001 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -696,6 +696,27 @@ static bool trans_MFFSCRN(DisasContext *ctx, arg_X_tb *a)
 return true;
 }
 
+static bool trans_MFFSCDRN(DisasContext *ctx, arg_X_tb *a)
+{
+TCGv_i64 t1, fpscr;
+
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_FPU(ctx);
+
+t1 = tcg_temp_new_i64();
+get_fpr(t1, a->rb);
+tcg_gen_andi_i64(t1, t1, FP_DRN);
+
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt, FP_DRN | FP_ENABLES | FP_NI | FP_RN);
+store_fpscr_masked(fpscr, FP_DRN, t1, 0x0100);
+
+tcg_temp_free_i64(t1);
+tcg_temp_free_i64(fpscr);
+
+return true;
+}
+
 static bool trans_MFFSCRNI(DisasContext *ctx, arg_X_imm2 *a)
 {
 TCGv_i64 t1, fpscr;
@@ -716,6 +737,26 @@ static bool trans_MFFSCRNI(DisasContext *ctx, arg_X_imm2 
*a)
 return true;
 }
 
+static bool trans_MFFSCDRNI(DisasContext *ctx, arg_X_imm3 *a)
+{
+TCGv_i64 t1, fpscr;
+
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_FPU(ctx);
+
+t1 = tcg_temp_new_i64();
+tcg_gen_movi_i64(t1, (uint64_t)a->imm << FPSCR_DRN0);
+
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt, FP_DRN | FP_ENABLES | FP_NI | FP_RN);
+store_fpscr_masked(fpscr, FP_DRN, t1, 0x0100);
+
+tcg_temp_free_i64(t1);
+tcg_temp_free_i64(fpscr);
+
+return true;
+}
+
 static bool trans_MFFSL(DisasContext *ctx, arg_X_t *a)
 {
 TCGv_i64 fpscr;
-- 
2.25.1




[PATCH v3 02/11] target/ppc: Move mffscrn[i] to decodetree

2022-06-29 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
Reviewed-by: Matheus Ferst 
---
 target/ppc/insn32.decode   |  8 +++
 target/ppc/internal.h  |  3 --
 target/ppc/translate/fp-impl.c.inc | 83 +++---
 target/ppc/translate/fp-ops.c.inc  |  4 --
 4 files changed, 50 insertions(+), 48 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 8b723b5433..3b61c3a073 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -124,6 +124,9 @@
 _bfl  bf l:bool ra rb
 @X_bfl  .. bf:3 . l:1 ra:5 rb:5 .. ._bfl
 
+_imm2 rt imm
+@X_imm2 .. rt:5 . ... imm:2 .. ._imm2
+
 %x_xt   0:1 21:5
 _imm5 xt imm:uint8_t vrb
 @X_imm5 .. . imm:5 vrb:5 .. .   _imm5 
xt=%x_xt
@@ -334,6 +337,11 @@ SETBCR  01 . . - 011010 -   
@X_bi
 SETNBC  01 . . - 011100 -   @X_bi
 SETNBCR 01 . . - 00 -   @X_bi
 
+### Move To/From FPSCR
+
+MFFSCRN 11 . 10110 . 1001000111 -   @X_tb
+MFFSCRNI11 . 10111 ---.. 1001000111 -   @X_imm2
+
 ### Decimal Floating-Point Arithmetic Instructions
 
 DADD111011 . . . 10 .   @X_rc
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 2add128cd1..467f3046c8 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -159,9 +159,6 @@ EXTRACT_HELPER(FPL, 25, 1);
 EXTRACT_HELPER(FPFLM, 17, 8);
 EXTRACT_HELPER(FPW, 16, 1);
 
-/* mffscrni */
-EXTRACT_HELPER(RM, 11, 2);
-
 /* addpcis */
 EXTRACT_HELPER_SPLIT_3(DX, 10, 6, 6, 5, 16, 1, 1, 0, 0)
 #if defined(TARGET_PPC64)
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index f9b58b844e..bcb7ec2689 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -685,71 +685,72 @@ static void gen_mffsce(DisasContext *ctx)
 tcg_temp_free_i64(t0);
 }
 
-static void gen_helper_mffscrn(DisasContext *ctx, TCGv_i64 t1)
+static TCGv_i64 place_from_fpscr(int rt, uint64_t mask)
 {
-TCGv_i64 t0 = tcg_temp_new_i64();
-TCGv_i32 mask = tcg_const_i32(0x0001);
+TCGv_i64 fpscr = tcg_temp_new_i64();
+TCGv_i64 fpscr_masked = tcg_temp_new_i64();
 
-gen_reset_fpstatus();
-tcg_gen_extu_tl_i64(t0, cpu_fpscr);
-tcg_gen_andi_i64(t0, t0, FP_DRN | FP_ENABLES | FP_RN);
-set_fpr(rD(ctx->opcode), t0);
+tcg_gen_extu_tl_i64(fpscr, cpu_fpscr);
+tcg_gen_andi_i64(fpscr_masked, fpscr, mask);
+set_fpr(rt, fpscr_masked);
 
-/* Mask FPSCR value to clear RN.  */
-tcg_gen_andi_i64(t0, t0, ~FP_RN);
+tcg_temp_free_i64(fpscr_masked);
 
-/* Merge RN into FPSCR value.  */
-tcg_gen_or_i64(t0, t0, t1);
+return fpscr;
+}
 
-gen_helper_store_fpscr(cpu_env, t0, mask);
+static void store_fpscr_masked(TCGv_i64 fpscr, uint64_t clear_mask,
+   TCGv_i64 set_mask, uint32_t store_mask)
+{
+TCGv_i64 fpscr_masked = tcg_temp_new_i64();
+TCGv_i32 st_mask = tcg_constant_i32(store_mask);
 
-tcg_temp_free_i32(mask);
-tcg_temp_free_i64(t0);
+tcg_gen_andi_i64(fpscr_masked, fpscr, ~clear_mask);
+tcg_gen_or_i64(fpscr_masked, fpscr_masked, set_mask);
+gen_helper_store_fpscr(cpu_env, fpscr_masked, st_mask);
+
+tcg_temp_free_i64(fpscr_masked);
 }
 
-/* mffscrn */
-static void gen_mffscrn(DisasContext *ctx)
+static bool trans_MFFSCRN(DisasContext *ctx, arg_X_tb *a)
 {
-TCGv_i64 t1;
+TCGv_i64 t1, fpscr;
 
-if (unlikely(!(ctx->insns_flags2 & PPC2_ISA300))) {
-return gen_mffs(ctx);
-}
-
-if (unlikely(!ctx->fpu_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_FPU);
-return;
-}
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_FPU(ctx);
 
 t1 = tcg_temp_new_i64();
-get_fpr(t1, rB(ctx->opcode));
-/* Mask FRB to get just RN.  */
+get_fpr(t1, a->rb);
 tcg_gen_andi_i64(t1, t1, FP_RN);
 
-gen_helper_mffscrn(ctx, t1);
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt, FP_DRN | FP_ENABLES | FP_NI | FP_RN);
+store_fpscr_masked(fpscr, FP_RN, t1, 0x0001);
 
 tcg_temp_free_i64(t1);
+tcg_temp_free_i64(fpscr);
+
+return true;
 }
 
-/* mffscrni */
-static void gen_mffscrni(DisasContext *ctx)
+static bool trans_MFFSCRNI(DisasContext *ctx, arg_X_imm2 *a)
 {
-TCGv_i64 t1;
-
-if (unlikely(!(ctx->insns_flags2 & PPC2_ISA300))) {
-return gen_mffs(ctx);
-}
+TCGv_i64 t1, fpscr;
 
-if (unlikely(!ctx->fpu_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_FPU);
-return;
-}
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_FPU(ctx);
 
-t1 = tcg_const_i64((uint64_t)RM(ctx->opcode));
+t1 = tcg_temp_new_i64();
+tcg_gen_movi_i64(t1, a->imm);
 
-gen_helper_mffscrn(ctx, t1);
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a-&g

[PATCH v3 05/11] target/ppc: Move mffs[.] to decodetree

2022-06-29 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
Reviewed-by: Matheus Ferst 
---
 target/ppc/insn32.decode   |  4 
 target/ppc/translate/fp-impl.c.inc | 35 +++---
 target/ppc/translate/fp-ops.c.inc  |  1 -
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 6d3b98a127..736a7c6f3f 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -100,6 +100,9 @@
 _tb   rt rb
 @X_tb   .. rt:5 . rb:5 .. . _tb
 
+_t_rc rt rc:bool
+@X_t_rc .. rt:5 . . .. rc:1 _t_rc
+
 _tb_rcrt rb rc:bool
 @X_tb_rc.. rt:5 . rb:5 .. rc:1  _tb_rc
 
@@ -342,6 +345,7 @@ SETNBCR 01 . . - 00 -   
@X_bi
 
 ### Move To/From FPSCR
 
+MFFS11 . 0 - 1001000111 .   @X_t_rc
 MFFSCE  11 . 1 - 1001000111 -   @X_t
 MFFSCRN 11 . 10110 . 1001000111 -   @X_tb
 MFFSCRNI11 . 10111 ---.. 1001000111 -   @X_imm2
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index 4f4d57c611..d6231358f8 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -615,24 +615,6 @@ static void gen_mcrfs(DisasContext *ctx)
 tcg_temp_free_i64(tnew_fpscr);
 }
 
-/* mffs */
-static void gen_mffs(DisasContext *ctx)
-{
-TCGv_i64 t0;
-if (unlikely(!ctx->fpu_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_FPU);
-return;
-}
-t0 = tcg_temp_new_i64();
-gen_reset_fpstatus();
-tcg_gen_extu_tl_i64(t0, cpu_fpscr);
-set_fpr(rD(ctx->opcode), t0);
-if (unlikely(Rc(ctx->opcode))) {
-gen_set_cr1_from_fpscr(ctx);
-}
-tcg_temp_free_i64(t0);
-}
-
 static TCGv_i64 place_from_fpscr(int rt, uint64_t mask)
 {
 TCGv_i64 fpscr = tcg_temp_new_i64();
@@ -660,6 +642,23 @@ static void store_fpscr_masked(TCGv_i64 fpscr, uint64_t 
clear_mask,
 tcg_temp_free_i64(fpscr_masked);
 }
 
+static bool trans_MFFS(DisasContext *ctx, arg_X_t_rc *a)
+{
+TCGv_i64 fpscr;
+
+REQUIRE_FPU(ctx);
+
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt, UINT64_MAX);
+if (a->rc) {
+gen_set_cr1_from_fpscr(ctx);
+}
+
+tcg_temp_free_i64(fpscr);
+
+return true;
+}
+
 static bool trans_MFFSCE(DisasContext *ctx, arg_X_t *a)
 {
 TCGv_i64 fpscr;
diff --git a/target/ppc/translate/fp-ops.c.inc 
b/target/ppc/translate/fp-ops.c.inc
index f8c35124ae..1b65f5ab73 100644
--- a/target/ppc/translate/fp-ops.c.inc
+++ b/target/ppc/translate/fp-ops.c.inc
@@ -74,7 +74,6 @@ GEN_HANDLER_E(fcpsgn, 0x3F, 0x08, 0x00, 0x, PPC_NONE, 
PPC2_ISA205),
 GEN_HANDLER_E(fmrgew, 0x3F, 0x06, 0x1E, 0x0001, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(fmrgow, 0x3F, 0x06, 0x1A, 0x0001, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER(mcrfs, 0x3F, 0x00, 0x02, 0x0063F801, PPC_FLOAT),
-GEN_HANDLER_E_2(mffs, 0x3F, 0x07, 0x12, 0x00, 0x, PPC_FLOAT, PPC_NONE),
 GEN_HANDLER(mtfsb0, 0x3F, 0x06, 0x02, 0x001FF800, PPC_FLOAT),
 GEN_HANDLER(mtfsb1, 0x3F, 0x06, 0x01, 0x001FF800, PPC_FLOAT),
 GEN_HANDLER(mtfsf, 0x3F, 0x07, 0x16, 0x, PPC_FLOAT),
-- 
2.25.1




[PATCH v3 03/11] target/ppc: Move mffsce to decodetree

2022-06-29 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
Reviewed-by: Matheus Ferst 
---
 target/ppc/insn32.decode   |  4 +++
 target/ppc/translate/fp-impl.c.inc | 46 +++---
 target/ppc/translate/fp-ops.c.inc  |  2 --
 3 files changed, 20 insertions(+), 32 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 3b61c3a073..b6a7a3a3ff 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -94,6 +94,9 @@
 
 @X_tp_a_bp_rc   .. 0 ra:5 0 .. rc:1 _rc 
rt=%x_frtp rb=%x_frbp
 
+_trt
+@X_t.. rt:5 . . .. ._t
+
 _tb   rt rb
 @X_tb   .. rt:5 . rb:5 .. . _tb
 
@@ -339,6 +342,7 @@ SETNBCR 01 . . - 00 -   
@X_bi
 
 ### Move To/From FPSCR
 
+MFFSCE  11 . 1 - 1001000111 -   @X_t
 MFFSCRN 11 . 10110 . 1001000111 -   @X_tb
 MFFSCRNI11 . 10111 ---.. 1001000111 -   @X_imm2
 
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index bcb7ec2689..64e26b9b42 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -655,36 +655,6 @@ static void gen_mffsl(DisasContext *ctx)
 tcg_temp_free_i64(t0);
 }
 
-/* mffsce */
-static void gen_mffsce(DisasContext *ctx)
-{
-TCGv_i64 t0;
-TCGv_i32 mask;
-
-if (unlikely(!(ctx->insns_flags2 & PPC2_ISA300))) {
-return gen_mffs(ctx);
-}
-
-if (unlikely(!ctx->fpu_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_FPU);
-return;
-}
-
-t0 = tcg_temp_new_i64();
-
-gen_reset_fpstatus();
-tcg_gen_extu_tl_i64(t0, cpu_fpscr);
-set_fpr(rD(ctx->opcode), t0);
-
-/* Clear exception enable bits in the FPSCR.  */
-tcg_gen_andi_i64(t0, t0, ~FP_ENABLES);
-mask = tcg_const_i32(0x0003);
-gen_helper_store_fpscr(cpu_env, t0, mask);
-
-tcg_temp_free_i32(mask);
-tcg_temp_free_i64(t0);
-}
-
 static TCGv_i64 place_from_fpscr(int rt, uint64_t mask)
 {
 TCGv_i64 fpscr = tcg_temp_new_i64();
@@ -712,6 +682,22 @@ static void store_fpscr_masked(TCGv_i64 fpscr, uint64_t 
clear_mask,
 tcg_temp_free_i64(fpscr_masked);
 }
 
+static bool trans_MFFSCE(DisasContext *ctx, arg_X_t *a)
+{
+TCGv_i64 fpscr;
+
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_FPU(ctx);
+
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt, UINT64_MAX);
+store_fpscr_masked(fpscr, FP_ENABLES, tcg_constant_i64(0), 0x0003);
+
+tcg_temp_free_i64(fpscr);
+
+return true;
+}
+
 static bool trans_MFFSCRN(DisasContext *ctx, arg_X_tb *a)
 {
 TCGv_i64 t1, fpscr;
diff --git a/target/ppc/translate/fp-ops.c.inc 
b/target/ppc/translate/fp-ops.c.inc
index a27a1be9f5..a76943b8bf 100644
--- a/target/ppc/translate/fp-ops.c.inc
+++ b/target/ppc/translate/fp-ops.c.inc
@@ -75,8 +75,6 @@ GEN_HANDLER_E(fmrgew, 0x3F, 0x06, 0x1E, 0x0001, PPC_NONE, 
PPC2_VSX207),
 GEN_HANDLER_E(fmrgow, 0x3F, 0x06, 0x1A, 0x0001, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER(mcrfs, 0x3F, 0x00, 0x02, 0x0063F801, PPC_FLOAT),
 GEN_HANDLER_E_2(mffs, 0x3F, 0x07, 0x12, 0x00, 0x, PPC_FLOAT, PPC_NONE),
-GEN_HANDLER_E_2(mffsce, 0x3F, 0x07, 0x12, 0x01, 0x, PPC_FLOAT,
-PPC2_ISA300),
 GEN_HANDLER_E_2(mffsl, 0x3F, 0x07, 0x12, 0x18, 0x, PPC_FLOAT,
 PPC2_ISA300),
 GEN_HANDLER(mtfsb0, 0x3F, 0x06, 0x02, 0x001FF800, PPC_FLOAT),
-- 
2.25.1




[PATCH v3 01/11] target/ppc: Fix insn32.decode style issues

2022-06-29 Thread Víctor Colombo
Some lines in insn32.decode have inconsistent alignment when compared
to others.
Fix this by changing the alignment of some lines, making it more
consistent throughout the file.

Signed-off-by: Víctor Colombo 
Reviewed-by: Richard Henderson 
---
 target/ppc/insn32.decode | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 6ea48d5163..8b723b5433 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -21,11 +21,11 @@
 @A  .. frt:5 fra:5 frb:5 frc:5 . rc:1   
 
   rt ra si:int64_t
-@D  .. rt:5 ra:5 si:s16 
+@D  .. rt:5 ra:5 si:s16 
 
 _bf   bf l:bool ra imm
-@D_bfs  .. bf:3 - l:1 ra:5 imm:s16  _bf
-@D_bfu  .. bf:3 - l:1 ra:5 imm:16   _bf
+@D_bfs  .. bf:3 . l:1 ra:5 imm:s16  _bf
+@D_bfu  .. bf:3 . l:1 ra:5 imm:16   _bf
 
 %dq_si  4:s12  !function=times_16
 %dq_rtp 22:4   !function=times_2
@@ -38,7 +38,7 @@
 @DQ_TSXP.. . ra:5    si=%dq_si 
rt=%rt_tsxp
 
 %ds_si  2:s14  !function=times_4
-@DS .. rt:5 ra:5 .. ..   si=%ds_si
+@DS .. rt:5 ra:5 .. ..   si=%ds_si
 
 %ds_rtp 22:4   !function=times_2
 @DS_rtp .. 0 ra:5 .. ..  rt=%ds_rtp 
si=%ds_si
@@ -49,10 +49,10 @@
 
  rt d
 %dx_d   6:s10 16:5 0:1
-@DX .. rt:5  . .. . .d=%dx_d
+@DX .. rt:5  . .. . .d=%dx_d
 
  vrt vra vrb rc
-@VA .. vrt:5 vra:5 vrb:5 rc:5 ..
+@VA .. vrt:5 vra:5 vrb:5 rc:5 ..
 
  vrt vra vrb rc:bool
 @VC .. vrt:5 vra:5 vrb:5 rc:1 ..
@@ -61,7 +61,7 @@
 @VN .. vrt:5 vra:5 vrb:5 .. sh:3 .. 
 
  vrt vra vrb
-@VX .. vrt:5 vra:5 vrb:5 .. .   
+@VX .. vrt:5 vra:5 vrb:5 .. .   
 
 _bf  bf vra vrb
 @VX_bf  .. bf:3 .. vra:5 vrb:5 ...  _bf
@@ -76,13 +76,13 @@
 @VX_tb_rc   .. vrt:5 . vrb:5 rc:1 .._tb_rc
 
 _uim4vrt uim vrb
-@VX_uim4.. vrt:5 . uim:4 vrb:5 ...  _uim4
+@VX_uim4.. vrt:5 . uim:4 vrb:5 ...  _uim4
 
 _tb  vrt vrb
-@VX_tb  .. vrt:5 . vrb:5 ..._tb
+@VX_tb  .. vrt:5 . vrb:5 ..._tb
 
   rt ra rb
-@X  .. rt:5 ra:5 rb:5 .. .  
+@X  .. rt:5 ra:5 rb:5 .. .  
 
 _rc   rt ra rb rc:bool
 @X_rc   .. rt:5 ra:5 rb:5 .. rc:1   _rc
@@ -107,7 +107,7 @@
 @X_t_bp_rc  .. rt:5 . 0 .. rc:1 _tb_rc 
rb=%x_frbp
 
 _bi   rt bi
-@X_bi   .. rt:5 bi:5 - .. - _bi
+@X_bi   .. rt:5 bi:5 . .. . _bi
 
 _bf   bf ra rb
 @X_bf   .. bf:3 .. ra:5 rb:5 .. .   _bf
@@ -122,7 +122,7 @@
 @X_bf_uim_bp.. bf:3 . uim:6 0 .. .  _bf_uim 
rb=%x_frbp
 
 _bfl  bf l:bool ra rb
-@X_bfl  .. bf:3 - l:1 ra:5 rb:5 ..- _bfl
+@X_bfl  .. bf:3 . l:1 ra:5 rb:5 .. ._bfl
 
 %x_xt   0:1 21:5
 _imm5 xt imm:uint8_t vrb
-- 
2.25.1




[PATCH v3 00/11] target/ppc: BCDA and mffscdrn implementations

2022-06-29 Thread Víctor Colombo
Hello everyone,

This patch set implements some instructions that were previously
missing, moves some related instructions to decodetree, and adds a
test for mffsce.

v3:
- Rebase on master
- Add r-b

v2:
- Added R-b on patches 1, 8, 10, and 11. Dropped the R-b on some
  of the patches as there were big changes on them.
- Fixed addg6s issues
- Separated do_mffsc in two different, more specialized functions
- Changed mffs* patches order to make it more readable, as suggested
  by Richard
- Added a new patch with a test for the mffsce instruction

Matheus Ferst (4):
  target/ppc: Add flag for ISA v2.06 BCDA instructions
  target/ppc: implement addg6s
  target/ppc: implement cbcdtd
  target/ppc: implement cdtbcd

Víctor Colombo (7):
  target/ppc: Fix insn32.decode style issues
  target/ppc: Move mffscrn[i] to decodetree
  target/ppc: Move mffsce to decodetree
  target/ppc: Move mffsl to decodetree
  target/ppc: Move mffs[.] to decodetree
  target/ppc: Implement mffscdrn[i] instructions
  tests/tcg/ppc64: Add mffsce test

 target/ppc/cpu.h   |   5 +-
 target/ppc/cpu_init.c  |  10 +-
 target/ppc/dfp_helper.c|  65 +++
 target/ppc/helper.h|   2 +
 target/ppc/insn32.decode   |  55 --
 target/ppc/internal.h  |   3 -
 target/ppc/translate/fixedpoint-impl.c.inc |  51 ++
 target/ppc/translate/fp-impl.c.inc | 203 -
 target/ppc/translate/fp-ops.c.inc  |   9 -
 tests/tcg/ppc64/Makefile.target|   1 +
 tests/tcg/ppc64le/Makefile.target  |   1 +
 tests/tcg/ppc64le/mffsce.c |  37 
 12 files changed, 322 insertions(+), 120 deletions(-)
 create mode 100644 tests/tcg/ppc64le/mffsce.c

-- 
2.25.1




Re: [PATCH 7/7] target/ppc: use int128.h methods in vsubcuq

2022-06-27 Thread Víctor Colombo
_FLAGS2(ALTIVEC_207, VPMSUMD, do_vx_helper, gen_helper_VPMSUMD)

+TRANS_FLAGS2(ALTIVEC_207, VSUBCUQ, do_vx_helper, gen_helper_VSUBCUQ)
  TRANS_FLAGS2(ALTIVEC_207, VSUBUQM, do_vx_helper, gen_helper_VSUBUQM)

  static bool do_vx_vmuleo(DisasContext *ctx, arg_VX *a, bool even,
diff --git a/target/ppc/translate/vmx-ops.c.inc 
b/target/ppc/translate/vmx-ops.c.inc
index 9395806f3d..a3a0fd0650 100644
--- a/target/ppc/translate/vmx-ops.c.inc
+++ b/target/ppc/translate/vmx-ops.c.inc
@@ -127,7 +127,7 @@ GEN_VXFORM_DUAL(vsubsbs, bcdtrunc, 0, 28, PPC_ALTIVEC, 
PPC2_ISA300),
  GEN_VXFORM(vsubshs, 0, 29),
  GEN_VXFORM_DUAL(vsubsws, xpnd04_2, 0, 30, PPC_ALTIVEC, PPC_NONE),
  GEN_VXFORM_300(bcdtrunc, 0, 20),
-GEN_VXFORM_DUAL(vsubcuq, bcdutrunc, 0, 21, PPC2_ALTIVEC_207, PPC2_ISA300),
+GEN_VXFORM_300(bcdutrunc, 0, 21),
  GEN_VXFORM(vsl, 2, 7),
  GEN_VXFORM(vsr, 2, 11),
  GEN_VXFORM(vpkuhum, 7, 0),
--
2.25.1




Reviewed-by: Víctor Colombo 

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>



Re: [PATCH 5/7] target/ppc: use int128.h methods in vsubuqm

2022-06-27 Thread Víctor Colombo
-
2.25.1




Reviewed-by: Víctor Colombo 

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>



Re: [PATCH 2/7] target/ppc: use int128.h methods in vadduqm

2022-06-27 Thread Víctor Colombo

On 06/06/2022 12:00, Matheus Ferst wrote:

And also move the insn to decodetree.

Signed-off-by: Matheus Ferst 
---
  target/ppc/helper.h | 2 +-
  target/ppc/insn32.decode| 2 ++
  target/ppc/int_helper.c | 8 ++--
  target/ppc/translate/vmx-impl.c.inc | 3 ++-
  target/ppc/translate/vmx-ops.c.inc  | 1 -
  5 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 39ad114c97..c6fbe4b6da 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -204,7 +204,7 @@ DEF_HELPER_FLAGS_5(vadduws, TCG_CALL_NO_RWG, void, avr, 
avr, avr, avr, i32)
  DEF_HELPER_FLAGS_5(vsububs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
  DEF_HELPER_FLAGS_5(vsubuhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
  DEF_HELPER_FLAGS_5(vsubuws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
-DEF_HELPER_FLAGS_3(vadduqm, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VADDUQM, TCG_CALL_NO_RWG, void, avr, avr, avr)
  DEF_HELPER_FLAGS_4(vaddecuq, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
  DEF_HELPER_FLAGS_4(vaddeuqm, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
  DEF_HELPER_FLAGS_3(vaddcuq, TCG_CALL_NO_RWG, void, avr, avr, avr)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 0772729c6e..d6bfc2c768 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -550,6 +550,8 @@ VRLQNM  000100 . . . 00101000101@VX

  ## Vector Integer Arithmetic Instructions

+VADDUQM 000100 . . . 001@VX
+
  VEXTSB2W000100 . 1 . 1100010@VX_tb
  VEXTSH2W000100 . 10001 . 1100010@VX_tb
  VEXTSB2D000100 . 11000 . 1100010@VX_tb
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 67aaa8edf5..c32b252639 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -2224,13 +2224,9 @@ static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, 
ppc_avr_t b)

  #endif

-void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
  {
-#ifdef CONFIG_INT128
-r->u128 = a->u128 + b->u128;
-#else
-avr_qw_add(r, *a, *b);
-#endif
+r->s128 = int128_add(a->s128, b->s128);
  }

  void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
diff --git a/target/ppc/translate/vmx-impl.c.inc 
b/target/ppc/translate/vmx-impl.c.inc
index 4c2a36405b..3fb48404d9 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -1234,7 +1234,6 @@ GEN_VXFORM_SAT(vsubuws, MO_32, sub, ussub, 0, 26);
  GEN_VXFORM_SAT(vsubsbs, MO_8, sub, sssub, 0, 28);
  GEN_VXFORM_SAT(vsubshs, MO_16, sub, sssub, 0, 29);
  GEN_VXFORM_SAT(vsubsws, MO_32, sub, sssub, 0, 30);
-GEN_VXFORM(vadduqm, 0, 4);
  GEN_VXFORM(vaddcuq, 0, 5);
  GEN_VXFORM3(vaddeuqm, 30, 0);
  GEN_VXFORM3(vaddecuq, 30, 0);
@@ -3100,6 +3099,8 @@ static bool do_vx_helper(DisasContext *ctx, arg_VX *a,
  return true;
  }

+TRANS_FLAGS2(ALTIVEC_207, VADDUQM, do_vx_helper, gen_helper_VADDUQM)
+
  TRANS_FLAGS2(ALTIVEC_207, VPMSUMD, do_vx_helper, gen_helper_VPMSUMD)

  static bool do_vx_vmuleo(DisasContext *ctx, arg_VX *a, bool even,
diff --git a/target/ppc/translate/vmx-ops.c.inc 
b/target/ppc/translate/vmx-ops.c.inc
index 26c1d957ee..065b0ba414 100644
--- a/target/ppc/translate/vmx-ops.c.inc
+++ b/target/ppc/translate/vmx-ops.c.inc
@@ -126,7 +126,6 @@ GEN_VXFORM(vsubuws, 0, 26),
  GEN_VXFORM_DUAL(vsubsbs, bcdtrunc, 0, 28, PPC_ALTIVEC, PPC2_ISA300),
  GEN_VXFORM(vsubshs, 0, 29),
  GEN_VXFORM_DUAL(vsubsws, xpnd04_2, 0, 30, PPC_ALTIVEC, PPC_NONE),
-GEN_VXFORM_207(vadduqm, 0, 4),
  GEN_VXFORM_207(vaddcuq, 0, 5),
  GEN_VXFORM_DUAL(vaddeuqm, vaddecuq, 30, 0xFF, PPC_NONE, PPC2_ALTIVEC_207),
  GEN_VXFORM_DUAL(vsubuqm, bcdtrunc, 0, 20, PPC2_ALTIVEC_207, PPC2_ISA300),
--
2.25.1




Reviewed-by: Víctor Colombo 

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>



Re: [PATCH 1/7] target/ppc: use int128.h methods in vpmsumd

2022-06-27 Thread Víctor Colombo

On 06/06/2022 12:00, Matheus Ferst wrote:

[E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você possa 
confirmar o remetente e saber que o conteúdo é seguro. Em caso de e-mail 
suspeito entre imediatamente em contato com o DTI.

Also drop VECTOR_FOR_INORDER_I usage since there is no need to access
the elements in any particular order, and move the instruction to
decodetree.

Signed-off-by: Matheus Ferst 


Reviewed-by: Víctor Colombo 


--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>



Re: [PATCH 4/7] target/ppc: use int128.h methods in vaddcuq

2022-06-27 Thread Víctor Colombo

On 06/06/2022 12:00, Matheus Ferst wrote:

And also move the insn to decodetree.

Signed-off-by: Matheus Ferst 
---
  target/ppc/helper.h |  2 +-
  target/ppc/insn32.decode|  1 +
  target/ppc/int_helper.c | 12 ++--
  target/ppc/translate/vmx-impl.c.inc |  2 +-
  target/ppc/translate/vmx-ops.c.inc  |  1 -
  5 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index f699adbedc..f6b1b2fad2 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -207,7 +207,7 @@ DEF_HELPER_FLAGS_5(vsubuws, TCG_CALL_NO_RWG, void, avr, 
avr, avr, avr, i32)
  DEF_HELPER_FLAGS_3(VADDUQM, TCG_CALL_NO_RWG, void, avr, avr, avr)
  DEF_HELPER_FLAGS_4(VADDECUQ, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
  DEF_HELPER_FLAGS_4(VADDEUQM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
-DEF_HELPER_FLAGS_3(vaddcuq, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VADDCUQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
  DEF_HELPER_FLAGS_3(vsubuqm, TCG_CALL_NO_RWG, void, avr, avr, avr)
  DEF_HELPER_FLAGS_4(vsubecuq, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
  DEF_HELPER_FLAGS_4(vsubeuqm, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 139aa3caeb..35252ddd4f 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -550,6 +550,7 @@ VRLQNM  000100 . . . 00101000101@VX

  ## Vector Integer Arithmetic Instructions

+VADDCUQ 000100 . . . 0010100@VX
  VADDUQM 000100 . . . 001@VX

  VADDEUQM000100 . . . . 00   @VA
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index c5d820f4b1..a12f2831ac 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -2225,18 +2225,10 @@ void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, 
ppc_avr_t *b, ppc_avr_t *c)
   int128_make64(int128_getlo(c->s128) & 1));
  }

-void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
  {
-#ifdef CONFIG_INT128
-r->u128 = (~a->u128 < b->u128);
-#else
-ppc_avr_t not_a;
-
-avr_qw_not(_a, *a);
-
+r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128);
  r->VsrD(0) = 0;
-r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
-#endif
  }

  void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
diff --git a/target/ppc/translate/vmx-impl.c.inc 
b/target/ppc/translate/vmx-impl.c.inc
index 4ec6b841b3..8c0e5bcc03 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -1234,7 +1234,6 @@ GEN_VXFORM_SAT(vsubuws, MO_32, sub, ussub, 0, 26);
  GEN_VXFORM_SAT(vsubsbs, MO_8, sub, sssub, 0, 28);
  GEN_VXFORM_SAT(vsubshs, MO_16, sub, sssub, 0, 29);
  GEN_VXFORM_SAT(vsubsws, MO_32, sub, sssub, 0, 30);
-GEN_VXFORM(vaddcuq, 0, 5);
  GEN_VXFORM(vsubuqm, 0, 20);
  GEN_VXFORM(vsubcuq, 0, 21);
  GEN_VXFORM3(vsubeuqm, 31, 0);
@@ -3098,6 +3097,7 @@ static bool do_vx_helper(DisasContext *ctx, arg_VX *a,
  return true;
  }

+TRANS_FLAGS2(ALTIVEC_207, VADDCUQ, do_vx_helper, gen_helper_VADDCUQ)
  TRANS_FLAGS2(ALTIVEC_207, VADDUQM, do_vx_helper, gen_helper_VADDUQM)

  TRANS_FLAGS2(ALTIVEC_207, VPMSUMD, do_vx_helper, gen_helper_VPMSUMD)
diff --git a/target/ppc/translate/vmx-ops.c.inc 
b/target/ppc/translate/vmx-ops.c.inc
index f8a512f920..33e05929cb 100644
--- a/target/ppc/translate/vmx-ops.c.inc
+++ b/target/ppc/translate/vmx-ops.c.inc
@@ -126,7 +126,6 @@ GEN_VXFORM(vsubuws, 0, 26),
  GEN_VXFORM_DUAL(vsubsbs, bcdtrunc, 0, 28, PPC_ALTIVEC, PPC2_ISA300),
  GEN_VXFORM(vsubshs, 0, 29),
  GEN_VXFORM_DUAL(vsubsws, xpnd04_2, 0, 30, PPC_ALTIVEC, PPC_NONE),
-GEN_VXFORM_207(vaddcuq, 0, 5),
  GEN_VXFORM_DUAL(vsubuqm, bcdtrunc, 0, 20, PPC2_ALTIVEC_207, PPC2_ISA300),
  GEN_VXFORM_DUAL(vsubcuq, bcdutrunc, 0, 21, PPC2_ALTIVEC_207, PPC2_ISA300),
  GEN_VXFORM_DUAL(vsubeuqm, vsubecuq, 31, 0xFF, PPC_NONE, PPC2_ALTIVEC_207),
--
2.25.1




Reviewed-by: Víctor Colombo 

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>



Re: [PATCH 6/7] target/ppc: use int128.h methods in vsubecuq and vsubeuqm

2022-06-27 Thread Víctor Colombo
ANS_FLAGS2(ALTIVEC_207, VADDEUQM, do_va_helper, gen_helper_VADDEUQM)

+TRANS_FLAGS2(ALTIVEC_207, VSUBEUQM, do_va_helper, gen_helper_VSUBEUQM)
+TRANS_FLAGS2(ALTIVEC_207, VSUBECUQ, do_va_helper, gen_helper_VSUBECUQ)
+
  TRANS_FLAGS(ALTIVEC, VPERM, do_va_helper, gen_helper_VPERM)
  TRANS_FLAGS2(ISA300, VPERMR, do_va_helper, gen_helper_VPERMR)

diff --git a/target/ppc/translate/vmx-ops.c.inc 
b/target/ppc/translate/vmx-ops.c.inc
index 9feef9afee..9395806f3d 100644
--- a/target/ppc/translate/vmx-ops.c.inc
+++ b/target/ppc/translate/vmx-ops.c.inc
@@ -128,7 +128,6 @@ GEN_VXFORM(vsubshs, 0, 29),
  GEN_VXFORM_DUAL(vsubsws, xpnd04_2, 0, 30, PPC_ALTIVEC, PPC_NONE),
  GEN_VXFORM_300(bcdtrunc, 0, 20),
  GEN_VXFORM_DUAL(vsubcuq, bcdutrunc, 0, 21, PPC2_ALTIVEC_207, PPC2_ISA300),
-GEN_VXFORM_DUAL(vsubeuqm, vsubecuq, 31, 0xFF, PPC_NONE, PPC2_ALTIVEC_207),
  GEN_VXFORM(vsl, 2, 7),
  GEN_VXFORM(vsr, 2, 11),
  GEN_VXFORM(vpkuhum, 7, 0),
--
2.25.1




Reviewed-by: Víctor Colombo 

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>



Re: [PATCH 3/7] target/ppc: use int128.h methods in vaddecuq and vaddeuqm

2022-06-27 Thread Víctor Colombo
ec6b841b3 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -1235,10 +1235,6 @@ GEN_VXFORM_SAT(vsubsbs, MO_8, sub, sssub, 0, 28);
  GEN_VXFORM_SAT(vsubshs, MO_16, sub, sssub, 0, 29);
  GEN_VXFORM_SAT(vsubsws, MO_32, sub, sssub, 0, 30);
  GEN_VXFORM(vaddcuq, 0, 5);
-GEN_VXFORM3(vaddeuqm, 30, 0);
-GEN_VXFORM3(vaddecuq, 30, 0);
-GEN_VXFORM_DUAL(vaddeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
-vaddecuq, PPC_NONE, PPC2_ALTIVEC_207)
  GEN_VXFORM(vsubuqm, 0, 20);
  GEN_VXFORM(vsubcuq, 0, 21);
  GEN_VXFORM3(vsubeuqm, 31, 0);
@@ -2571,6 +2567,9 @@ static bool do_va_helper(DisasContext *ctx, arg_VA *a,
  return true;
  }

+TRANS_FLAGS2(ALTIVEC_207, VADDECUQ, do_va_helper, gen_helper_VADDECUQ)
+TRANS_FLAGS2(ALTIVEC_207, VADDEUQM, do_va_helper, gen_helper_VADDEUQM)
+
  TRANS_FLAGS(ALTIVEC, VPERM, do_va_helper, gen_helper_VPERM)
  TRANS_FLAGS2(ISA300, VPERMR, do_va_helper, gen_helper_VPERMR)

diff --git a/target/ppc/translate/vmx-ops.c.inc 
b/target/ppc/translate/vmx-ops.c.inc
index 065b0ba414..f8a512f920 100644
--- a/target/ppc/translate/vmx-ops.c.inc
+++ b/target/ppc/translate/vmx-ops.c.inc
@@ -127,7 +127,6 @@ GEN_VXFORM_DUAL(vsubsbs, bcdtrunc, 0, 28, PPC_ALTIVEC, 
PPC2_ISA300),
  GEN_VXFORM(vsubshs, 0, 29),
  GEN_VXFORM_DUAL(vsubsws, xpnd04_2, 0, 30, PPC_ALTIVEC, PPC_NONE),
  GEN_VXFORM_207(vaddcuq, 0, 5),
-GEN_VXFORM_DUAL(vaddeuqm, vaddecuq, 30, 0xFF, PPC_NONE, PPC2_ALTIVEC_207),
  GEN_VXFORM_DUAL(vsubuqm, bcdtrunc, 0, 20, PPC2_ALTIVEC_207, PPC2_ISA300),
  GEN_VXFORM_DUAL(vsubcuq, bcdutrunc, 0, 21, PPC2_ALTIVEC_207, PPC2_ISA300),
  GEN_VXFORM_DUAL(vsubeuqm, vsubecuq, 31, 0xFF, PPC_NONE, PPC2_ALTIVEC_207),
--
2.25.1




Reviewed-by: Víctor Colombo 

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>



Re: [PATCH] target/riscv: fix user-mode build issue because mhartid

2022-06-27 Thread Víctor Colombo

On 27/06/2022 06:40, Rahul Pathak wrote:

The mhartid CSR is not available in the user-mode code path, so the
user-mode build fails because it is referenced in the
riscv_cpu_realize function.

Signed-off-by: Rahul Pathak 
---
  target/riscv/cpu.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 0a794ef622..03f23d4b6d 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -643,9 +643,15 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
  if (isa_ext_is_enabled(cpu, _edata_arr[i]) &&
  (env->priv_ver < isa_edata_arr[i].min_version)) {
  isa_ext_update_enabled(cpu, _edata_arr[i], false);
+#ifndef CONFIG_USER_ONLY
  warn_report("disabling %s extension for hart 0x%lx because "
  "privilege spec version does not match",
  isa_edata_arr[i].name, (unsigned long)env->mhartid);
+#else
+warn_report("disabling %s extension for hart 0x%lx because "
+"privilege spec version does not match",
+isa_edata_arr[i].name);


Hello, Rahul

Looks like you removed the second argument but didn't update the format
string. The second format specifier is still there.


+#endif
  }
  }

--
2.34.1




Best regards,

--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer 



[PATCH] target/ppc: Change FPSCR_* to follow POWER ISA numbering convention

2022-06-22 Thread Víctor Colombo
FPSCR_* bit values in QEMU are in the 'inverted' order from what Power
ISA defines (e.g. FPSCR.FI is bit 46 but is defined as 17 in cpu.h).
Now that PPC_BIT_NR macro was introduced to fix this situation for the
MSR bits, we can use it for the FPSCR bits too.

Also, adjust the comments to make them fit in 80 columns.

Signed-off-by: Víctor Colombo 
---
 target/ppc/cpu.h | 72 
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 6d78078f37..c78f64cced 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -694,42 +694,42 @@ enum {
 
 /*/
 /* Floating point status and control register*/
-#define FPSCR_DRN2   34 /* Decimal Floating-Point rounding control   */
-#define FPSCR_DRN1   33 /* Decimal Floating-Point rounding control   */
-#define FPSCR_DRN0   32 /* Decimal Floating-Point rounding control   */
-#define FPSCR_FX 31 /* Floating-point exception summary  */
-#define FPSCR_FEX30 /* Floating-point enabled exception summary  */
-#define FPSCR_VX 29 /* Floating-point invalid operation exception summ.  */
-#define FPSCR_OX 28 /* Floating-point overflow exception */
-#define FPSCR_UX 27 /* Floating-point underflow exception*/
-#define FPSCR_ZX 26 /* Floating-point zero divide exception  */
-#define FPSCR_XX 25 /* Floating-point inexact exception  */
-#define FPSCR_VXSNAN 24 /* Floating-point invalid operation exception (sNan) */
-#define FPSCR_VXISI  23 /* Floating-point invalid operation exception (inf)  */
-#define FPSCR_VXIDI  22 /* Floating-point invalid operation exception (inf)  */
-#define FPSCR_VXZDZ  21 /* Floating-point invalid operation exception (zero) */
-#define FPSCR_VXIMZ  20 /* Floating-point invalid operation exception (inf)  */
-#define FPSCR_VXVC   19 /* Floating-point invalid operation exception (comp) */
-#define FPSCR_FR 18 /* Floating-point fraction rounded   */
-#define FPSCR_FI 17 /* Floating-point fraction inexact   */
-#define FPSCR_C  16 /* Floating-point result class descriptor*/
-#define FPSCR_FL 15 /* Floating-point less than or negative  */
-#define FPSCR_FG 14 /* Floating-point greater than or negative   */
-#define FPSCR_FE 13 /* Floating-point equal or zero  */
-#define FPSCR_FU 12 /* Floating-point unordered or NaN   */
-#define FPSCR_FPCC   12 /* Floating-point condition code */
-#define FPSCR_FPRF   12 /* Floating-point result flags   */
-#define FPSCR_VXSOFT 10 /* Floating-point invalid operation exception (soft) */
-#define FPSCR_VXSQRT 9  /* Floating-point invalid operation exception (sqrt) */
-#define FPSCR_VXCVI  8  /* Floating-point invalid operation exception (int)  */
-#define FPSCR_VE 7  /* Floating-point invalid operation exception enable */
-#define FPSCR_OE 6  /* Floating-point overflow exception enable  */
-#define FPSCR_UE 5  /* Floating-point underflow exception enable  
*/
-#define FPSCR_ZE 4  /* Floating-point zero divide exception enable   */
-#define FPSCR_XE 3  /* Floating-point inexact exception enable   */
-#define FPSCR_NI 2  /* Floating-point non-IEEE mode  */
-#define FPSCR_RN11
-#define FPSCR_RN00  /* Floating-point rounding control   */
+#define FPSCR_DRN2   PPC_BIT_NR(29) /* Decimal Floating-Point rounding ctrl. */
+#define FPSCR_DRN1   PPC_BIT_NR(30) /* Decimal Floating-Point rounding ctrl. */
+#define FPSCR_DRN0   PPC_BIT_NR(31) /* Decimal Floating-Point rounding ctrl. */
+#define FPSCR_FX PPC_BIT_NR(32) /* Floating-point exception summary  */
+#define FPSCR_FEXPPC_BIT_NR(33) /* Floating-point enabled exception summ.*/
+#define FPSCR_VX PPC_BIT_NR(34) /* Floating-point invalid op. excp. summ.*/
+#define FPSCR_OX PPC_BIT_NR(35) /* Floating-point overflow exception */
+#define FPSCR_UX PPC_BIT_NR(36) /* Floating-point underflow exceptio */
+#define FPSCR_ZX PPC_BIT_NR(37) /* Floating-point zero divide exception  */
+#define FPSCR_XX PPC_BIT_NR(38) /* Floating-point inexact exception  */
+#define FPSCR_VXSNAN PPC_BIT_NR(39) /* Floating-point invalid op. excp (sNan)*/
+#define FPSCR_VXISI  PPC_BIT_NR(40) /* Floating-point invalid op. excp (inf) */
+#define FPSCR_VXIDI  PPC_BIT_NR(41) /* Floating-point invalid op. excp (inf) */
+#define FPSCR_VXZDZ  PPC_BIT_NR(42) /* Floating-point invalid op. excp (zero)*/
+#define FPSCR_VXIMZ  PPC_BIT_NR(43) /* Floating-point invalid op. excp (inf) */
+#define FPSCR_VXVC   PPC_BIT_NR(44) /* Floating-point invalid op. excp (comp)*/
+#define FPSCR_FR PPC_BIT_NR(45

[RFC PATCH] target/ppc: Implement hashst(p) and hashchk(p) instructions

2022-06-22 Thread Víctor Colombo
Implement the hashst and hashchk instructions and their privileged
versions (hashstp and hashchkp).

It was decided to implement the hash algorithm from the ground up in this
patch, exactly as described in the Power ISA.

Signed-off-by: Víctor Colombo 

---

RFC because I need some feedback on whether the approach of implementing
the algorithm is a good idea. Also, it seems that the kernel/KVM does not
implement the necessary registers?

What do you think about the choice to implement the hash algorithm
from the ground up, following the SIMON-like algorithm presented in
the Power ISA? IIUC, this algorithm is not the same as the original[1].
Other options would be to use another algorithm already implemented
in QEMU, or even to make this instruction a nop for all Power versions.

Based-on: ppc-next, with 
<20220620210540.112153-1-leandro.lup...@eldorado.org.br> and
<20220615192006.3075821-1-lucas.couti...@eldorado.org.br>
applied on top. The relevant patches are:
[PATCH 01/11] target/ppc: receive DisasContext explicitly in GEN_PRIV
[PATCH 02/11] target/ppc: add macros to check privilege level

[1] https://eprint.iacr.org/2013/404.pdf
---
 linux-headers/asm-powerpc/kvm.h|  3 +
 target/ppc/cpu.h   |  2 +
 target/ppc/cpu_init.c  |  7 ++
 target/ppc/excp_helper.c   | 82 ++
 target/ppc/helper.h|  4 ++
 target/ppc/insn32.decode   | 10 +++
 target/ppc/translate.c |  5 ++
 target/ppc/translate/fixedpoint-impl.c.inc | 34 +
 8 files changed, 147 insertions(+)

diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index 9f18fa090f..4ae4718143 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -646,6 +646,9 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_SIER3  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
 #define KVM_REG_PPC_DAWR1  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
 #define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
+/* FIXME: KVM hasn't exposed these registers yet */
+#define KVM_REG_PPC_HASHKEYR(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x00)
+#define KVM_REG_PPC_HASHPKEYR   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x00)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 6d78078f37..d982b779ca 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1665,6 +1665,8 @@ void ppc_compat_add_property(Object *obj, const char 
*name,
 #define SPR_BOOKE_GIVOR14 (0x1BD)
 #define SPR_TIR   (0x1BE)
 #define SPR_PTCR  (0x1D0)
+#define SPR_POWER_HASHKEYR(0x1D4)
+#define SPR_POWER_HASHPKEYR   (0x1D5)
 #define SPR_BOOKE_SPEFSCR (0x200)
 #define SPR_Exxx_BBEAR(0x201)
 #define SPR_Exxx_BBTAR(0x202)
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index b802bbb641..3ec9e4a27f 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6484,6 +6484,13 @@ static void init_proc_POWER10(CPUPPCState *env)
 spr_read_generic, spr_write_generic,
 KVM_REG_PPC_PSSCR, 0);
 
+spr_register_kvm(env, SPR_POWER_HASHKEYR, "HASHPKEYR",
+SPR_NOACCESS, SPR_NOACCESS, _read_generic, _write_generic,
+KVM_REG_PPC_HASHKEYR, 0x0);
+spr_register_kvm(env, SPR_POWER_HASHPKEYR, "HASHPKEYR",
+SPR_NOACCESS, SPR_NOACCESS, _read_generic, _write_generic,
+KVM_REG_PPC_HASHPKEYR, 0x0);
+
 /* env variables */
 env->dcache_line_size = 128;
 env->icache_line_size = 128;
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index cb752b184a..0998e8374e 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2174,6 +2174,88 @@ void helper_td(CPUPPCState *env, target_ulong arg1, 
target_ulong arg2,
 #endif
 #endif
 
+static uint32_t helper_SIMON_LIKE_32_64(uint32_t x, uint64_t key, uint32_t 
lane)
+{
+const uint16_t c = 0xfffc;
+const uint64_t z0 = 0xfa2561cdf44ac398ULL;
+uint16_t z = 0, temp;
+uint16_t k[32], eff_k[32], xleft[33], xright[33], fxleft[32];
+
+for (int i = 3; i >= 0; i--) {
+k[i] = key & 0x;
+key >>= 16;
+}
+xleft[0] = x & 0x;
+xright[0] = (x >> 16) & 0x;
+
+for (int i = 0; i < 28; i++) {
+z = (z0 >> (63 - i)) & 1;
+temp = ror16(k[i + 3], 3) ^ k[i + 1];
+k[i + 4] = c ^ z ^ k[i] ^ temp ^ ror16(temp, 1);
+}
+
+for (int i = 0; i < 8; i++) {
+eff_k[4 * i + 0] = k[4 * i + ((0 + lane) % 4)];
+eff_k[4 * i + 1] = k[4 * i + ((1 + lane) % 4)];
+eff_k[4 * i + 2] = k[4 * i + ((2 + lane) % 4)];
+eff_k[4 * i + 3] = k[4 * i + ((3 + lane) % 4)];
+}
+
+for (int i = 0; i < 32; i++) {
+fxleft[i] = (rol16(xleft[i], 1) &
+rol16(xle

[PATCH RESEND v2 11/11] target/ppc: implement cdtbcd

2022-06-10 Thread Víctor Colombo
From: Matheus Ferst 

Implements the Convert Declets To Binary Coded Decimal instruction.
Since libdecnumber doesn't expose the methods for direct conversion
(decDigitsFromDPD, DPD2BCD, etc), a positive decimal32 with zero
exponent is used as an intermediate value to convert the declets.

Reviewed-by: Richard Henderson 
Signed-off-by: Matheus Ferst 
Signed-off-by: Víctor Colombo 
---
 target/ppc/dfp_helper.c| 26 ++
 target/ppc/helper.h|  1 +
 target/ppc/insn32.decode   |  1 +
 target/ppc/translate/fixedpoint-impl.c.inc |  7 ++
 4 files changed, 35 insertions(+)

diff --git a/target/ppc/dfp_helper.c b/target/ppc/dfp_helper.c
index db9e994c8c..5ba74b2124 100644
--- a/target/ppc/dfp_helper.c
+++ b/target/ppc/dfp_helper.c
@@ -1392,6 +1392,32 @@ DFP_HELPER_SHIFT(DSCLIQ, 128, 1)
 DFP_HELPER_SHIFT(DSCRI, 64, 0)
 DFP_HELPER_SHIFT(DSCRIQ, 128, 0)
 
+target_ulong helper_CDTBCD(target_ulong s)
+{
+uint64_t res = 0;
+uint32_t dec32, declets;
+uint8_t bcd[6];
+int i, w, sh;
+decNumber a;
+
+for (w = 1; w >= 0; w--) {
+res <<= 32;
+declets = extract64(s, 32 * w, 20);
+if (declets) {
+/* decimal32 with zero exponent and word "w" declets */
+dec32 = (0x225ULL << 20) | declets;
+decimal32ToNumber((decimal32 *), );
+decNumberGetBCD(, bcd);
+for (i = 0; i < a.digits; i++) {
+sh = 4 * (a.digits - 1 - i);
+res |= (uint64_t)bcd[i] << sh;
+}
+}
+}
+
+return res;
+}
+
 target_ulong helper_CBCDTD(target_ulong s)
 {
 uint64_t res = 0;
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 3a18d8da94..25a0fb41e1 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -54,6 +54,7 @@ DEF_HELPER_3(sraw, tl, env, tl, tl)
 DEF_HELPER_FLAGS_2(CFUGED, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(PDEPD, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(PEXTD, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 2c53fa44f0..61b54b0086 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -317,6 +317,7 @@ PEXTD   01 . . . 001000 -   @X
 ## BCD Assist
 
 ADDG6S  01 . . . - 001001010 -  @X
+CDTBCD  01 . . - 0100011010 -   @X_sa
 CBCDTD  01 . . - 0100111010 -   @X_sa
 
 ### Float-Point Load Instructions
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc 
b/target/ppc/translate/fixedpoint-impl.c.inc
index 892c9d2568..cb0097bedb 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -530,6 +530,13 @@ static bool trans_ADDG6S(DisasContext *ctx, arg_X *a)
 return true;
 }
 
+static bool trans_CDTBCD(DisasContext *ctx, arg_X_sa *a)
+{
+REQUIRE_INSNS_FLAGS2(ctx, BCDA_ISA206);
+gen_helper_CDTBCD(cpu_gpr[a->ra], cpu_gpr[a->rs]);
+return true;
+}
+
 static bool trans_CBCDTD(DisasContext *ctx, arg_X_sa *a)
 {
 REQUIRE_INSNS_FLAGS2(ctx, BCDA_ISA206);
-- 
2.25.1




[PATCH RESEND v2 10/11] target/ppc: implement cbcdtd

2022-06-10 Thread Víctor Colombo
From: Matheus Ferst 

Implements the Convert Binary Coded Decimal To Declets instruction.
Since libdecnumber doesn't expose the methods for direct conversion
(decDigitsToDPD, BCD2DPD, etc.), the BCD values are converted to
decimal32 format, from which the declets are extracted.

Where the behavior is undefined, we try to match the result observed on
a POWER9 DD2.3.

Reviewed-by: Richard Henderson 
Signed-off-by: Matheus Ferst 
Signed-off-by: Víctor Colombo 
---
 target/ppc/dfp_helper.c| 39 ++
 target/ppc/helper.h|  1 +
 target/ppc/insn32.decode   |  4 +++
 target/ppc/translate/fixedpoint-impl.c.inc |  7 
 4 files changed, 51 insertions(+)

diff --git a/target/ppc/dfp_helper.c b/target/ppc/dfp_helper.c
index 0d01ac3de0..db9e994c8c 100644
--- a/target/ppc/dfp_helper.c
+++ b/target/ppc/dfp_helper.c
@@ -1391,3 +1391,42 @@ DFP_HELPER_SHIFT(DSCLI, 64, 1)
 DFP_HELPER_SHIFT(DSCLIQ, 128, 1)
 DFP_HELPER_SHIFT(DSCRI, 64, 0)
 DFP_HELPER_SHIFT(DSCRIQ, 128, 0)
+
+target_ulong helper_CBCDTD(target_ulong s)
+{
+uint64_t res = 0;
+uint32_t dec32;
+uint8_t bcd[6];
+int w, i, offs;
+decNumber a;
+decContext context;
+
+decContextDefault(, DEC_INIT_DECIMAL32);
+
+for (w = 1; w >= 0; w--) {
+res <<= 32;
+decNumberZero();
+/* Extract each BCD field of word "w" */
+for (i = 5; i >= 0; i--) {
+offs = 4 * (5 - i) + 32 * w;
+bcd[i] = extract64(s, offs, 4);
+if (bcd[i] > 9) {
+/*
+ * If the field value is greater than 9, the results are
+ * undefined. We could use a fixed value like 0 or 9, but
+ * an and with 9 seems to better match the hardware behavior.
+ */
+bcd[i] &= 9;
+}
+}
+
+/* Create a decNumber with the BCD values and convert to decimal32 */
+decNumberSetBCD(, bcd, 6);
+decimal32FromNumber((decimal32 *), , );
+
+/* Extract the two declets from the decimal32 value */
+res |= dec32 & 0xf;
+}
+
+return res;
+}
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 6233e28d85..3a18d8da94 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -54,6 +54,7 @@ DEF_HELPER_3(sraw, tl, env, tl, tl)
 DEF_HELPER_FLAGS_2(CFUGED, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(PDEPD, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(PEXTD, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl)
 DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index f71721f3c0..2c53fa44f0 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -87,6 +87,9 @@
 _rc   rt ra rb rc:bool
 @X_rc   .. rt:5 ra:5 rb:5 .. rc:1   _rc
 
+_sa   rs ra
+@X_sa   .. rs:5 ra:5 . .. . _sa
+
 %x_frtp 22:4 !function=times_2
 %x_frap 17:4 !function=times_2
 %x_frbp 12:4 !function=times_2
@@ -314,6 +317,7 @@ PEXTD   01 . . . 001000 -   @X
 ## BCD Assist
 
 ADDG6S  01 . . . - 001001010 -  @X
+CBCDTD  01 . . - 0100111010 -   @X_sa
 
 ### Float-Point Load Instructions
 
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc 
b/target/ppc/translate/fixedpoint-impl.c.inc
index 490e49cfc7..892c9d2568 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -529,3 +529,10 @@ static bool trans_ADDG6S(DisasContext *ctx, arg_X *a)
 
 return true;
 }
+
+static bool trans_CBCDTD(DisasContext *ctx, arg_X_sa *a)
+{
+REQUIRE_INSNS_FLAGS2(ctx, BCDA_ISA206);
+gen_helper_CBCDTD(cpu_gpr[a->ra], cpu_gpr[a->rs]);
+return true;
+}
-- 
2.25.1




[PATCH RESEND v2 06/11] target/ppc: Implement mffscdrn[i] instructions

2022-06-10 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/insn32.decode   |  5 
 target/ppc/translate/fp-impl.c.inc | 41 ++
 2 files changed, 46 insertions(+)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 8954a13d6b..da507758b8 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -133,6 +133,9 @@
 _imm2 rt imm
 @X_imm2 .. rt:5 . ... imm:2 .. ._imm2
 
+_imm3 rt imm
+@X_imm3 .. rt:5 . .. imm:3 .. . _imm3
+
 %x_xt   0:1 21:5
 _imm5 xt imm:uint8_t vrb
 @X_imm5 .. . imm:5 vrb:5 .. .   _imm5 
xt=%x_xt
@@ -348,7 +351,9 @@ SETNBCR 01 . . - 00 -   
@X_bi
 MFFS11 . 0 - 1001000111 .   @X_t_rc
 MFFSCE  11 . 1 - 1001000111 -   @X_t
 MFFSCRN 11 . 10110 . 1001000111 -   @X_tb
+MFFSCDRN11 . 10100 . 1001000111 -   @X_tb
 MFFSCRNI11 . 10111 ---.. 1001000111 -   @X_imm2
+MFFSCDRNI   11 . 10101 --... 1001000111 -   @X_imm3
 MFFSL   11 . 11000 - 1001000111 -   @X_t
 
 ### Decimal Floating-Point Arithmetic Instructions
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index d6231358f8..319513d001 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -696,6 +696,27 @@ static bool trans_MFFSCRN(DisasContext *ctx, arg_X_tb *a)
 return true;
 }
 
+static bool trans_MFFSCDRN(DisasContext *ctx, arg_X_tb *a)
+{
+TCGv_i64 t1, fpscr;
+
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_FPU(ctx);
+
+t1 = tcg_temp_new_i64();
+get_fpr(t1, a->rb);
+tcg_gen_andi_i64(t1, t1, FP_DRN);
+
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt, FP_DRN | FP_ENABLES | FP_NI | FP_RN);
+store_fpscr_masked(fpscr, FP_DRN, t1, 0x0100);
+
+tcg_temp_free_i64(t1);
+tcg_temp_free_i64(fpscr);
+
+return true;
+}
+
 static bool trans_MFFSCRNI(DisasContext *ctx, arg_X_imm2 *a)
 {
 TCGv_i64 t1, fpscr;
@@ -716,6 +737,26 @@ static bool trans_MFFSCRNI(DisasContext *ctx, arg_X_imm2 
*a)
 return true;
 }
 
+static bool trans_MFFSCDRNI(DisasContext *ctx, arg_X_imm3 *a)
+{
+TCGv_i64 t1, fpscr;
+
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_FPU(ctx);
+
+t1 = tcg_temp_new_i64();
+tcg_gen_movi_i64(t1, (uint64_t)a->imm << FPSCR_DRN0);
+
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt, FP_DRN | FP_ENABLES | FP_NI | FP_RN);
+store_fpscr_masked(fpscr, FP_DRN, t1, 0x0100);
+
+tcg_temp_free_i64(t1);
+tcg_temp_free_i64(fpscr);
+
+return true;
+}
+
 static bool trans_MFFSL(DisasContext *ctx, arg_X_t *a)
 {
 TCGv_i64 fpscr;
-- 
2.25.1




[PATCH RESEND v2 04/11] target/ppc: Move mffsl to decodetree

2022-06-10 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/insn32.decode   |  1 +
 target/ppc/translate/fp-impl.c.inc | 38 +-
 target/ppc/translate/fp-ops.c.inc  |  2 --
 3 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 7cf08ab436..90f7cca34e 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -345,6 +345,7 @@ SETNBCR 01 . . - 00 -   
@X_bi
 MFFSCE  11 . 1 - 1001000111 -   @X_t
 MFFSCRN 11 . 10110 . 1001000111 -   @X_tb
 MFFSCRNI11 . 10111 ---.. 1001000111 -   @X_imm2
+MFFSL   11 . 11000 - 1001000111 -   @X_t
 
 ### Decimal Floating-Point Arithmetic Instructions
 
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index 64e26b9b42..4f4d57c611 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -633,28 +633,6 @@ static void gen_mffs(DisasContext *ctx)
 tcg_temp_free_i64(t0);
 }
 
-/* mffsl */
-static void gen_mffsl(DisasContext *ctx)
-{
-TCGv_i64 t0;
-
-if (unlikely(!(ctx->insns_flags2 & PPC2_ISA300))) {
-return gen_mffs(ctx);
-}
-
-if (unlikely(!ctx->fpu_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_FPU);
-return;
-}
-t0 = tcg_temp_new_i64();
-gen_reset_fpstatus();
-tcg_gen_extu_tl_i64(t0, cpu_fpscr);
-/* Mask everything except mode, status, and enables.  */
-tcg_gen_andi_i64(t0, t0, FP_DRN | FP_STATUS | FP_ENABLES | FP_RN);
-set_fpr(rD(ctx->opcode), t0);
-tcg_temp_free_i64(t0);
-}
-
 static TCGv_i64 place_from_fpscr(int rt, uint64_t mask)
 {
 TCGv_i64 fpscr = tcg_temp_new_i64();
@@ -739,6 +717,22 @@ static bool trans_MFFSCRNI(DisasContext *ctx, arg_X_imm2 
*a)
 return true;
 }
 
+static bool trans_MFFSL(DisasContext *ctx, arg_X_t *a)
+{
+TCGv_i64 fpscr;
+
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_FPU(ctx);
+
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt,
+FP_DRN | FP_STATUS | FP_ENABLES | FP_NI | FP_RN);
+
+tcg_temp_free_i64(fpscr);
+
+return true;
+}
+
 /* mtfsb0 */
 static void gen_mtfsb0(DisasContext *ctx)
 {
diff --git a/target/ppc/translate/fp-ops.c.inc 
b/target/ppc/translate/fp-ops.c.inc
index a76943b8bf..f8c35124ae 100644
--- a/target/ppc/translate/fp-ops.c.inc
+++ b/target/ppc/translate/fp-ops.c.inc
@@ -75,8 +75,6 @@ GEN_HANDLER_E(fmrgew, 0x3F, 0x06, 0x1E, 0x0001, PPC_NONE, 
PPC2_VSX207),
 GEN_HANDLER_E(fmrgow, 0x3F, 0x06, 0x1A, 0x0001, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER(mcrfs, 0x3F, 0x00, 0x02, 0x0063F801, PPC_FLOAT),
 GEN_HANDLER_E_2(mffs, 0x3F, 0x07, 0x12, 0x00, 0x, PPC_FLOAT, PPC_NONE),
-GEN_HANDLER_E_2(mffsl, 0x3F, 0x07, 0x12, 0x18, 0x, PPC_FLOAT,
-PPC2_ISA300),
 GEN_HANDLER(mtfsb0, 0x3F, 0x06, 0x02, 0x001FF800, PPC_FLOAT),
 GEN_HANDLER(mtfsb1, 0x3F, 0x06, 0x01, 0x001FF800, PPC_FLOAT),
 GEN_HANDLER(mtfsf, 0x3F, 0x07, 0x16, 0x, PPC_FLOAT),
-- 
2.25.1




[PATCH RESEND v2 02/11] target/ppc: Move mffscrn[i] to decodetree

2022-06-10 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/insn32.decode   |  8 +++
 target/ppc/internal.h  |  3 --
 target/ppc/translate/fp-impl.c.inc | 83 +++---
 target/ppc/translate/fp-ops.c.inc  |  4 --
 4 files changed, 50 insertions(+), 48 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 532a478dcb..f4b007fe6a 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -124,6 +124,9 @@
 _bfl  bf l:bool ra rb
 @X_bfl  .. bf:3 . l:1 ra:5 rb:5 .. ._bfl
 
+_imm2 rt imm
+@X_imm2 .. rt:5 . ... imm:2 .. ._imm2
+
 %x_xt   0:1 21:5
 _imm5 xt imm:uint8_t vrb
 @X_imm5 .. . imm:5 vrb:5 .. .   _imm5 
xt=%x_xt
@@ -334,6 +337,11 @@ SETBCR  01 . . - 011010 -   
@X_bi
 SETNBC  01 . . - 011100 -   @X_bi
 SETNBCR 01 . . - 00 -   @X_bi
 
+### Move To/From FPSCR
+
+MFFSCRN 11 . 10110 . 1001000111 -   @X_tb
+MFFSCRNI11 . 10111 ---.. 1001000111 -   @X_imm2
+
 ### Decimal Floating-Point Arithmetic Instructions
 
 DADD111011 . . . 10 .   @X_rc
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 2add128cd1..467f3046c8 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -159,9 +159,6 @@ EXTRACT_HELPER(FPL, 25, 1);
 EXTRACT_HELPER(FPFLM, 17, 8);
 EXTRACT_HELPER(FPW, 16, 1);
 
-/* mffscrni */
-EXTRACT_HELPER(RM, 11, 2);
-
 /* addpcis */
 EXTRACT_HELPER_SPLIT_3(DX, 10, 6, 6, 5, 16, 1, 1, 0, 0)
 #if defined(TARGET_PPC64)
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index f9b58b844e..bcb7ec2689 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -685,71 +685,72 @@ static void gen_mffsce(DisasContext *ctx)
 tcg_temp_free_i64(t0);
 }
 
-static void gen_helper_mffscrn(DisasContext *ctx, TCGv_i64 t1)
+static TCGv_i64 place_from_fpscr(int rt, uint64_t mask)
 {
-TCGv_i64 t0 = tcg_temp_new_i64();
-TCGv_i32 mask = tcg_const_i32(0x0001);
+TCGv_i64 fpscr = tcg_temp_new_i64();
+TCGv_i64 fpscr_masked = tcg_temp_new_i64();
 
-gen_reset_fpstatus();
-tcg_gen_extu_tl_i64(t0, cpu_fpscr);
-tcg_gen_andi_i64(t0, t0, FP_DRN | FP_ENABLES | FP_RN);
-set_fpr(rD(ctx->opcode), t0);
+tcg_gen_extu_tl_i64(fpscr, cpu_fpscr);
+tcg_gen_andi_i64(fpscr_masked, fpscr, mask);
+set_fpr(rt, fpscr_masked);
 
-/* Mask FPSCR value to clear RN.  */
-tcg_gen_andi_i64(t0, t0, ~FP_RN);
+tcg_temp_free_i64(fpscr_masked);
 
-/* Merge RN into FPSCR value.  */
-tcg_gen_or_i64(t0, t0, t1);
+return fpscr;
+}
 
-gen_helper_store_fpscr(cpu_env, t0, mask);
+static void store_fpscr_masked(TCGv_i64 fpscr, uint64_t clear_mask,
+   TCGv_i64 set_mask, uint32_t store_mask)
+{
+TCGv_i64 fpscr_masked = tcg_temp_new_i64();
+TCGv_i32 st_mask = tcg_constant_i32(store_mask);
 
-tcg_temp_free_i32(mask);
-tcg_temp_free_i64(t0);
+tcg_gen_andi_i64(fpscr_masked, fpscr, ~clear_mask);
+tcg_gen_or_i64(fpscr_masked, fpscr_masked, set_mask);
+gen_helper_store_fpscr(cpu_env, fpscr_masked, st_mask);
+
+tcg_temp_free_i64(fpscr_masked);
 }
 
-/* mffscrn */
-static void gen_mffscrn(DisasContext *ctx)
+static bool trans_MFFSCRN(DisasContext *ctx, arg_X_tb *a)
 {
-TCGv_i64 t1;
+TCGv_i64 t1, fpscr;
 
-if (unlikely(!(ctx->insns_flags2 & PPC2_ISA300))) {
-return gen_mffs(ctx);
-}
-
-if (unlikely(!ctx->fpu_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_FPU);
-return;
-}
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_FPU(ctx);
 
 t1 = tcg_temp_new_i64();
-get_fpr(t1, rB(ctx->opcode));
-/* Mask FRB to get just RN.  */
+get_fpr(t1, a->rb);
 tcg_gen_andi_i64(t1, t1, FP_RN);
 
-gen_helper_mffscrn(ctx, t1);
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt, FP_DRN | FP_ENABLES | FP_NI | FP_RN);
+store_fpscr_masked(fpscr, FP_RN, t1, 0x0001);
 
 tcg_temp_free_i64(t1);
+tcg_temp_free_i64(fpscr);
+
+return true;
 }
 
-/* mffscrni */
-static void gen_mffscrni(DisasContext *ctx)
+static bool trans_MFFSCRNI(DisasContext *ctx, arg_X_imm2 *a)
 {
-TCGv_i64 t1;
-
-if (unlikely(!(ctx->insns_flags2 & PPC2_ISA300))) {
-return gen_mffs(ctx);
-}
+TCGv_i64 t1, fpscr;
 
-if (unlikely(!ctx->fpu_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_FPU);
-return;
-}
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_FPU(ctx);
 
-t1 = tcg_const_i64((uint64_t)RM(ctx->opcode));
+t1 = tcg_temp_new_i64();
+tcg_gen_movi_i64(t1, a->imm);
 
-gen_helper_mffscrn(ctx, t1);
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt, FP_DRN | FP_ENABLES 

[PATCH RESEND v2 05/11] target/ppc: Move mffs[.] to decodetree

2022-06-10 Thread Víctor Colombo
Signed-off-by: Víctor Colombo 
---
 target/ppc/insn32.decode   |  4 
 target/ppc/translate/fp-impl.c.inc | 35 +++---
 target/ppc/translate/fp-ops.c.inc  |  1 -
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 90f7cca34e..8954a13d6b 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -100,6 +100,9 @@
 _tb   rt rb
 @X_tb   .. rt:5 . rb:5 .. . _tb
 
+_t_rc rt rc:bool
+@X_t_rc .. rt:5 . . .. rc:1 _t_rc
+
 _tb_rcrt rb rc:bool
 @X_tb_rc.. rt:5 . rb:5 .. rc:1  _tb_rc
 
@@ -342,6 +345,7 @@ SETNBCR 01 . . - 00 -   
@X_bi
 
 ### Move To/From FPSCR
 
+MFFS11 . 0 - 1001000111 .   @X_t_rc
 MFFSCE  11 . 1 - 1001000111 -   @X_t
 MFFSCRN 11 . 10110 . 1001000111 -   @X_tb
 MFFSCRNI11 . 10111 ---.. 1001000111 -   @X_imm2
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index 4f4d57c611..d6231358f8 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -615,24 +615,6 @@ static void gen_mcrfs(DisasContext *ctx)
 tcg_temp_free_i64(tnew_fpscr);
 }
 
-/* mffs */
-static void gen_mffs(DisasContext *ctx)
-{
-TCGv_i64 t0;
-if (unlikely(!ctx->fpu_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_FPU);
-return;
-}
-t0 = tcg_temp_new_i64();
-gen_reset_fpstatus();
-tcg_gen_extu_tl_i64(t0, cpu_fpscr);
-set_fpr(rD(ctx->opcode), t0);
-if (unlikely(Rc(ctx->opcode))) {
-gen_set_cr1_from_fpscr(ctx);
-}
-tcg_temp_free_i64(t0);
-}
-
 static TCGv_i64 place_from_fpscr(int rt, uint64_t mask)
 {
 TCGv_i64 fpscr = tcg_temp_new_i64();
@@ -660,6 +642,23 @@ static void store_fpscr_masked(TCGv_i64 fpscr, uint64_t 
clear_mask,
 tcg_temp_free_i64(fpscr_masked);
 }
 
+static bool trans_MFFS(DisasContext *ctx, arg_X_t_rc *a)
+{
+TCGv_i64 fpscr;
+
+REQUIRE_FPU(ctx);
+
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt, UINT64_MAX);
+if (a->rc) {
+gen_set_cr1_from_fpscr(ctx);
+}
+
+tcg_temp_free_i64(fpscr);
+
+return true;
+}
+
 static bool trans_MFFSCE(DisasContext *ctx, arg_X_t *a)
 {
 TCGv_i64 fpscr;
diff --git a/target/ppc/translate/fp-ops.c.inc 
b/target/ppc/translate/fp-ops.c.inc
index f8c35124ae..1b65f5ab73 100644
--- a/target/ppc/translate/fp-ops.c.inc
+++ b/target/ppc/translate/fp-ops.c.inc
@@ -74,7 +74,6 @@ GEN_HANDLER_E(fcpsgn, 0x3F, 0x08, 0x00, 0x, PPC_NONE, 
PPC2_ISA205),
 GEN_HANDLER_E(fmrgew, 0x3F, 0x06, 0x1E, 0x0001, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(fmrgow, 0x3F, 0x06, 0x1A, 0x0001, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER(mcrfs, 0x3F, 0x00, 0x02, 0x0063F801, PPC_FLOAT),
-GEN_HANDLER_E_2(mffs, 0x3F, 0x07, 0x12, 0x00, 0x, PPC_FLOAT, PPC_NONE),
 GEN_HANDLER(mtfsb0, 0x3F, 0x06, 0x02, 0x001FF800, PPC_FLOAT),
 GEN_HANDLER(mtfsb1, 0x3F, 0x06, 0x01, 0x001FF800, PPC_FLOAT),
 GEN_HANDLER(mtfsf, 0x3F, 0x07, 0x16, 0x, PPC_FLOAT),
-- 
2.25.1




[PATCH RESEND v2 07/11] tests/tcg/ppc64: Add mffsce test

2022-06-10 Thread Víctor Colombo
Add an mffsce test that checks both the value returned in the target
register and the new FPSCR value stored in the CPU.

Signed-off-by: Víctor Colombo 
---
 tests/tcg/ppc64/Makefile.target   |  1 +
 tests/tcg/ppc64le/Makefile.target |  1 +
 tests/tcg/ppc64le/mffsce.c| 37 +++
 3 files changed, 39 insertions(+)
 create mode 100644 tests/tcg/ppc64le/mffsce.c

diff --git a/tests/tcg/ppc64/Makefile.target b/tests/tcg/ppc64/Makefile.target
index babd209573..331fae628e 100644
--- a/tests/tcg/ppc64/Makefile.target
+++ b/tests/tcg/ppc64/Makefile.target
@@ -11,6 +11,7 @@ endif
 $(PPC64_TESTS): CFLAGS += -mpower8-vector
 
 PPC64_TESTS += mtfsf
+PPC64_TESTS += mffsce
 
 ifneq ($(CROSS_CC_HAS_POWER10),)
 PPC64_TESTS += byte_reverse sha512-vector
diff --git a/tests/tcg/ppc64le/Makefile.target 
b/tests/tcg/ppc64le/Makefile.target
index 5b0eb5e870..6ca3003f02 100644
--- a/tests/tcg/ppc64le/Makefile.target
+++ b/tests/tcg/ppc64le/Makefile.target
@@ -24,6 +24,7 @@ run-sha512-vector: QEMU_OPTS+=-cpu POWER10
 run-plugin-sha512-vector-with-%: QEMU_OPTS+=-cpu POWER10
 
 PPC64LE_TESTS += mtfsf
+PPC64LE_TESTS += mffsce
 PPC64LE_TESTS += signal_save_restore_xer
 PPC64LE_TESTS += xxspltw
 
diff --git a/tests/tcg/ppc64le/mffsce.c b/tests/tcg/ppc64le/mffsce.c
new file mode 100644
index 00..20d882cb45
--- /dev/null
+++ b/tests/tcg/ppc64le/mffsce.c
@@ -0,0 +1,37 @@
+#include 
+#include 
+#include 
+
+#define MTFSF(FLM, FRB) asm volatile ("mtfsf %0, %1" :: "i" (FLM), "f" (FRB))
+#define MFFS(FRT) asm("mffs %0" : "=f" (FRT))
+#define MFFSCE(FRT) asm("mffsce %0" : "=f" (FRT))
+
+#define PPC_BIT_NR(nr) (63 - (nr))
+
+#define FP_VE  (1ull << PPC_BIT_NR(56))
+#define FP_UE  (1ull << PPC_BIT_NR(58))
+#define FP_ZE  (1ull << PPC_BIT_NR(59))
+#define FP_XE  (1ull << PPC_BIT_NR(60))
+#define FP_NI  (1ull << PPC_BIT_NR(61))
+#define FP_RN1 (1ull << PPC_BIT_NR(63))
+
+int main(void)
+{
+uint64_t frt, fpscr;
+uint64_t test_value = FP_VE | FP_UE | FP_ZE |
+  FP_XE | FP_NI | FP_RN1;
+MTFSF(0b, test_value); /* set test value to cpu fpscr */
+MFFSCE(frt);
+MFFS(fpscr); /* read the value that mffsce stored to cpu fpscr */
+
+/* the returned value should be as the cpu fpscr was before */
+assert((frt & 0xff) == test_value);
+
+/*
+ * the cpu fpscr last 3 bits should be unchanged
+ * and enable bits should be unset
+ */
+assert((fpscr & 0xff) == (test_value & 0x7));
+
+return 0;
+}
-- 
2.25.1




  1   2   3   >