Re: [Qemu-devel] [PATCH v2 4/4] target-tricore: Add instructions of RCR opcode format

2014-11-19 Thread Bastian Koppelmann


On 11/14/2014 01:39 PM, Richard Henderson wrote:

On 11/13/2014 06:12 PM, Bastian Koppelmann wrote:

+tcg_gen_ext_i32_i64(t3, r3);
+tcg_gen_concat_i32_i64(t2, r2_low, r2_high);
+/* extend the sign for r2 to high 64 bits */
+tcg_gen_sari_i64(t4, t2, 63);
+tcg_gen_ext_i32_i64(t1, r1);
+
+tcg_gen_muls2_i64(t1, t3, t1, t3);
+tcg_gen_add2_i64(t1, t3, t2, t4, t1, t3);
+

I don't believe that you need 128-bit arithmetic for multiply-accumulate,
either here or elsewhere (e.g. msub).

Looking at unsigned, the maximum result of the multiply is (2^n-1)^2, or 2^(2n)
- 2^(n+1) + 1.  Which means that the accumulate with a 2^n-1 value cannot overflow
a double-word intermediate result.
Madd.u has the signature 64 + (32 * 32) -> 64, as far as I read the
documentation. As you described, the multiplication has a maximum result of
about 2^(2n) - 2^(n+1), but that product is accumulated with a value of up to
2^(2n) - 1, which can definitely overflow for n = 32.


However, for signed multiply-accumulate I don't need 128-bit arithmetic,
because of the two operations only the add/sub can overflow, not the
multiplication. Thanks for the tip!


Cheers,
Bastian



Re: [Qemu-devel] [PATCH v2 4/4] target-tricore: Add instructions of RCR opcode format

2014-11-14 Thread Richard Henderson
On 11/13/2014 06:12 PM, Bastian Koppelmann wrote:
 +tcg_gen_ext_i32_i64(t3, r3);
 +tcg_gen_concat_i32_i64(t2, r2_low, r2_high);
 +/* extend the sign for r2 to high 64 bits */
 +tcg_gen_sari_i64(t4, t2, 63);
 +tcg_gen_ext_i32_i64(t1, r1);
 +
 +tcg_gen_muls2_i64(t1, t3, t1, t3);
 +tcg_gen_add2_i64(t1, t3, t2, t4, t1, t3);
 +

I don't believe that you need 128-bit arithmetic for multiply-accumulate,
either here or elsewhere (e.g. msub).

Looking at unsigned, the maximum result of the multiply is (2^n-1)^2, or 2^(2n)
- 2^(n+1) + 1.  Which means that the accumulate with a 2^n-1 value cannot overflow
a double-word intermediate result.


r~



[Qemu-devel] [PATCH v2 4/4] target-tricore: Add instructions of RCR opcode format

2014-11-13 Thread Bastian Koppelmann
Add instructions of RCR opcode format.
Add helper for madd32/64_ssov and madd32/64_suov.
Add helper for msub32/64_ssov and msub32/64_suov.
Add microcode generator functions madd/msub for 32-bit and 64-bit, which
calculate a mul and an add/sub.
OPC2_32_RCR_MSUB_U_32 - OPC2_32_RCR_MSUB_U_32.

Signed-off-by: Bastian Koppelmann kbast...@mail.uni-paderborn.de
---
 target-tricore/helper.h  |   8 +
 target-tricore/op_helper.c   | 192 +++
 target-tricore/translate.c   | 493 +++
 target-tricore/tricore-opcodes.h |   3 +-
 4 files changed, 695 insertions(+), 1 deletion(-)

diff --git a/target-tricore/helper.h b/target-tricore/helper.h
index 2eb33ea..6c07bd7 100644
--- a/target-tricore/helper.h
+++ b/target-tricore/helper.h
@@ -24,6 +24,14 @@ DEF_HELPER_3(mul_ssov, i32, env, i32, i32)
 DEF_HELPER_3(mul_suov, i32, env, i32, i32)
 DEF_HELPER_3(sha_ssov, i32, env, i32, i32)
 DEF_HELPER_3(absdif_ssov, i32, env, i32, i32)
+DEF_HELPER_4(madd32_ssov, i32, env, i32, i32, i32)
+DEF_HELPER_4(madd32_suov, i32, env, i32, i32, i32)
+DEF_HELPER_4(madd64_ssov, i64, env, i32, i64, i32)
+DEF_HELPER_4(madd64_suov, i64, env, i32, i64, i32)
+DEF_HELPER_4(msub32_ssov, i32, env, i32, i32, i32)
+DEF_HELPER_4(msub32_suov, i32, env, i32, i32, i32)
+DEF_HELPER_4(msub64_ssov, i64, env, i32, i64, i32)
+DEF_HELPER_4(msub64_suov, i64, env, i32, i64, i32)
 /* CSA */
 DEF_HELPER_2(call, void, env, i32)
 DEF_HELPER_1(ret, void, env)
diff --git a/target-tricore/op_helper.c b/target-tricore/op_helper.c
index 0b6b471..22fa452 100644
--- a/target-tricore/op_helper.c
+++ b/target-tricore/op_helper.c
@@ -56,6 +56,16 @@ uint32_t helper_circ_update(uint32_t reg, uint32_t off)
 return reg - index + new_index;
 }

+static void add128(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
+{
+*plow += a;
+/* carry test */
+if (*plow  a) {
+(*phigh)++;
+}
+*phigh += b;
+}
+
 #define SSOV(env, ret, arg, len) do {   \
 int64_t max_pos = INT##len ##_MAX;  \
 int64_t max_neg = INT##len ##_MIN;  \
@@ -198,6 +208,188 @@ target_ulong helper_absdif_ssov(CPUTriCoreState *env, 
target_ulong r1,
 SSOV(env, ret, result, 32);
 return ret;
 }
+
+target_ulong helper_madd32_ssov(CPUTriCoreState *env, target_ulong r1,
+target_ulong r2, target_ulong r3)
+{
+target_ulong ret;
+int64_t t1 = sextract64(r1, 0, 32);
+int64_t t2 = sextract64(r2, 0, 32);
+int64_t t3 = sextract64(r3, 0, 32);
+int64_t result;
+
+result = t2 + (t1 * t3);
+SSOV(env, ret, result, 32);
+return ret;
+}
+
+target_ulong helper_madd32_suov(CPUTriCoreState *env, target_ulong r1,
+target_ulong r2, target_ulong r3)
+{
+target_ulong ret;
+uint64_t t1 = extract64(r1, 0, 32);
+uint64_t t2 = extract64(r2, 0, 32);
+uint64_t t3 = extract64(r3, 0, 32);
+int64_t result;
+
+result = t2 + (t1 * t3);
+SUOV(env, ret, result, 32);
+return ret;
+}
+
+uint64_t helper_madd64_ssov(CPUTriCoreState *env, target_ulong r1,
+uint64_t r2, target_ulong r3)
+{
+uint64_t ret_low, ret_high;
+uint64_t r2_high;
+int64_t t1 = sextract64(r1, 0, 32);
+int64_t t3 = sextract64(r3, 0, 32);
+
+muls64(ret_low, ret_high, t1, t3);
+r2_high = ((int64_t)r2  63);
+add128(ret_low, ret_high, r2, r2_high);
+
+/* check for saturate */
+t1 = (int64_t)ret_low  63;
+if (t1 != ret_high) {
+env-PSW_USB_V = (1  31);
+env-PSW_USB_SV = (1  31);
+if (t1 == 0x0) {
+ret_low = INT64_MIN;
+} else {
+ret_low = INT64_MAX;
+}
+} else {
+env-PSW_USB_V = 0;
+}
+t1 = ret_low  32;
+env-PSW_USB_AV = t1 ^ t1 * 2u;
+env-PSW_USB_SAV |= env-PSW_USB_AV;
+
+return ret_low;
+}
+
+uint64_t helper_madd64_suov(CPUTriCoreState *env, target_ulong r1,
+uint64_t r2, target_ulong r3)
+{
+uint64_t ret_low, ret_high;
+uint64_t t1 = extract64(r1, 0, 32);
+uint64_t t3 = extract64(r3, 0, 32);
+
+mulu64(ret_low, ret_high, t1, t3);
+add128(ret_low, ret_high, r2, 0);
+
+if (ret_high != 0) {
+env-PSW_USB_V = (1  31);
+env-PSW_USB_SV = (1  31);
+ret_low = UINT64_MAX;
+} else if ((ret_high  (1LL  63)) != 0) {
+ret_low = 0;
+env-PSW_USB_V = (1  31);
+env-PSW_USB_SV = (1  31);
+} else {
+env-PSW_USB_V = 0;
+}
+t1 = ret_low  32;
+env-PSW_USB_AV = t1 ^ t1 * 2u;
+env-PSW_USB_SAV |= env-PSW_USB_AV;
+return ret_low;
+}
+
+target_ulong helper_msub32_ssov(CPUTriCoreState *env, target_ulong r1,
+target_ulong r2, target_ulong r3)
+{
+target_ulong ret;
+int64_t t1 = sextract64(r1, 0, 32);
+int64_t t2 = sextract64(r2, 0, 32);
+int64_t t3 = sextract64(r3, 0, 32);
+int64_t result;
+
+result = t2 - (t1