On 09/26/2016 03:56 AM, Sagar Karandikar wrote:
+#if defined(TARGET_RISCV64)
+target_ulong helper_mulhsu(CPURISCVState *env, target_ulong arg1,
+ target_ulong arg2)
+{
+ int64_t a = arg1;
+ uint64_t b = arg2;
+ return (int64_t)((__int128_t)a * b >> 64);
+}
+#endif
This won't compile on a 32-bit host, or indeed a 64-bit host without
CONFIG_INT128.
But what you should actually be using is tcg_gen_mulu2_i64, with a fixup
afterward for the one signed argument. See tcg_gen_muls2_i64 in tcg/tcg-op.c
for an example of fixing up an unsigned multiply for two signed inputs; you
would need only half of that for a single signed input.
+/* Wrapper for setting reg values - need to check of reg is zero since
+ * cpu_gpr[0] is not actually allocated. this is more for safety purposes,
+ * since we usually avoid calling the OP_TYPE_gen function if we see a write to
+ * $zero
+ */
+static inline void gen_set_gpr(int reg_num_dst, TCGv t)
+{
+ if (reg_num_dst != 0) {
+ tcg_gen_mov_tl(cpu_gpr[reg_num_dst], t);
+ }
+}
FWIW, target-alpha used to have lots and lots of checks for the zero register.
In the end it was much cleaner to simply allocate a scratch temporary for the
zero-register sink. Aside from known patterns, such as canonical nop
formations, you'll almost never see such instructions. While it's true that
you must do something to be architecturally correct, it's better to do
something that minimizes the impact on the rest of the translator.
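Something like the following sketch, modeled on target-alpha's
dest_gpr/dest_sink (ctx->sink would be a new DisasContext field,
freed at the end of each insn if it was allocated):

    static TCGv dest_sink(DisasContext *ctx)
    {
        if (TCGV_IS_UNUSED(ctx->sink)) {
            ctx->sink = tcg_temp_new();
        }
        return ctx->sink;
    }

    static TCGv dest_gpr(DisasContext *ctx, int reg)
    {
        if (likely(reg != 0)) {
            return cpu_gpr[reg];
        }
        return dest_sink(ctx);
    }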
+ tcg_gen_shri_i64(t0, t0, 32);
+ tcg_gen_extrl_i64_i32(ret, t0);
This would be tcg_gen_extrh_i64_i32.
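I.e.

    tcg_gen_extrh_i64_i32(ret, t0);

which extracts the high 32 bits in one op.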
+static inline void gen_arith(DisasContext *ctx, uint32_t opc, int rd, int rs1,
+ int rs2)
+{
+ TCGv source1, source2, cond1, cond2, zeroreg, resultopt1;
+ cond1 = tcg_temp_new();
+ cond2 = tcg_temp_new();
+ source1 = tcg_temp_new();
+ source2 = tcg_temp_new();
+ zeroreg = tcg_temp_new();
+ resultopt1 = tcg_temp_new();
+ gen_get_gpr(source1, rs1);
+ gen_get_gpr(source2, rs2);
+ tcg_gen_movi_tl(zeroreg, 0); /* hardcoded zero for compare in DIV, etc */
It would be far preferable to allocate this only when needed.
+
+ switch (opc) {
+#if defined(TARGET_RISCV64)
+ case OPC_RISC_ADDW:
+#endif
+ case OPC_RISC_ADD:
Can we avoid sprinkling so many ifdefs? Perhaps with something akin to
#ifdef TARGET_RISCV64
#define CASE_OP_32_64(X) case X: case glue(X, W)
#else
#define CASE_OP_32_64(X) case X
#endif
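so that the paired cases collapse to, e.g.

    CASE_OP_32_64(OPC_RISC_ADD):
        tcg_gen_add_tl(source1, source1, source2);
        break;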
+#if defined(TARGET_RISCV64)
+ case OPC_RISC_SLLW:
+ tcg_gen_andi_tl(source2, source2, 0x1F);
+ /* fall through to SLL */
+#endif
+ case OPC_RISC_SLL:
+ tcg_gen_andi_tl(source2, source2, TARGET_LONG_BITS - 1);
+ tcg_gen_shl_tl(source1, source1, source2);
Better to not fall through at this point, to avoid the double and.
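I.e., something like

    case OPC_RISC_SLLW:
        tcg_gen_andi_tl(source2, source2, 0x1F);
        tcg_gen_shl_tl(source1, source1, source2);
        break;

leaving the final sign-extension of the W result wherever the original
fall-through version handles it.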
+#if defined(TARGET_RISCV64)
+ case OPC_RISC_SRLW:
+ /* clear upper 32 */
+ tcg_gen_andi_tl(source1, source1, 0x00000000FFFFFFFFLL);
+ tcg_gen_andi_tl(source2, source2, 0x1F);
+ /* fall through to SRL */
+#endif
+ case OPC_RISC_SRL:
+ tcg_gen_andi_tl(source2, source2, TARGET_LONG_BITS - 1);
+ tcg_gen_shr_tl(source1, source1, source2);
Likewise. Also, tcg_gen_ext32u_tl to clear upper 32.
+ break;
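For SRLW that would look like

    case OPC_RISC_SRLW:
        tcg_gen_ext32u_tl(source1, source1);
        tcg_gen_andi_tl(source2, source2, 0x1F);
        tcg_gen_shr_tl(source1, source1, source2);
        break;

again with the sign-extension of the 32-bit result handled as above.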
+#if defined(TARGET_RISCV64)
+ case OPC_RISC_SRAW:
+ /* first, trick to get it to act like working on 32 bits (get rid of
+ upper 32, sign extend to fill space) */
+ tcg_gen_shli_tl(source1, source1, 32);
+ tcg_gen_sari_tl(source1, source1, 32);
+ tcg_gen_andi_tl(source2, source2, 0x1F);
+ /* fall through to SRA */
+#endif
+ case OPC_RISC_SRA:
+ tcg_gen_andi_tl(source2, source2, TARGET_LONG_BITS - 1);
Likewise. Also, tcg_gen_ext32s_tl to sign-extend.
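I.e.

    case OPC_RISC_SRAW:
        tcg_gen_ext32s_tl(source1, source1);
        tcg_gen_andi_tl(source2, source2, 0x1F);
        tcg_gen_sar_tl(source1, source1, source2);
        break;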
+#if defined(TARGET_RISCV64)
+ case OPC_RISC_MULW:
+#endif
+ case OPC_RISC_MUL:
+ tcg_gen_muls2_tl(source1, source2, source1, source2);
tcg_gen_mul_tl, since the high half written to source2 is dead.
+#if defined(TARGET_RISCV64)
+ case OPC_RISC_DIVW:
+ tcg_gen_ext32s_tl(source1, source1);
+ tcg_gen_ext32s_tl(source2, source2);
+ /* fall through to DIV */
+#endif
+ case OPC_RISC_DIV:
+ /* Handle by altering args to tcg_gen_div to produce req'd results:
+ * For overflow: want source1 in source1 and 1 in source2
+ * For div by zero: want -1 in source1 and 1 in source2 -> -1 result */
+ tcg_gen_movi_tl(resultopt1, (target_ulong)0xFFFFFFFFFFFFFFFF);
You'd need ULL for a constant with so many F's, but a plain -1 works just fine.
+ tcg_gen_setcondi_tl(TCG_COND_EQ, cond2, source2, (target_ulong)(~0L));
Likewise -1.
+ tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source1,
+ 1L << (TARGET_LONG_BITS - 1));
ULL, not L; or better, (target_ulong)1 << (TARGET_LONG_BITS - 1).
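With those changes the sequence becomes

    tcg_gen_movi_tl(resultopt1, -1);
    tcg_gen_setcondi_tl(TCG_COND_EQ, cond2, source2, -1);
    tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source1,
                        (target_ulong)1 << (TARGET_LONG_BITS - 1));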
+ case OPC_RISC_DIVU:
+ tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source2, 0);
+ tcg_gen_movi_tl(resultopt1, (target_ulong)(~0L));
-1 again.
+ case OPC_RISC_REM:
...
+ case OPC_RISC_REMU:
Similarly.
+static inline void gen_arith_imm(DisasContext *ctx, uint32_t opc, int rd,
+ int rs1, int16_t imm)
+{
+ TCGv source1;
+ source1 = tcg_temp_new();
+ gen_get_gpr(source1, rs1);
+ /* lower 12 bits of imm are valid */
+ target_long uimm = (target_long)imm; /* sign ext 16->64 bits */
Better to just make the function argument target_long, surely. I don't
understand the "uimm", as this isn't unsigned...
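I.e.

    static inline void gen_arith_imm(DisasContext *ctx, uint32_t opc, int rd,
                                     int rs1, target_long imm)

and drop the local copy entirely.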
+#if defined(TARGET_RISCV64)
+ case OPC_RISC_SLLIW:
+ if ((uimm >= 32)) {
+ kill_unknown(ctx, RISCV_EXCP_ILLEGAL_INST);
+ }
+ /* fall through to SLLI */
Missing a break after the known exception; as written, this falls
through and performs the shift anyway.
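I.e.

    case OPC_RISC_SLLIW:
        if (uimm >= 32) {
            kill_unknown(ctx, RISCV_EXCP_ILLEGAL_INST);
            break;
        }
        /* fall through to SLLI */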
+#if defined(TARGET_RISCV64)
+ case OPC_RISC_SHIFT_RIGHT_IW:
+ if ((uimm & 0x3ff) >= 32) {
+ kill_unknown(ctx, RISCV_EXCP_ILLEGAL_INST);
+ }
+ tcg_gen_shli_tl(source1, source1, 32);
+ extra_shamt = 32;
tcg_gen_ext32{u,s}_tl is better than extra shifts.
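E.g., with is_logical standing in for however SRLIW vs SRAIW is
decoded at this point:

    if (is_logical) {
        tcg_gen_ext32u_tl(source1, source1);
        tcg_gen_shri_tl(source1, source1, uimm & 0x1F);
    } else {
        tcg_gen_ext32s_tl(source1, source1);
        tcg_gen_sari_tl(source1, source1, uimm & 0x1F);
    }

modulo the shamt == 0 case of SRLIW, which still wants a final
sign-extension.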
r~