On 29/10/2013 1:05 PM, Max Filippov wrote:
On Tue, Oct 29, 2013 at 11:04 PM, Sebastian Macke <sebast...@macke.de> wrote:
This patch separates the load and store instruction to a
separate function.
The repetition of the source code can be reduced and further
optimizations can be implemented.
In this case it checks for a zero offset and optimizes it.

Additional this patch solves a severe bug for the softmmu emulation.
The pc has to be saved as these instructions can fail and lead
to a tlb miss exception.
In case of an exception we re-translate the TB to find the PC where
the exception happened, see cpu_restore_state call from the tlb_fill
function. Also this applies to both user and system emulation, but
you only handle the system emulation case.


The problem is the epcr register in the interrupt routine in which the current pc must be saved. Of course in the user emulation case the interrupt handler is never executed.

When is the pc of the fault determined? Before or after the interrupt handler? Finding this problem gave me a long headache. But it would be nice if there is a better solution.



Signed-off-by: Sebastian Macke <sebast...@macke.de>
---
  target-openrisc/translate.c | 130 ++++++++++++++++++++++++++------------------
  1 file changed, 76 insertions(+), 54 deletions(-)

diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index 31f8717..1bb686c 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -692,6 +692,73 @@ static void dec_calc(DisasContext *dc, uint32_t insn)
      }
  }

+static void gen_loadstore(DisasContext *dc, uint32 op0,
+                          uint32_t ra, uint32_t rb, uint32_t rd,
+                          uint32_t offset)
+{
+
+/* The load and store instructions can fail and lead to a
+ *  tlb miss exception. The correct pc has to be stored for
+ *  this case.
+ */
+#if !defined(CONFIG_USER_ONLY)
+    tcg_gen_movi_tl(cpu_pc, dc->pc);
+#endif
+
+    TCGv t0 = cpu_R[ra];
+    if (offset != 0) {
+        t0 = tcg_temp_new();
+        tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(offset, 16));
+    }
+
+    switch (op0) {
+    case 0x21:    /* l.lwz */
+        tcg_gen_qemu_ld32u(cpu_R[rd], t0, dc->mem_idx);
+        break;
+
+    case 0x22:    /* l.lws */
+        tcg_gen_qemu_ld32s(cpu_R[rd], t0, dc->mem_idx);
+        break;
+
+    case 0x23:    /* l.lbz */
+        tcg_gen_qemu_ld8u(cpu_R[rd], t0, dc->mem_idx);
+        break;
+
+    case 0x24:    /* l.lbs */
+        tcg_gen_qemu_ld8s(cpu_R[rd], t0, dc->mem_idx);
+        break;
+
+    case 0x25:    /* l.lhz */
+        tcg_gen_qemu_ld16u(cpu_R[rd], t0, dc->mem_idx);
+        break;
+
+    case 0x26:    /* l.lhs */
+        tcg_gen_qemu_ld16s(cpu_R[rd], t0, dc->mem_idx);
+        break;
+
+    case 0x35:    /* l.sw */
+        tcg_gen_qemu_st32(cpu_R[rb], t0, dc->mem_idx);
+        break;
+
+    case 0x36:    /* l.sb */
+        tcg_gen_qemu_st8(cpu_R[rb], t0, dc->mem_idx);
+        break;
+
+    case 0x37:    /* l.sh */
+        tcg_gen_qemu_st16(cpu_R[rb], t0, dc->mem_idx);
+        break;
+
+    default:
+    break;
Broken indentation.

+    }
+
+    if (offset != 0) {
+        tcg_temp_free(t0);
+    }
+
+}
+
+
  static void dec_misc(DisasContext *dc, uint32_t insn)
  {
      uint32_t op0, op1;
@@ -843,62 +910,32 @@ static void dec_misc(DisasContext *dc, uint32_t insn)

      case 0x21:    /* l.lwz */
          LOG_DIS("l.lwz r%d, r%d, %d\n", rd, ra, I16);
-        {
-            TCGv t0 = tcg_temp_new();
-            tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(I16, 16));
-            tcg_gen_qemu_ld32u(cpu_R[rd], t0, dc->mem_idx);
-            tcg_temp_free(t0);
-        }
+        gen_loadstore(dc, op0, ra, rb, rd, I16);
          break;

      case 0x22:    /* l.lws */
          LOG_DIS("l.lws r%d, r%d, %d\n", rd, ra, I16);
-        {
-            TCGv t0 = tcg_temp_new();
-            tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(I16, 16));
-            tcg_gen_qemu_ld32s(cpu_R[rd], t0, dc->mem_idx);
-            tcg_temp_free(t0);
-        }
+        gen_loadstore(dc, op0, ra, rb, rd, I16);
          break;

      case 0x23:    /* l.lbz */
          LOG_DIS("l.lbz r%d, r%d, %d\n", rd, ra, I16);
-        {
-            TCGv t0 = tcg_temp_new();
-            tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(I16, 16));
-            tcg_gen_qemu_ld8u(cpu_R[rd], t0, dc->mem_idx);
-            tcg_temp_free(t0);
-        }
+        gen_loadstore(dc, op0, ra, rb, rd, I16);
          break;

      case 0x24:    /* l.lbs */
          LOG_DIS("l.lbs r%d, r%d, %d\n", rd, ra, I16);
-        {
-            TCGv t0 = tcg_temp_new();
-            tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(I16, 16));
-            tcg_gen_qemu_ld8s(cpu_R[rd], t0, dc->mem_idx);
-            tcg_temp_free(t0);
-        }
+        gen_loadstore(dc, op0, ra, rb, rd, I16);
          break;

      case 0x25:    /* l.lhz */
          LOG_DIS("l.lhz r%d, r%d, %d\n", rd, ra, I16);
-        {
-            TCGv t0 = tcg_temp_new();
-            tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(I16, 16));
-            tcg_gen_qemu_ld16u(cpu_R[rd], t0, dc->mem_idx);
-            tcg_temp_free(t0);
-        }
+        gen_loadstore(dc, op0, ra, rb, rd, I16);
          break;

      case 0x26:    /* l.lhs */
          LOG_DIS("l.lhs r%d, r%d, %d\n", rd, ra, I16);
-        {
-            TCGv t0 = tcg_temp_new();
-            tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(I16, 16));
-            tcg_gen_qemu_ld16s(cpu_R[rd], t0, dc->mem_idx);
-            tcg_temp_free(t0);
-        }
+        gen_loadstore(dc, op0, ra, rb, rd, I16);
          break;

      case 0x27:    /* l.addi */
@@ -1047,32 +1084,17 @@ static void dec_misc(DisasContext *dc, uint32_t insn)

      case 0x35:    /* l.sw */
          LOG_DIS("l.sw %d, r%d, r%d, %d\n", I5, ra, rb, I11);
-        {
-            TCGv t0 = tcg_temp_new();
-            tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(tmp, 16));
-            tcg_gen_qemu_st32(cpu_R[rb], t0, dc->mem_idx);
-            tcg_temp_free(t0);
-        }
+        gen_loadstore(dc, op0, ra, rb, rd, tmp);
          break;

      case 0x36:    /* l.sb */
          LOG_DIS("l.sb %d, r%d, r%d, %d\n", I5, ra, rb, I11);
-        {
-            TCGv t0 = tcg_temp_new();
-            tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(tmp, 16));
-            tcg_gen_qemu_st8(cpu_R[rb], t0, dc->mem_idx);
-            tcg_temp_free(t0);
-        }
+        gen_loadstore(dc, op0, ra, rb, rd, tmp);
          break;

      case 0x37:    /* l.sh */
+        gen_loadstore(dc, op0, ra, rb, rd, tmp);
          LOG_DIS("l.sh %d, r%d, r%d, %d\n", I5, ra, rb, I11);
In other cases you do it in the reverse order.
Looks like all these cases can be further consolidated into
a pair of LOG_DIS(); gen_loadstore(); with a small table for
LOG_DIS format string each.

You are right. This is not optimal. But before it was worse. I will try to I optimize it in the V2 patchset.



-        {
-            TCGv t0 = tcg_temp_new();
-            tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(tmp, 16));
-            tcg_gen_qemu_st16(cpu_R[rb], t0, dc->mem_idx);
-            tcg_temp_free(t0);
-        }
          break;

      default:


Reply via email to