Update of /cvsroot/arcem/arcem
In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv26947

Modified Files:
      Tag: jit
        Makefile armdefs.h armemu.c 
Log Message:
WIP ARM-on-ARM JIT engine
This is the beginnings of an ARM-on-ARM JIT engine, designed to be used by 
emulators like ArcEm and RPCEmu
Main functionality issues to resolve:
* Currently it's only functional for RISC OS hosts. But it should be fairly 
straightforward to get it working on other host OSes
* Not all instructions are supported yet; unsupported instructions will be 
interpreted
* The simplified interpreter loop which the JIT is invoked from doesn't 
implement the instruction prefetch pipeline; this will eventually need fixing 
(e.g. make the loop smart enough to stay in interpreter mode until the 
prefetched instructions match what's in memory, i.e. the CPU has left the 
self-modifying code sequence)
* The JIT will update the cycle counter but won't actually trigger any events 
until the end of the JIT code block is reached; this may cause issues with some 
software
* However the biggest problem is likely to be that the single-pass code 
generation results in sub-optimal handling of complex instructions like 
LDR/STR. So future development is likely to focus on experimenting with more 
complex code generation techniques, e.g. compiler-style code graphs



Index: armemu.c
===================================================================
RCS file: /cvsroot/arcem/arcem/armemu.c,v
retrieving revision 1.20
retrieving revision 1.20.2.1
diff -u -d -r1.20 -r1.20.2.1
--- armemu.c    26 Apr 2016 00:53:57 -0000      1.20
+++ armemu.c    27 May 2017 20:54:06 -0000      1.20.2.1
@@ -24,15 +24,22 @@
 #include "arch/archio.h"
 #include "arch/ArcemConfig.h"
 #include "ControlPane.h"
+#ifdef DEBUG_JIT_METRICS
+#include "jit/metrics.h"
+#endif
 
+#ifndef AMIGA
 ARMul_State statestr;
+#endif
 
 /* global used to terminate the emulator */
 static bool kill_emulator;
 
 typedef struct {
   ARMword instr;
+#ifdef ARMUL_INSTR_FUNC_CACHE
   ARMEmuFunc func;
+#endif
 } PipelineEntry;
 
 extern PipelineEntry abortpipe;
@@ -57,6 +64,7 @@
   {
     ARMword *data = FastMap_Log2Phy(entry,addr);
     ARMword instr = p->instr = *data;
+#ifdef ARMUL_INSTR_FUNC_CACHE
     ARMEmuFunc *pfunc = FastMap_Phy2Func(state,data);
     ARMEmuFunc temp = *pfunc;
     if(temp == FASTMAP_CLOBBEREDFUNC)
@@ -74,12 +82,15 @@
     }
 #endif
     p->func = temp;
+#endif
   }
   else if(FASTMAP_RESULT_FUNC(res))
   {
     /* Use function, means we can't write back the decode result */
     ARMword instr = p->instr = FastMap_LoadFunc(entry,state,addr);
+#ifdef ARMUL_INSTR_FUNC_CACHE
     p->func = ARMul_Emulate_DecodeInstr(instr);
+#endif
   }
   else
   {
@@ -113,11 +124,14 @@
   if(FASTMAP_RESULT_DIRECT(res))
   {
     ARMword *data = FastMap_Log2Phy(entry,addr);
+#ifdef ARMUL_INSTR_FUNC_CACHE
     ARMEmuFunc *pfunc = FastMap_Phy2Func(state,data);
+#endif
     int i;
     for(i=0;i<3;i++)
     {
       ARMword instr = p->instr = *data;
+#ifdef ARMUL_INSTR_FUNC_CACHE
       ARMEmuFunc temp = *pfunc;
       if(temp == FASTMAP_CLOBBEREDFUNC)
       {
@@ -125,8 +139,9 @@
         temp = *pfunc = ARMul_Emulate_DecodeInstr(instr);
       }
       p->func = temp;
-      data++;
       pfunc++;
+#endif
+      data++;
       p++;
     }
   }
@@ -842,17 +857,25 @@
              This assumes we don't differentiate between N & S cycles */
           ARMul_CLEARABORT;
           data = FastMap_Log2Phy(entry,address&~3);
-          pfunc = FastMap_Phy2Func(state,data);
           count=1;
-          *(data++) = state->Reg[temp++];
+#ifdef ARMUL_INSTR_FUNC_CACHE
+          pfunc = FastMap_Phy2Func(state,data);
           *(pfunc++) = FASTMAP_CLOBBEREDFUNC;
+#else
+          FastMap_PhyClobberFunc(state,data);
+#endif
+          *(data++) = state->Reg[temp++];
           if (BIT(21) && LHSReg != 15)
              LSBase = WBBase;
           for(;temp<16;temp++)
             if(BIT(temp))
             {
-              *(data++) = state->Reg[temp];
+#ifdef ARMUL_INSTR_FUNC_CACHE
               *(pfunc++) = FASTMAP_CLOBBEREDFUNC;
+#else
+              FastMap_PhyClobberFunc(state,data);
+#endif
+              *(data++) = state->Reg[temp];
               count++;
             }
           state->NumCycles += count;
@@ -931,17 +954,25 @@
              This assumes we don't differentiate between N & S cycles */
           ARMul_CLEARABORT;
           data = FastMap_Log2Phy(entry,address&~3);
-          pfunc = FastMap_Phy2Func(state,data);
           count=1;
-          *(data++) = state->Reg[temp++];
+#ifdef ARMUL_INSTR_FUNC_CACHE
+          pfunc = FastMap_Phy2Func(state,data);
           *(pfunc++) = FASTMAP_CLOBBEREDFUNC;
+#else
+          FastMap_PhyClobberFunc(state,data);
+#endif
+          *(data++) = state->Reg[temp++];
           if (BIT(21) && LHSReg != 15)
              LSBase = WBBase;
           for(;temp<16;temp++)
             if(BIT(temp))
             {
-              *(data++) = state->Reg[temp];
+#ifdef ARMUL_INSTR_FUNC_CACHE
               *(pfunc++) = FASTMAP_CLOBBEREDFUNC;
+#else
+              FastMap_PhyClobberFunc(state,data);
+#endif
+              *(data++) = state->Reg[temp];
               count++;
             }
           state->NumCycles += count;
@@ -1075,8 +1106,10 @@
 
 /* Pipeline entry used for prefetch aborts */
 PipelineEntry abortpipe = {
-  ARMul_ABORTWORD,
-  EMFUNCDECL26(SWI)
+  ARMul_ABORTWORD
+#ifdef ARMUL_INSTR_FUNC_CACHE
+  , EMFUNCDECL26(SWI)
+#endif
 };
 
 #define FLATPIPE
@@ -1087,6 +1120,31 @@
 #define PIPESIZE 4 /* 3 or 4. 4 seems to be slightly faster? */
 #endif
 
+static inline void execute_instruction(ARMul_State *state,const PipelineEntry 
*entry,ARMword r15)
+{
+  ARMword instr = entry->instr;
+  if(ARMul_CCCheck(instr,(r15 & CCBITS)))
+  {
+#ifdef ARMUL_INSTR_FUNC_CACHE
+    ARMEmuFunc func = entry->func;
+#else
+    ARMEmuFunc func = ARMul_Emulate_DecodeInstr(instr);
+#endif
+    Prof_BeginFunc(func);
+    (func)(state, instr);
+    Prof_EndFunc(func);
+  }
+}
+
+#ifdef DEBUG_JIT_TEST_EXEC
+void extern_execute_instruction(ARMul_State *state,ARMword instr,ARMword r15)
+{
+  PipelineEntry p = {instr};
+  execute_instruction(state,&p,r15);
+}
+#endif
+
+#ifndef JIT
 void
 ARMul_Emulate26(ARMul_State *state)
 {
@@ -1111,8 +1169,10 @@
       pc            = state->pc;
 #endif
 
+#ifdef ARMUL_INSTR_FUNC_CACHE
       pipe[1].func = ARMul_Emulate_DecodeInstr(pipe[1].instr);
       pipe[2].func = ARMul_Emulate_DecodeInstr(pipe[2].instr);
+#endif
 #ifndef FLATPIPE
       pipeidx = 0;
 #endif
@@ -1205,18 +1265,12 @@
         break;
       }
 
-      ARMword instr = pipe[pipeidx].instr;
-      /*fprintf(stderr, "exec: pc=0x%08x instr=0x%08x\n", pc, instr);*/
-      if(ARMul_CCCheck(instr,ECC))
-      {
-        Prof_BeginFunc(pipe[pipeidx].func);
-        (pipe[pipeidx].func)(state, instr);
-        Prof_EndFunc(pipe[pipeidx].func);
-      }
+      /*fprintf(stderr, "exec: pc=0x%08x instr=0x%08x\n", pc, 
pipe[pipeidx].instr);*/
+      execute_instruction(state,&pipe[pipeidx],state->Reg[15]);
 #else
 /* pipeidx = 0 */
       CycleCount local_time;
-      ARMword excep, instr;
+      ARMword excep;
       ARMword r15 = state->Reg[15];
       Prof_Begin("Fetch/decode");
       switch (state->NextInstr) {
@@ -1259,13 +1313,7 @@
         break;
       }
 
-      instr = pipe[1].instr;
-      if(ARMul_CCCheck(instr,(r15 & CCBITS)))
-      {
-        Prof_BeginFunc(pipe[1].func);
-        (pipe[1].func)(state, instr);
-        Prof_EndFunc(pipe[1].func);
-      }
+      execute_instruction(state,&pipe[1],r15);
 
 /* pipeidx = 1 */
       r15 = state->Reg[15];
@@ -1310,13 +1358,7 @@
         break;
       }
 
-      instr = pipe[2].instr;
-      if(ARMul_CCCheck(instr,(r15 & CCBITS)))
-      {
-        Prof_BeginFunc(pipe[2].func);
-        (pipe[2].func)(state, instr);
-        Prof_EndFunc(pipe[2].func);
-      }
+      execute_instruction(state,&pipe[2],r15);
 
 /* pipeidx = 2 */
       r15 = state->Reg[15];
@@ -1365,13 +1407,7 @@
         break;
       }
 
-      instr = pipe[0].instr;
-      if(ARMul_CCCheck(instr,(r15 & CCBITS)))
-      {
-        Prof_BeginFunc(pipe[0].func);
-        (pipe[0].func)(state, instr);
-        Prof_EndFunc(pipe[0].func);
-      }
+      execute_instruction(state,&pipe[0],r15);
 #endif
     } /* for loop */
 
@@ -1384,3 +1420,146 @@
 #endif
   }
 } /* Emulate 26 in instruction based mode */
+
+#else /* JIT */
+
+#ifdef DEBUG_JIT_TEST_ALL_EXEC
+extern JITResult test_exec(JITEmuState *state,ARMword *addr);
+#endif
+
+void ARMul_Emulate26(ARMul_State *state)
+{
+  EmuRate_Reset(state);
+  kill_emulator = false;
+  while (kill_emulator == false) {
+    for (;;) { /* just keep going */
+      CycleCount local_time;
+      ARMword excep;
+      ARMword r15 = state->Reg[15];
+      PipelineEntry p;
+      switch (state->NextInstr) {
+        case NORMAL:
+          r15 += 4;
+        case PCINCED:
+          break;
+        default:
+          state->Aborted = 0;
+          r15 += 8;
+          state->NumCycles += 2;
+          break;
+      }
+      NORMALCYCLE; /* state->NextInstr = NORMAL */
+      /* r15-8 = instruction to execute */
+  
+      local_time = ARMul_Time;
+      while(((CycleDiff) (local_time-state->EventQ[0].Time)) >= 0)
+      {
+        EventQ_Func func = state->EventQ[0].Func;
+        Prof_BeginFunc(func);
+        (func)(state,local_time);
+        Prof_EndFunc(func);
+      }
+  
+      excep = state->Exception &~r15;
+      
+      /* Write back updated PC before handling exception/instruction */
+      state->Reg[15] = r15;
+  
+      if (excep) { /* Any exceptions */
+        if (excep & Exception_FIQ) {
+          Prof_BeginFunc(ARMul_Abort);
+          ARMul_Abort(state, ARMul_FIQV);
+          Prof_EndFunc(ARMul_Abort);
+        } else {
+          Prof_BeginFunc(ARMul_Abort);
+          ARMul_Abort(state, ARMul_IRQV);
+          Prof_EndFunc(ARMul_Abort);
+        }
+        break;
+      }
+
+      /* Call into JIT if possible */
+      {
+        ARMword addr = (r15-8) & 0x3fffffc;
+        FastMapEntry *entry;
+        FastMapRes res;
+        entry = FastMap_GetEntryNoWrap(state,addr);
+        res = FastMap_DecodeRead(entry,state->FastMapMode);
+        if(FASTMAP_RESULT_DIRECT(res))
+        {
+          ARMword *data = FastMap_Log2Phy(entry,addr);
+          JITFunc *func = JIT_Phy2Func(JIT_GetState(state),data);
+#ifdef JIT_DEBUG
+          fprintf(stderr,"%08x %08x %08x\n",r15,addr,*data);
+#endif
+          JITResult jres;
+#ifdef DEBUG_JIT_METRICS_EXEC
+          state->jit.exec_count=0;
+#endif
+#ifdef DEBUG_JIT_TRACE
+          fprintf(stderr,"E %08x\n",addr);
+#endif
+#ifdef DEBUG_JIT_TEST_ALL_EXEC
+          if (*func != &JIT_Generate) {
+            jres = test_exec(state,data);
+          }
+          else
+#endif
+          {
+            jres = (*func)(state,data);
+          }
+
+#ifdef DEBUG_JIT_METRICS
+          {
+            TerminateReason terminate = TerminateReason_Normal;
+#ifdef DEBUG_JIT_METRICS_EXEC
+            /* Work out how many instructions were executed by the JIT (if 
any) */
+            uint32_t length = state->jit.exec_count-1;
+            if (length < JITPAGE_SIZE/4)
+            {
+              jitmetrics.execute_histogram[length]++;
+            }
+#endif
+            if (jres == JITResult_Interpret)
+            {
+              /* What instruction caused this result? */
+              addr = R15PC-8;
+              entry = FastMap_GetEntryNoWrap(state,addr);
+              res = FastMap_DecodeRead(entry,state->FastMapMode);
+              if (FASTMAP_RESULT_DIRECT(res))
+              {
+                ARMword *data = FastMap_Log2Phy(entry,addr);
+                Instruction instr;
+                Decoder_Decode(&instr,*data);
+                terminate = (TerminateReason) instr.type;
+              }
+              else
+              {
+                terminate = TerminateReason_Special;
+              }
+            }
+            jitmetrics.terminate_reason[terminate]++;
+          }
+#endif
+
+          if (jres == JITResult_Normal)
+          {
+            continue;
+          }
+          r15 = state->Reg[15];
+#ifdef JIT_DEBUG
+           fprintf(stderr,"%08xI\n",r15);
+#endif
+        }
+      }
+
+#ifdef DEBUG_JIT_METRICS
+      jitmetrics.interpret_count++;
+#endif
+      ARMul_LoadInstr(state,r15-8,&p);
+      execute_instruction(state,&p,r15);
+    }
+  }
+}
+
+#endif /* JIT */

Index: armdefs.h
===================================================================
RCS file: /cvsroot/arcem/arcem/armdefs.h,v
retrieving revision 1.8
retrieving revision 1.8.2.1
diff -u -d -r1.8 -r1.8.2.1
--- armdefs.h   6 Mar 2013 19:07:29 -0000       1.8
+++ armdefs.h   27 May 2017 20:54:06 -0000      1.8.2.1
@@ -24,11 +24,19 @@
 #include <limits.h>
 
 #include "c99.h"
+#ifdef JIT
+#include "jit/jitstate.h"
+#endif
+
+/* Control caching of instruction handler functions */
+//#define ARMUL_INSTR_FUNC_CACHE
 
 typedef uint32_t ARMword; /* must be 32 bits wide */
 
 typedef struct ARMul_State ARMul_State;
+#ifndef AMIGA
 extern ARMul_State statestr;
+#endif
 
 #define FALSE 0
 #define TRUE 1
@@ -134,7 +142,9 @@
 #define FASTMAP_ACCESSFUNC_BYTE        0x02UL /* Only relevant for writes */
 #define FASTMAP_ACCESSFUNC_STATECHANGE 0x04UL /* Only relevant for writes */
 
+#ifdef ARMUL_INSTR_FUNC_CACHE
 #define FASTMAP_CLOBBEREDFUNC 0 /* Value written when a func gets clobbered */
+#endif
 
 typedef FastMapInt FastMapRes; /* Result of a DecodeRead/DecodeWrite function 
*/
 
@@ -234,13 +244,20 @@
    unsigned NtransSig;        /* MEMC USR/SVC flag, somewhat redundant with 
FastMapMode */
    ARMword Base;              /* extra hand for base writeback */
 
+#ifdef JIT
+   /* JIT */
+   JITState jit;
+#endif
+
    /* Event queue */
    EventQ_Entry EventQ[EVENTQ_SIZE];
    uint_fast8_t NumEvents;
 
    /* Fastmap stuff */
    FastMapUInt FastMapMode;   /* Current access mode flags */
+#ifdef ARMUL_INSTR_FUNC_CACHE
    FastMapUInt FastMapInstrFuncOfs; /* Offset between the RAM/ROM data and the 
ARMEmuFunc data */
+#endif
    FastMapEntry *FastMap;
 
    /* Less common stuff */   
@@ -358,6 +375,9 @@
 # define HOURGLASS_RATE      1023   /* 2^n - 1 */
 #endif
 
+#ifdef JIT
+#include "jit/jitstate2.h"
+#endif
 #include "arch/archio.h"
 #include "arch/armarc.h"
 #include "eventq.h"

Index: Makefile
===================================================================
RCS file: /cvsroot/arcem/arcem/Makefile,v
retrieving revision 1.45
retrieving revision 1.45.2.1
diff -u -d -r1.45 -r1.45.2.1
--- Makefile    16 Dec 2015 19:30:22 -0000      1.45
+++ Makefile    27 May 2017 20:54:06 -0000      1.45.2.1
@@ -41,11 +41,18 @@
 # development, arcem-1.50, etc.
 MANUAL?=development
 
+# Whether you want the JIT enabled
+JIT?=yes
+
 # Windowing System
 ifeq ($(SYSTEM),)
 SYSTEM=X
 endif
 
+ifneq ($(SYSTEM),riscos-single)
+JIT=no
+endif
+
 CC=gcc
 LD=gcc
 LDFLAGS=
@@ -77,6 +84,21 @@
 INSTALL=cp
 
 
+# JIT debug flags
+# DEBUG_JIT_TEST_EXEC: Test single-stepping instruction sequences against the 
JIT
+# DEBUG_JIT_SINGLE_INSTR: Only generate code sequences one instruction long
+# DEBUG_JIT_FAKE: Don't execute JIT code, fake it (requires 
DEBUG_JIT_TEST_EXEC)
+# JIT_DEBUG: stderr spam
+# DEBUG_JIT_TEST_ALL_EXEC: Test all code block executions, not just on first 
generate (requires DEBUG_JIT_TEST_EXEC)
+# DEBUG_JIT_FORCE_NORMAL: Force all non-empty code blocks to return 
JITResult_Normal
+# DEBUG_JIT_METRICS: Collect and report metrics
+# DEBUG_JIT_METRICS_EXEC: Measure number of instructions executed by JIT 
(affects JIT code generation)
+# DEBUG_JIT_DUMP: Dump disassembly of JIT code sequences
+# DEBUG_JIT_TRACE: Trace PC, as seen by main loop (i.e. interpreted 
instructions + code block starts)
+#CFLAGS += -DDEBUG_JIT_METRICS -DDEBUG_JIT_METRICS_EXEC
+#CFLAGS += -DDEBUG_JIT_TEST_EXEC -DDEBUG_JIT_TEST_ALL_EXEC
+#CFLAGS += -DDEBUG_JIT_DUMP -DDEBUG_JIT_TRACE
+
 # Everything else should be ok as it is.
 
 OBJS = armcopro.o armemu.o arminit.o \
@@ -98,6 +120,12 @@
   arch/i2c.h arch/archio.h arch/fdc1772.h arch/ControlPane.h \
   arch/hdc63463.h arch/keyboard.h arch/ArcemConfig.h arch/cp15.h
 
+ifeq ($(JIT),yes)
+OBJS += jit/codeblocks.o jit/decoder.o jit/dirtyranges.o jit/emuinterf.o 
jit/jit.o jit/jitpage.o jit/memattr.o jit/metrics.o jit/regalloc.o
+SRCS += jit/codeblocks.c jit/decoder.c jit/dirtyranges.c jit/emuinterf.c 
jit/jit.c jit/jitpage.c jit/memattr.c jit/metrics.c jit/regalloc.c
+CFLAGS += -DJIT
+endif
+
 TARGET=arcem
 
 ifeq (${SYSTEM},amiga)
@@ -343,5 +371,32 @@
 X/pseudo.o: X/pseudo.c
        $(CC) $(CFLAGS) -c $*.c -o X/pseudo.o
 
+jit/codeblocks.o: jit/codeblocks.c
+       $(CC) $(CFLAGS) -c $*.c -o $@
+
+jit/decoder.o: jit/decoder.c
+       $(CC) $(CFLAGS) -c $*.c -o $@
+
+jit/dirtyranges.o: jit/dirtyranges.c
+       $(CC) $(CFLAGS) -c $*.c -o $@
+
+jit/emuinterf.o: jit/emuinterf.c
+       $(CC) $(CFLAGS) -c $*.c -o $@
+
+jit/jit.o: jit/jit.c
+       $(CC) $(CFLAGS) -c $*.c -o $@
+
+jit/jitpage.o: jit/jitpage.c
+       $(CC) $(CFLAGS) -c $*.c -o $@
+
+jit/memattr.o: jit/memattr.c
+       $(CC) $(CFLAGS) -c $*.c -o $@
+
+jit/metrics.o: jit/metrics.c
+       $(CC) $(CFLAGS) -c $*.c -o $@
+
+jit/regalloc.o: jit/regalloc.c
+       $(CC) $(CFLAGS) -c $*.c -o $@
+
 
 # DO NOT DELETE


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
-- 
arcem-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/arcem-cvs

Reply via email to