This patch slightly improves the embench-iot benchmark score for
PRU code size.  There is also small improvement in a few real-world
firmware programs.

  Embench-iot size
  ------------------------------------------
  Benchmark          before   after    delta
  ---------           ----    ----     -----
  aha-mont64          4.15    4.15         0
  crc32               6.04    6.04         0
  cubic              21.64   21.62     -0.02
  edn                 6.37    6.37         0
  huffbench          18.63   18.55     -0.08
  matmult-int         5.44    5.44         0
  md5sum             25.56   25.43     -0.13
  minver             12.82   12.76     -0.06
  nbody              15.09   14.97     -0.12
  nettle-aes          4.75    4.75         0
  nettle-sha256       4.67    4.67         0
  nsichneu            3.77    3.77         0
  picojpeg            4.11    4.11         0
  primecount          7.90    7.90         0
  qrduino             7.18    7.16     -0.02
  sglib-combined     13.63   13.59     -0.04
  slre                5.19    5.19         0
  st                 14.23   14.12     -0.11
  statemate           2.34    2.34         0
  tarfind            36.85   36.64     -0.21
  ud                 10.51   10.46     -0.05
  wikisort            7.44    7.41     -0.03
  ---------          -----   -----
  Geometric mean      8.42    8.40     -0.02
  Geometric SD        2.00    2.00         0
  Geometric range    12.68   12.62     -0.06

Reg-tested pru-unknown-elf, and committed to trunk.

gcc/ChangeLog:

        * config/pru/pru.cc (pru_insn_cost): New function.
        (TARGET_INSN_COST): Define for PRU.

Signed-off-by: Dimitar Dimitrov <dimi...@dinux.eu>
---
 gcc/config/pru/pru.cc | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc
index 6e8112be64a..fd1924e38dc 100644
--- a/gcc/config/pru/pru.cc
+++ b/gcc/config/pru/pru.cc
@@ -783,6 +783,39 @@ pru_rtx_costs (rtx x, machine_mode mode,
       }
     }
 }
+
+/* Insn costs on PRU are straightforward because:
+     - Insns emit 0, 1 or more instructions.
+     - All instructions are 32-bit length.
+     - All instructions execute in 1 cycle (sans memory access delays).
+   The "length" attribute maps nicely to the insn cost.  */
+
+static int
+pru_insn_cost (rtx_insn *insn, bool speed)
+{
+  /* Use generic cost calculation for unrecognized insns.  */
+  if (recog_memoized (insn) < 0)
+    return pattern_cost (insn, speed);
+
+  unsigned int len = get_attr_length (insn);
+
+  gcc_assert ((len % 4) == 0);
+
+  int cost = COSTS_N_INSNS (len / 4);
+  /* Some insns have zero length (e.g. blockage, pruloop_end).
+     In such cases give the minimum cost, because a return of
+     0 would incorrectly indicate that the insn cost is unknown.  */
+  if (cost == 0)
+    cost = 1;
+
+  /* Writes are usually posted, so they take 1 cycle.  Reads
+     from DMEM usually take 3 cycles.
+     See TI document SPRACE8A, Device-Specific PRU Read Latency Values.  */
+  if (speed && get_attr_type (insn) == TYPE_LD)
+    cost += COSTS_N_INSNS (2);
+
+  return cost;
+}
 
 static GTY(()) rtx eqdf_libfunc;
 static GTY(()) rtx nedf_libfunc;
@@ -3175,6 +3208,9 @@ pru_unwind_word_mode (void)
 #undef TARGET_RTX_COSTS
 #define TARGET_RTX_COSTS pru_rtx_costs
 
+#undef TARGET_INSN_COST
+#define TARGET_INSN_COST pru_insn_cost
+
 #undef TARGET_PRINT_OPERAND
 #define TARGET_PRINT_OPERAND pru_print_operand
 
-- 
2.41.0

Reply via email to