On 11/15/16 13:21, Richard Sandiford wrote:
> Bernd Edlinger <bernd.edlin...@hotmail.de> writes:
>> Hi!
>>
>> The genattrtab build-tool uses way too much memory in general.
>> I think there is no other build step that uses more memory.
>>
>> On the currently trunk it takes around 700MB to build the
>> ARM latency tab files.  I debugged that yesterday
>> and found that this can be reduced to 8MB (!).  Yes, really.
>>
>> So the attached patch does try really hard to hash and re-use
>> all ever created rtx objects.
>>
>> Bootstrapped and reg-tested on x86_64-pc-linux-gnu and ARM.
>> Is it OK for trunk?
>
> Just to check: does this produce the same output as before?
> And did you notice any difference in the time genattrtab
> takes to run?
>

The run time was in the range of 24-25s, with and without the patch.

However the tables are a bit different, although that seems only to be
w flaw with the ATTR_CURR_SIMPLIFIED_P which is now re-used when a
matching rtx was found in the hash.  As I said, the generated functions
do really work, probably because just a few simplifications are missing.

So it looks like I need to clear the ATTR_CURR_SIMPLIFIED_P on re-used
binary ops.  That I found out just by try-and-error.  I can say that now
the generated functions are exactly identical for i386, arm and mips.
The memory and the run time did not change due to this re-hashing.

>
> ATTR_PERMANENT_P is supposed to guarantee that no other rtx like it exists,
> so that x != y when x or y is "permanent" implies that the attributes
> must be different.  This lets attr_equal_p avoid a recursive walk:
>
> static int
> attr_equal_p (rtx x, rtx y)
> {
>   return (x == y || (! (ATTR_PERMANENT_P (x) && ATTR_PERMANENT_P (y))
>                    && rtx_equal_p (x, y)));
> }
>
> Does the patch still guarantee that?
>

Hmm, I see.  I expected that ATTR_PERMANENT_P means more or less,
that it is in the hash table.  I believe that a long time ago, there
was a kind of garbage collection of temporary rtx objects, that needed
to be copied from the temporary space to the permanent space, after
the simplification was done.  And then all temporary objects were
just tossed away.  But that was long before my time.  Today
everything is permanent, that is why the memory usage is unbounded.

But I can fix that, by only setting ATTR_PERMANENT_P on the hashed
rtx when both sub-rtx are also ATTR_PERMANENT_P.


How does that new version look, is it OK?


Thanks
Bernd.
2016-11-15  Bernd Edlinger  <bernd.edlin...@hotmail.de>

        * genattrtab.c (attr_rtx_1): Avoid allocating new rtx objects.
        Clear ATTR_CURR_SIMPLIFIED_P for re-used binary rtx objects.
        Use DEF_ATTR_STRING for string arguments.  Use RTL_HASH for
        integer arguments.  Only set ATTR_PERMANENT_P on newly hashed
        rtx when all sub-rtx are also permanent.
        (attr_eq): Simplify.
        (attr_copy_rtx): Remove.
        (make_canonical, get_attr_value): Use attr_equal_p.
        (copy_boolean): Rehash NOT.
        (simplify_test_exp_in_temp,
        optimize_attrs): Remove call to attr_copy_rtx.
        (attr_alt_intersection, attr_alt_union,
        attr_alt_complement, mk_attr_alt): Rehash EQ_ATTR_ALT.
        (make_automaton_attrs): Use attr_eq.
Index: gcc/genattrtab.c
===================================================================
--- gcc/genattrtab.c	(revision 242335)
+++ gcc/genattrtab.c	(working copy)
@@ -386,6 +386,7 @@ attr_rtx_1 (enum rtx_code code, va_list p)
   unsigned int hashcode;
   struct attr_hash *h;
   struct obstack *old_obstack = rtl_obstack;
+  int permanent_p = 1;
 
   /* For each of several cases, search the hash table for an existing entry.
      Use that entry if one is found; otherwise create a new RTL and add it
@@ -395,13 +396,8 @@ attr_rtx_1 (enum rtx_code code, va_list p)
     {
       rtx arg0 = va_arg (p, rtx);
 
-      /* A permanent object cannot point to impermanent ones.  */
       if (! ATTR_PERMANENT_P (arg0))
-	{
-	  rt_val = rtx_alloc (code);
-	  XEXP (rt_val, 0) = arg0;
-	  return rt_val;
-	}
+	permanent_p = 0;
 
       hashcode = ((HOST_WIDE_INT) code + RTL_HASH (arg0));
       for (h = attr_hash_table[hashcode % RTL_HASH_SIZE]; h; h = h->next)
@@ -425,14 +421,8 @@ attr_rtx_1 (enum rtx_code code, va_list p)
       rtx arg0 = va_arg (p, rtx);
       rtx arg1 = va_arg (p, rtx);
 
-      /* A permanent object cannot point to impermanent ones.  */
       if (! ATTR_PERMANENT_P (arg0) || ! ATTR_PERMANENT_P (arg1))
-	{
-	  rt_val = rtx_alloc (code);
-	  XEXP (rt_val, 0) = arg0;
-	  XEXP (rt_val, 1) = arg1;
-	  return rt_val;
-	}
+	permanent_p = 0;
 
       hashcode = ((HOST_WIDE_INT) code + RTL_HASH (arg0) + RTL_HASH (arg1));
       for (h = attr_hash_table[hashcode % RTL_HASH_SIZE]; h; h = h->next)
@@ -440,7 +430,10 @@ attr_rtx_1 (enum rtx_code code, va_list p)
 	    && GET_CODE (h->u.rtl) == code
 	    && XEXP (h->u.rtl, 0) == arg0
 	    && XEXP (h->u.rtl, 1) == arg1)
-	  return h->u.rtl;
+	  {
+	    ATTR_CURR_SIMPLIFIED_P (h->u.rtl) = 0;
+	    return h->u.rtl;
+	  }
 
       if (h == 0)
 	{
@@ -481,6 +474,9 @@ attr_rtx_1 (enum rtx_code code, va_list p)
       char *arg0 = va_arg (p, char *);
       char *arg1 = va_arg (p, char *);
 
+      arg0 = DEF_ATTR_STRING (arg0);
+      arg1 = DEF_ATTR_STRING (arg1);
+
       hashcode = ((HOST_WIDE_INT) code + RTL_HASH (arg0) + RTL_HASH (arg1));
       for (h = attr_hash_table[hashcode % RTL_HASH_SIZE]; h; h = h->next)
 	if (h->hashcode == hashcode
@@ -497,6 +493,29 @@ attr_rtx_1 (enum rtx_code code, va_list p)
 	  XSTR (rt_val, 1) = arg1;
 	}
     }
+  else if (GET_RTX_LENGTH (code) == 2
+	   && GET_RTX_FORMAT (code)[0] == 'i'
+	   && GET_RTX_FORMAT (code)[1] == 'i')
+    {
+      int  arg0 = va_arg (p, int);
+      int  arg1 = va_arg (p, int);
+
+      hashcode = ((HOST_WIDE_INT) code + RTL_HASH (arg0) + RTL_HASH (arg1));
+      for (h = attr_hash_table[hashcode % RTL_HASH_SIZE]; h; h = h->next)
+	if (h->hashcode == hashcode
+	    && GET_CODE (h->u.rtl) == code
+	    && XINT (h->u.rtl, 0) == arg0
+	    && XINT (h->u.rtl, 1) == arg1)
+	  return h->u.rtl;
+
+      if (h == 0)
+	{
+	  rtl_obstack = hash_obstack;
+	  rt_val = rtx_alloc (code);
+	  XINT (rt_val, 0) = arg0;
+	  XINT (rt_val, 1) = arg1;
+	}
+    }
   else if (code == CONST_INT)
     {
       HOST_WIDE_INT arg0 = va_arg (p, HOST_WIDE_INT);
@@ -552,7 +571,7 @@ attr_rtx_1 (enum rtx_code code, va_list p)
 
   rtl_obstack = old_obstack;
   attr_hash_add_rtx (hashcode, rt_val);
-  ATTR_PERMANENT_P (rt_val) = 1;
+  ATTR_PERMANENT_P (rt_val) = permanent_p;
   return rt_val;
 }
 
@@ -592,7 +611,7 @@ attr_printf (unsigned int len, const char *fmt, ..
 static rtx
 attr_eq (const char *name, const char *value)
 {
-  return attr_rtx (EQ_ATTR, DEF_ATTR_STRING (name), DEF_ATTR_STRING (value));
+  return attr_rtx (EQ_ATTR, name, value);
 }
 
 static const char *
@@ -646,89 +665,6 @@ attr_equal_p (rtx x, rtx y)
 		     && rtx_equal_p (x, y)));
 }
 
-/* Copy an attribute value expression,
-   descending to all depths, but not copying any
-   permanent hashed subexpressions.  */
-
-static rtx
-attr_copy_rtx (rtx orig)
-{
-  rtx copy;
-  int i, j;
-  RTX_CODE code;
-  const char *format_ptr;
-
-  /* No need to copy a permanent object.  */
-  if (ATTR_PERMANENT_P (orig))
-    return orig;
-
-  code = GET_CODE (orig);
-
-  switch (code)
-    {
-    case REG:
-    CASE_CONST_ANY:
-    case SYMBOL_REF:
-    case MATCH_TEST:
-    case CODE_LABEL:
-    case PC:
-    case CC0:
-      return orig;
-
-    default:
-      break;
-    }
-
-  copy = rtx_alloc (code);
-  PUT_MODE (copy, GET_MODE (orig));
-  ATTR_IND_SIMPLIFIED_P (copy) = ATTR_IND_SIMPLIFIED_P (orig);
-  ATTR_CURR_SIMPLIFIED_P (copy) = ATTR_CURR_SIMPLIFIED_P (orig);
-  ATTR_PERMANENT_P (copy) = ATTR_PERMANENT_P (orig);
-
-  format_ptr = GET_RTX_FORMAT (GET_CODE (copy));
-
-  for (i = 0; i < GET_RTX_LENGTH (GET_CODE (copy)); i++)
-    {
-      switch (*format_ptr++)
-	{
-	case 'e':
-	  XEXP (copy, i) = XEXP (orig, i);
-	  if (XEXP (orig, i) != NULL)
-	    XEXP (copy, i) = attr_copy_rtx (XEXP (orig, i));
-	  break;
-
-	case 'E':
-	case 'V':
-	  XVEC (copy, i) = XVEC (orig, i);
-	  if (XVEC (orig, i) != NULL)
-	    {
-	      XVEC (copy, i) = rtvec_alloc (XVECLEN (orig, i));
-	      for (j = 0; j < XVECLEN (copy, i); j++)
-		XVECEXP (copy, i, j) = attr_copy_rtx (XVECEXP (orig, i, j));
-	    }
-	  break;
-
-	case 'n':
-	case 'i':
-	  XINT (copy, i) = XINT (orig, i);
-	  break;
-
-	case 'w':
-	  XWINT (copy, i) = XWINT (orig, i);
-	  break;
-
-	case 's':
-	case 'S':
-	  XSTR (copy, i) = XSTR (orig, i);
-	  break;
-
-	default:
-	  gcc_unreachable ();
-	}
-    }
-  return copy;
-}
-
 /* Given a test expression EXP for attribute ATTR, ensure it is validly
    formed.  LOC is the location of the .md construct that contains EXP.
 
@@ -1236,7 +1172,7 @@ make_canonical (file_location loc, struct attr_des
 	    XVECEXP (exp, 0, i) = copy_boolean (XVECEXP (exp, 0, i));
 	    XVECEXP (exp, 0, i + 1)
 	      = make_canonical (loc, attr, XVECEXP (exp, 0, i + 1));
-	    if (! rtx_equal_p (XVECEXP (exp, 0, i + 1), defval))
+	    if (! attr_equal_p (XVECEXP (exp, 0, i + 1), defval))
 	      allsame = 0;
 	  }
 	if (allsame)
@@ -1257,6 +1193,8 @@ copy_boolean (rtx exp)
   if (GET_CODE (exp) == AND || GET_CODE (exp) == IOR)
     return attr_rtx (GET_CODE (exp), copy_boolean (XEXP (exp, 0)),
 		     copy_boolean (XEXP (exp, 1)));
+  else if (GET_CODE (exp) == NOT)
+    return attr_rtx (NOT, copy_boolean (XEXP (exp, 0)));
   if (GET_CODE (exp) == MATCH_OPERAND)
     {
       XSTR (exp, 1) = DEF_ATTR_STRING (XSTR (exp, 1));
@@ -1298,7 +1236,7 @@ get_attr_value (file_location loc, rtx value, stru
     }
 
   for (av = attr->first_value; av; av = av->next)
-    if (rtx_equal_p (value, av->value)
+    if (attr_equal_p (value, av->value)
 	&& (num_alt == 0 || av->first_insn == NULL
 	    || insn_alternatives[av->first_insn->def->insn_code]))
       return av;
@@ -2339,9 +2277,7 @@ simplify_test_exp_in_temp (rtx exp, int insn_code,
   rtl_obstack = temp_obstack;
   x = simplify_test_exp (exp, insn_code, insn_index);
   rtl_obstack = old;
-  if (x == exp || rtl_obstack == temp_obstack)
-    return x;
-  return attr_copy_rtx (x);
+  return x;
 }
 
 /* Returns true if S1 is a subset of S2.  */
@@ -2397,28 +2333,27 @@ attr_alt_subset_of_compl_p (rtx s1, rtx s2)
 static rtx
 attr_alt_intersection (rtx s1, rtx s2)
 {
-  rtx result = rtx_alloc (EQ_ATTR_ALT);
+  int result;
 
   switch ((XINT (s1, 1) << 1) | XINT (s2, 1))
     {
     case (0 << 1) | 0:
-      XINT (result, 0) = XINT (s1, 0) & XINT (s2, 0);
+      result = XINT (s1, 0) & XINT (s2, 0);
       break;
     case (0 << 1) | 1:
-      XINT (result, 0) = XINT (s1, 0) & ~XINT (s2, 0);
+      result = XINT (s1, 0) & ~XINT (s2, 0);
       break;
     case (1 << 1) | 0:
-      XINT (result, 0) = XINT (s2, 0) & ~XINT (s1, 0);
+      result = XINT (s2, 0) & ~XINT (s1, 0);
       break;
     case (1 << 1) | 1:
-      XINT (result, 0) = XINT (s1, 0) | XINT (s2, 0);
+      result = XINT (s1, 0) | XINT (s2, 0);
       break;
     default:
       gcc_unreachable ();
     }
-  XINT (result, 1) = XINT (s1, 1) & XINT (s2, 1);
 
-  return result;
+  return attr_rtx (EQ_ATTR_ALT, result, XINT (s1, 1) & XINT (s2, 1));
 }
 
 /* Return EQ_ATTR_ALT expression representing union of S1 and S2.  */
@@ -2426,28 +2361,27 @@ attr_alt_intersection (rtx s1, rtx s2)
 static rtx
 attr_alt_union (rtx s1, rtx s2)
 {
-  rtx result = rtx_alloc (EQ_ATTR_ALT);
+  int result;
 
   switch ((XINT (s1, 1) << 1) | XINT (s2, 1))
     {
     case (0 << 1) | 0:
-      XINT (result, 0) = XINT (s1, 0) | XINT (s2, 0);
+      result = XINT (s1, 0) | XINT (s2, 0);
       break;
     case (0 << 1) | 1:
-      XINT (result, 0) = XINT (s2, 0) & ~XINT (s1, 0);
+      result = XINT (s2, 0) & ~XINT (s1, 0);
       break;
     case (1 << 1) | 0:
-      XINT (result, 0) = XINT (s1, 0) & ~XINT (s2, 0);
+      result = XINT (s1, 0) & ~XINT (s2, 0);
       break;
     case (1 << 1) | 1:
-      XINT (result, 0) = XINT (s1, 0) & XINT (s2, 0);
+      result = XINT (s1, 0) & XINT (s2, 0);
       break;
     default:
       gcc_unreachable ();
     }
 
-  XINT (result, 1) = XINT (s1, 1) | XINT (s2, 1);
-  return result;
+  return attr_rtx (EQ_ATTR_ALT, result, XINT (s1, 1) | XINT (s2, 1));
 }
 
 /* Return EQ_ATTR_ALT expression representing complement of S.  */
@@ -2455,12 +2389,7 @@ attr_alt_union (rtx s1, rtx s2)
 static rtx
 attr_alt_complement (rtx s)
 {
-  rtx result = rtx_alloc (EQ_ATTR_ALT);
-
-  XINT (result, 0) = XINT (s, 0);
-  XINT (result, 1) = 1 - XINT (s, 1);
-
-  return result;
+  return attr_rtx (EQ_ATTR_ALT, XINT (s, 0), 1 - XINT (s, 1));
 }
 
 /* Return EQ_ATTR_ALT expression representing set containing elements set
@@ -2469,12 +2398,7 @@ attr_alt_complement (rtx s)
 static rtx
 mk_attr_alt (uint64_t e)
 {
-  rtx result = rtx_alloc (EQ_ATTR_ALT);
-
-  XINT (result, 0) = e;
-  XINT (result, 1) = 0;
-
-  return result;
+  return attr_rtx (EQ_ATTR_ALT, (int)e, 0);
 }
 
 /* Given an expression, see if it can be simplified for a particular insn
@@ -3045,7 +2969,6 @@ optimize_attrs (int num_insn_codes)
 	      && attr_rtx_cost (newexp) < 26
 	     )
 	    {
-	      newexp = attr_copy_rtx (newexp);
 	      remove_insn_ent (av, ie);
 	      av = get_attr_value (ie->def->loc, newexp, attr,
 				   ie->def->insn_code);
@@ -5004,7 +4927,7 @@ make_automaton_attrs (void)
 	{
 	  int j;
 	  char *name;
-	  rtx test = attr_rtx (EQ_ATTR, tune_attr->name, XSTR (val->value, 0));
+	  rtx test = attr_eq (tune_attr->name, XSTR (val->value, 0));
 
 	  if (val == tune_attr->default_val)
 	    continue;

Reply via email to