On Mon, 15 Oct 2018, Richard Biener wrote:
> I think it's sound but also note that I think it is logically independent of
> asm inline ().  While it may work for the inlining issue for some kernel
> examples to asm inline () is sth similar to always_inline for functions,
> that is, even though an asm _does_ have non-negligible .text size
> we do want to ignore that for the purpose of inlining (but not for the
> purpose of branch size estimation).

My understanding is that kernel folks are not demanding "always_inline"
semantics though; they were unhappy with inlining cost mis-estimation.

FWIW they went ahead and worked around the problem on their end via
assembler macros (and Borislav's ping has got no response so far).

> Note in your docs you refer to "non-code" sections but it should
> equally apply to .text sections that are not the section of the asm
> context (.text.cold, for example).  So better wording there would
> be appreciated.

I've tried to address this with the following incremental change. I think
it was the only suggestion, so the rest of the patch is unchanged.

@@ -9849,11 +9849,11 @@ a label is unreachable.
 Likewise, it is possible for GCC to significantly over-estimate the
 number of instructions in an @code{asm}, resulting in suboptimal decisions
 when the estimate is used during inlining and branch range optimization.
-This can happen if the @code{asm} template has many lines that do not
-correspond to instructions, for example when the @samp{.pushsection}
-directive is used to emit auxiliary data in a non-code section.
+For instance, this can happen if a @samp{.pushsection} directive is used to
+temporarily switch to another section and emit auxiliary code or data there.
 For Extended @code{asm} statements, you can improve the estimate by
-wrapping the non-code portion in @samp{%` ... %`} delimiters.
+wrapping the parts where newlines and separators should not be counted in
+@samp{%` ... %`} delimiters.


        * doc/extend.texi (Extended Asm): Document %` in template.
        (Size of an Asm): Document intended use of %`.
        * final.c (asm_insn_count): Adjust.
        (asm_str_count): Add argument to distinguish basic and extended asms.
        In extended asms, ignore separators inside of %` ... %`.
        (output_asm_insn): Handle %`.
        * rtl.h (asm_str_count): Adjust prototype.
        * tree-inline.c (estimate_num_insns): Adjust.
        * config/arm/arm.c (arm_rtx_costs_internal): Adjust.

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 34ecc4f8d14..dac4728375b 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -8622,6 +8622,11 @@ generates multiple assembler instructions.
 Outputs @samp{@{}, @samp{|}, and @samp{@}} characters (respectively)
 into the assembler code.  When unescaped, these characters have special
 meaning to indicate multiple assembler dialects, as described below.
+
+@item %`
+Signifies a boundary of a region where instruction separators are not
+counted towards its size (@pxref{Size of an asm}). Must be followed by
+a whitespace character.
 @end table
 
 @subsubheading Multiple assembler dialects in @code{asm} templates
@@ -9830,7 +9835,7 @@ does this by counting the number of instructions in the 
pattern of the
 @code{asm} and multiplying that by the length of the longest
 instruction supported by that processor.  (When working out the number
 of instructions, it assumes that any occurrence of a newline or of
-whatever statement separator character is supported by the assembler --
+whatever statement separator character is supported by the assembler ---
 typically @samp{;} --- indicates the end of an instruction.)
 
 Normally, GCC's estimate is adequate to ensure that correct
@@ -9841,6 +9846,15 @@ space in the object file than is needed for a single 
instruction.
 If this happens then the assembler may produce a diagnostic saying that
 a label is unreachable.
 
+Likewise, it is possible for GCC to significantly over-estimate the
+number of instructions in an @code{asm}, resulting in suboptimal decisions
+when the estimate is used during inlining and branch range optimization.
+For instance, this can happen if a @samp{.pushsection} directive is used to
+temporarily switch to another section and emit auxiliary code or data there.
+For Extended @code{asm} statements, you can improve the estimate by
+wrapping the parts where newlines and separators should not be counted in
+@samp{%` ... %`} delimiters.
+
 @node Alternate Keywords
 @section Alternate Keywords
 @cindex alternate keywords
diff --git a/gcc/final.c b/gcc/final.c
index 6e61f1e17a8..1953293d379 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -1408,29 +1408,40 @@ static int
 asm_insn_count (rtx body)
 {
   const char *templ;
+  bool basic = GET_CODE (body) == ASM_INPUT;
 
-  if (GET_CODE (body) == ASM_INPUT)
+  if (basic)
     templ = XSTR (body, 0);
   else
     templ = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
 
-  return asm_str_count (templ);
+  return asm_str_count (templ, basic);
 }
 
 /* Return the number of machine instructions likely to be generated for the
-   inline-asm template. */
+   inline-asm template.  BASIC indicates if it is used in a basic asm.  */
 int
-asm_str_count (const char *templ)
+asm_str_count (const char *templ, bool basic)
 {
   int count = 1;
+  bool in_backticks = false;
 
   if (!*templ)
     return 0;
 
   for (; *templ; templ++)
-    if (IS_ASM_LOGICAL_LINE_SEPARATOR (*templ, templ)
-       || *templ == '\n')
-      count++;
+    if (*templ == '%' && !basic)
+      {
+       templ++;
+       if (!*templ)
+         break;
+       /* Separators inside %` ... %` are not counted.  */
+       if (*templ == '`')
+         in_backticks = !in_backticks;
+      }
+    else if (IS_ASM_LOGICAL_LINE_SEPARATOR (*templ, templ)
+            || *templ == '\n')
+      count += !in_backticks;
 
   return count;
 }
@@ -3825,6 +3836,7 @@ output_asm_insn (const char *templ, rtx *operands)
   int oporder[MAX_RECOG_OPERANDS];
   char opoutput[MAX_RECOG_OPERANDS];
   int ops = 0;
+  bool in_backticks = false;
 
   /* An insn may return a null string template
      in a case where no assembler code is needed.  */
@@ -3891,6 +3903,17 @@ output_asm_insn (const char *templ, rtx *operands)
            p++;
            fprintf (asm_out_file, "%d", insn_counter);
          }
+       /* %` guards parts of the template that should not participate in
+          estimating its cost, e.g. where it switches to a non-text section.
+          It does not result in any output.  */
+       else if (*p == '`')
+         {
+           p++;
+           in_backticks = !in_backticks;
+           /* Leave room for future extensions.  */
+           if (*p && !ISSPACE (*p))
+             output_operand_lossage ("%%` must be followed by whitespace");
+         }
        /* % followed by a letter and some digits
           outputs an operand in a special way depending on the letter.
           Letters `acln' are implemented directly.
@@ -3984,6 +4007,9 @@ output_asm_insn (const char *templ, rtx *operands)
     output_asm_name ();
 
   putc ('\n', asm_out_file);
+
+  if (in_backticks)
+    output_operand_lossage ("missing closing %%`");
 }
 
 /* Output a LABEL_REF, or a bare CODE_LABEL, as an assembler symbol.  */
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 68d3ceab29f..d29229d2817 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -4288,7 +4288,7 @@ extern void simplify_using_condition (rtx, rtx *, bitmap);
 /* In final.c  */
 extern unsigned int compute_alignments (void);
 extern void update_alignments (vec<rtx> &);
-extern int asm_str_count (const char *templ);
+extern int asm_str_count (const char *, bool);
 
 struct rtl_hooks
 {
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 913425394e0..70004528339 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -4103,7 +4103,9 @@ estimate_num_insns (gimple *stmt, eni_weights *weights)
 
     case GIMPLE_ASM:
       {
-       int count = asm_str_count (gimple_asm_string (as_a <gasm *> (stmt)));
+       gasm *asm_stmt = as_a <gasm *> (stmt);
+       const char *templ = gimple_asm_string (asm_stmt);
+       int count = asm_str_count (templ, gimple_asm_input_p (asm_stmt));
        /* 1000 means infinity. This avoids overflows later
           with very long asm statements.  */
        if (count > 1000)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 8810df53aa3..63c35400dc9 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -11026,7 +11026,8 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum 
rtx_code outer_code,
       /* Just a guess.  Guess number of instructions in the asm
          plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
          though (see PR60663).  */
-        int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
+        const char *templ = ASM_OPERANDS_TEMPLATE (x);
+        int asm_length = MAX (1, asm_str_count (templ, false));
         int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
 
         *cost = COSTS_N_INSNS (asm_length + num_operands);

Reply via email to