There are many places in GCC where alternative local sequences are
tried in order to determine what is the cheapest or best alternative
to use in the current target.  When any of these sequences involve a
libcall, the current implementation of emit_library_call_value_1
introduce a side-effect consisting on emitting an external declaration
for the funcall (such as __divdi3) which is thus emitted even if the
sequence that does the libcall is not retained.

This is problematic in targets such as BPF, because the kernel loader
chokes on the spurious symbol __divdi3 and makes the resulting BPF
object unloadable.  Note that BPF objects are not linked before being
loaded.

This patch changes asssemble_external_libcall to defer emitting
declarations of external libcall symbols, by saving the call tree
nodes in a temporary list pending_libcall_symbols and letting
process_pending_assembly_externals to emit them only if they have been
referenced.  Solution suggested and sketched by Richard Sandiford.

Regtested in x86_64-linux-gnu.
Tested with host x86_64-linux-gnu with target bpf-unknown-none.

gcc/ChangeLog

        * varasm.cc (pending_libcall_symbols): New variable.
        (process_pending_assemble_externals): Process
        pending_libcall_symbols.
        (assemble_external_libcall): Defer emitting external libcall
        symbols to process_pending_assemble_externals.

gcc/testsuite/ChangeLog

        * gcc.target/bpf/divmod-libcall-1.c: New test.
        * gcc.target/bpf/divmod-libcall-2.c: Likewise.
        * gcc.c-torture/compile/libcall-2.c: Likewise.
---
 .../gcc.c-torture/compile/libcall-2.c         |  8 +++++++
 .../gcc.target/bpf/divmod-libcall-1.c         | 19 ++++++++++++++++
 .../gcc.target/bpf/divmod-libcall-2.c         | 16 ++++++++++++++
 gcc/varasm.cc                                 | 22 ++++++++++++++++++-
 4 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/libcall-2.c
 create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-libcall-1.c
 create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-libcall-2.c

diff --git a/gcc/testsuite/gcc.c-torture/compile/libcall-2.c 
b/gcc/testsuite/gcc.c-torture/compile/libcall-2.c
new file mode 100644
index 00000000000..b33944c83ff
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/libcall-2.c
@@ -0,0 +1,8 @@
+/* Make sure that external refences for libcalls are generated even for
+   indirect calls.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcmodel=large" { target x86_64-*-* } } */
+/* { dg-final { scan-assembler "globl\t__divti3" } } */
+
+__int128 a, b; void foo () { a = a / b; }
diff --git a/gcc/testsuite/gcc.target/bpf/divmod-libcall-1.c 
b/gcc/testsuite/gcc.target/bpf/divmod-libcall-1.c
new file mode 100644
index 00000000000..7481076602a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/bpf/divmod-libcall-1.c
@@ -0,0 +1,19 @@
+/* This test makes sure that no spurious external symbol declarations are
+   emitted for libcalls in tried but eventually not used code sequences.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=v3" } */
+/* { dg-final { scan-assembler-not "global\t__divdi3" } } */
+/* { dg-final { scan-assembler-not "global\t__moddi3" } } */
+
+int
+foo (unsigned int len)
+{
+  return ((unsigned long)len) * 234 / 5;
+}
+
+int
+bar (unsigned int len)
+{
+  return ((unsigned long)len) * 234 % 5;
+}
diff --git a/gcc/testsuite/gcc.target/bpf/divmod-libcall-2.c 
b/gcc/testsuite/gcc.target/bpf/divmod-libcall-2.c
new file mode 100644
index 00000000000..792d689395a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/bpf/divmod-libcall-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=v3" } */
+/* { dg-final { scan-assembler "global\t__divdi3" } } */
+/* { dg-final { scan-assembler "global\t__moddi3" } } */
+
+int
+foo (unsigned int len)
+{
+  return ((long)len) * 234 / 5;
+}
+
+int
+bar (unsigned int len)
+{
+  return ((long)len) * 234 % 5;
+}
diff --git a/gcc/varasm.cc b/gcc/varasm.cc
index 6ae35edc5ae..deb7eab7af9 100644
--- a/gcc/varasm.cc
+++ b/gcc/varasm.cc
@@ -2461,6 +2461,10 @@ contains_pointers_p (tree type)
    it all the way to final.  See PR 17982 for further discussion.  */
 static GTY(()) tree pending_assemble_externals;
 
+/* A similar list of pending libcall symbols.  We only want to declare
+   symbols that are actually used in the final assembly.  */
+static GTY(()) rtx pending_libcall_symbols;
+
 #ifdef ASM_OUTPUT_EXTERNAL
 /* Some targets delay some output to final using TARGET_ASM_FILE_END.
    As a result, assemble_external can be called after the list of externals
@@ -2520,8 +2524,17 @@ process_pending_assemble_externals (void)
   for (list = pending_assemble_externals; list; list = TREE_CHAIN (list))
     assemble_external_real (TREE_VALUE (list));
 
+  for (rtx list = pending_libcall_symbols; list; list = XEXP (list, 1))
+    {
+      rtx symbol = XEXP (list, 0);
+      tree id = get_identifier (XSTR (symbol, 0));
+      if (TREE_SYMBOL_REFERENCED (id))
+       targetm.asm_out.external_libcall (symbol);
+    }
+
   pending_assemble_externals = 0;
   pending_assemble_externals_processed = true;
+  pending_libcall_symbols = NULL_RTX;
   delete pending_assemble_externals_set;
 #endif
 }
@@ -2594,8 +2607,15 @@ assemble_external_libcall (rtx fun)
   /* Declare library function name external when first used, if nec.  */
   if (! SYMBOL_REF_USED (fun))
     {
+      gcc_assert (!pending_assemble_externals_processed);
       SYMBOL_REF_USED (fun) = 1;
-      targetm.asm_out.external_libcall (fun);
+      /* Make sure the libcall symbol is in the symtab so any
+         reference to it will mark its tree node as referenced, via
+         assemble_name_resolve.  These are eventually emitted, if
+         used, in process_pending_assemble_externals. */
+      get_identifier (XSTR (fun, 0));
+      pending_libcall_symbols = gen_rtx_EXPR_LIST (VOIDmode, fun,
+                                                  pending_libcall_symbols);
     }
 }
 
-- 
2.30.2

Reply via email to