On 08/29/2018 06:08 PM, Alexander Monakov wrote:
> On Wed, 29 Aug 2018, Martin Liška wrote:
>>> Can you shortly tell why the testcase in the PR segfaults?  Does the issue
>>> only affect indirect call profiling?
>>
>> What happens is that there will exist 2 instances of:
>> void * __gcov_indirect_call_callee;
>>
>> one in main executable, and one another in DSO loaded via dlopen.
> 
> Sorry, but no, there's no dlopen in the testcase, only plain dynamic linking,
> and the reason for segfault is more subtle than that.
> 
> Preparing the testcase was quite an enlightening experience.
> 
> Alexander
> 

Hello.

I've discussed the topic with Alexander on the Cauldron and we hoped
that the issue with unique __gcov_root can be handled with DECL_COMMON in each 
DSO.
Apparently this approach does not work as all DSOs in an executable have 
eventually
just a single instance of the __gcov_root, which is bad.

So that I returned back to catch the root of the failure. It shows that even 
with
-Wl,--dynamic-list-data __gcov_indirect_call_callee point to a different 
variable
among the DSOs. The reason is that the variable is TLS:

        data16  leaq    __gcov_indirect_call_callee@tlsgd(%rip), %rdi
        .value  0x6666
        rex64
        call    __tls_get_addr@PLT

which eventually points to a stack location:

   │0x7ffff7fee4ce <bazar+4>                                data16 lea 
0x4ad2(%rip),%rdi        # 0x7ffff7ff2fa8                                       
                                                                                
                                        │
  >│0x7ffff7fee4d6 <bazar+12>                               data16 data16 callq 
0x7ffff7fee190 <__tls_get_addr@plt>                                             
                                                                                
                               │

(gdb) p /x $rdi
$2 = 0x7ffff7ff2fa8

It's probably known limitation of TLS global variables.

So my next focus was on removal of TLS of variables and making the code not 
racy.
It's implemented in my patch where I check that both *_callee and *_counter 
variables
are non-null and if so profiler value is updated. I'm aware of racy of the 
code, but
price for it seems reasonable for me.

Doing 1000x calls from 2 concurrent threads of an empty function produce 
following
number of collisions:

$ gcov-dump -l bar.gcda
...
bar.gcda:    01a90000:   6:COUNTERS indirect_call 3 counts
bar.gcda:                   0: 1452010722 1837 1841 
...

That said I would like to go this direction. I would be happy to receive
a feedback. Then I'll test the patch.

Thanks,
Martin
>From b90cc7b270cf73d6fd376ab7b105c5207128ece0 Mon Sep 17 00:00:00 2001
From: marxin <mli...@suse.cz>
Date: Tue, 11 Sep 2018 15:41:24 +0200
Subject: [PATCH] Fix indirect call instrumentation in between DSOs (PR
 gcov-profile/84107).

---
 gcc/doc/gcov.texi         |  3 ++
 gcc/tree-profile.c        |  4 ---
 libgcc/libgcov-profiler.c | 70 ++++++++++++++++++++++-----------------
 3 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/gcc/doc/gcov.texi b/gcc/doc/gcov.texi
index 98f4a876293..e0291804344 100644
--- a/gcc/doc/gcov.texi
+++ b/gcc/doc/gcov.texi
@@ -798,8 +798,11 @@ Instrumented applications use a static destructor with priority 99
 to invoke the @code{__gcov_dump} function. Thus @code{__gcov_dump}
 is executed after all user defined static destructors,
 as well as handlers registered with @code{atexit}.
+
 If an executable loads a dynamic shared object via dlopen functionality,
 @option{-Wl,--dynamic-list-data} is needed to dump all profile data.
+Same option is needed in order to properly instrument destinations
+of indirect calls that happen in between multiple dynamic shared objects.
 
 Profiling run-time library reports various errors related to profile
 manipulation and profile saving.  Errors are printed into standard error output
diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c
index f96bd4b9704..477995020fd 100644
--- a/gcc/tree-profile.c
+++ b/gcc/tree-profile.c
@@ -94,8 +94,6 @@ init_ic_make_global_vars (void)
   TREE_STATIC (ic_void_ptr_var) = 1;
   DECL_ARTIFICIAL (ic_void_ptr_var) = 1;
   DECL_INITIAL (ic_void_ptr_var) = NULL;
-  if (targetm.have_tls)
-    set_decl_tls_model (ic_void_ptr_var, decl_default_tls_model (ic_void_ptr_var));
 
   gcov_type_ptr = build_pointer_type (get_gcov_type ());
 
@@ -111,8 +109,6 @@ init_ic_make_global_vars (void)
   TREE_STATIC (ic_gcov_type_ptr_var) = 1;
   DECL_ARTIFICIAL (ic_gcov_type_ptr_var) = 1;
   DECL_INITIAL (ic_gcov_type_ptr_var) = NULL;
-  if (targetm.have_tls)
-    set_decl_tls_model (ic_gcov_type_ptr_var, decl_default_tls_model (ic_gcov_type_ptr_var));
 }
 
 /* Create the type and function decls for the interface with gcov.  */
diff --git a/libgcc/libgcov-profiler.c b/libgcc/libgcov-profiler.c
index 7e208d75d86..6cd23442fb2 100644
--- a/libgcc/libgcov-profiler.c
+++ b/libgcc/libgcov-profiler.c
@@ -263,20 +263,14 @@ __gcov_topn_value_profiler_body (gcov_type *counters, gcov_type value)
      }
 }
 
-/* These two variables are used to actually track caller and callee.  Keep
-   them in TLS memory so races are not common (they are written to often).
+/* These two variables are used to actually track caller and callee.
+   We do not use TLS variables because they don't work properly
+   with -Wl,--dynamic-list-data.
    The variables are set directly by GCC instrumented code, so declaration
    here must match one in tree-profile.c.  */
 
-#if defined(HAVE_CC_TLS) && !defined (USE_EMUTLS)
-__thread
-#endif
-gcov_type *__gcov_indirect_call_topn_counters ATTRIBUTE_HIDDEN;
-
-#if defined(HAVE_CC_TLS) && !defined (USE_EMUTLS)
-__thread
-#endif
-void *__gcov_indirect_call_topn_callee ATTRIBUTE_HIDDEN;
+gcov_type *__gcov_indirect_call_topn_counters;
+void *__gcov_indirect_call_topn_callee;
 
 #ifdef TARGET_VTABLE_USES_DESCRIPTORS
 #define VTABLE_USES_DESCRIPTORS 1
@@ -284,37 +278,42 @@ void *__gcov_indirect_call_topn_callee ATTRIBUTE_HIDDEN;
 #define VTABLE_USES_DESCRIPTORS 0
 #endif
 
-/* This fucntion is instrumented at function entry to track topn indirect
-   calls to CUR_FUNC.  */
+/* This function is instrumented at function entry to track topn indirect
+   calls to CUR_FUNC.  As multiple threads can run concurrently,
+   we have to carefull about __gcov_indirect_call_topn_callee and
+   __gcov_indirect_call_topn_counters global variables.  It is possible
+   that a data race will confuse indirect call counter, but it should
+   be relatively rare.  */
  
 void
 __gcov_indirect_call_topn_profiler (gcov_type value, void* cur_func)
 {
   void *callee_func = __gcov_indirect_call_topn_callee;
+  void *counters = __gcov_indirect_call_topn_counters;
+
   /* If the C++ virtual tables contain function descriptors then one
      function may have multiple descriptors and we need to dereference
      the descriptors to see if they point to the same function.  */
-  if (cur_func == callee_func
-      || (VTABLE_USES_DESCRIPTORS && callee_func
-	  && *(void **) cur_func == *(void **) callee_func))
-    __gcov_topn_value_profiler_body (__gcov_indirect_call_topn_counters, value);
+  if ((cur_func == callee_func
+       || (VTABLE_USES_DESCRIPTORS && callee_func
+	   && *(void **) cur_func == *(void **) callee_func))
+      && counters != NULL)
+    __gcov_topn_value_profiler_body (counters, value);
+
+  __gcov_indirect_call_topn_callee = NULL;
+  __gcov_indirect_call_topn_counters = NULL;
 }
 #endif
 
 #ifdef L_gcov_indirect_call_profiler_v2
 
-/* These two variables are used to actually track caller and callee.  Keep
-   them in TLS memory so races are not common (they are written to often).
+/* These two variables are used to actually track caller and callee.
+   We do not use TLS variables because they don't work properly
+   with -Wl,--dynamic-list-data.
    The variables are set directly by GCC instrumented code, so declaration
    here must match one in tree-profile.c  */
 
-#if defined(HAVE_CC_TLS) && !defined (USE_EMUTLS)
-__thread
-#endif
 void * __gcov_indirect_call_callee;
-#if defined(HAVE_CC_TLS) && !defined (USE_EMUTLS)
-__thread
-#endif
 gcov_type * __gcov_indirect_call_counters;
 
 /* By default, the C++ compiler will use function addresses in the
@@ -323,21 +322,32 @@ gcov_type * __gcov_indirect_call_counters;
    of this macro says how many words wide the descriptor is (normally 2).
 
    It is assumed that the address of a function descriptor may be treated
-   as a pointer to a function.  */
+   as a pointer to a function.
+
+   As multiple threads can run concurrently,
+   we have to carefull about __gcov_indirect_call_callee and
+   __gcov_indirect_call_counters global variables.  It is possible
+   that a data race will confuse indirect call counter, but it should
+   be relatively rare.  */
 
 /* Tries to determine the most common value among its inputs. */
 void
 __gcov_indirect_call_profiler_v2 (gcov_type value, void* cur_func)
 {
+  void *callee = __gcov_indirect_call_callee;
+  void *counters = __gcov_indirect_call_counters;
+
   /* If the C++ virtual tables contain function descriptors then one
      function may have multiple descriptors and we need to dereference
      the descriptors to see if they point to the same function.  */
-  if (cur_func == __gcov_indirect_call_callee
-      || (__LIBGCC_VTABLE_USES_DESCRIPTORS__
-          && *(void **) cur_func == *(void **) __gcov_indirect_call_callee))
-    __gcov_one_value_profiler_body (__gcov_indirect_call_counters, value, 0);
+  if ((cur_func == callee
+       || (__LIBGCC_VTABLE_USES_DESCRIPTORS__
+	   && *(void **) cur_func == *(void **) callee))
+      && counters != NULL)
+    __gcov_one_value_profiler_body (counters, value, 0);
 
   __gcov_indirect_call_callee = NULL;
+  __gcov_indirect_call_counters = NULL;
 }
 #endif
 
-- 
2.18.0

Reply via email to