I've posted versions of this patch before, but muddled up with other changes. Hopefully this version is sufficiently self-contained to be understandable.

When a dynamic object is unregistered, we unload the offloaded routines it registed. We walk the splay tree mapping host-side functions and vars to device-side versions. Unfortunately we do this after having destroyed the splay tree itself. thus we never find anything, and this ends up leaking memory (the vector containing the mappings inserted into the tree).

this patch breaks out a new function, gomp_unload_image_from_device, which IHO makes the code more understandable in its own right. But we also arrange to call this function before destroying the splay tree.

I rename gomp_offload_image_to_device to gomp_load_image_to_device for consistency -- gomp_offload_unload_image_from_device seemed a bit of a mouthful.

ok for trunk?

nathan
2015-07-20  Nathan Sidwell  <nat...@codesourcery.com>

	libgomp/
	* target.c (gomp_offload_image_to_device): Rename to ...
	(gomp_load_image_to_device): ... here.
	(GOMP_offload_register): Adjust call.
	(gomp_init_device): Likewise.
	(gomp_unload_image_from_device): New.  Broken out of ...
	(GOMP_offload_unregister): ... here.  Call it.
	(gomp_unload_device): New.
	* libgomp.h (gomp_unload_device): Declare.
	* oacc-init.c (acc_shutdown_1): Unload from device before deleting
	mem maps.

	gcc/
	* config/nvptx/mkoffload.c (process): Add destructor call.

Index: gcc/config/nvptx/mkoffload.c
===================================================================
--- gcc/config/nvptx/mkoffload.c	(revision 226013)
+++ gcc/config/nvptx/mkoffload.c	(working copy)
@@ -880,18 +880,29 @@ process (FILE *in, FILE *out)
   fprintf (out, "#ifdef __cplusplus\n"
 	   "extern \"C\" {\n"
 	   "#endif\n");
+
   fprintf (out, "extern void GOMP_offload_register"
 	   " (const void *, int, const void *);\n");
+  fprintf (out, "extern void GOMP_offload_unregister"
+	   " (const void *, int, const void *);\n");
+
   fprintf (out, "#ifdef __cplusplus\n"
 	   "}\n"
 	   "#endif\n");
 
   fprintf (out, "extern const void *const __OFFLOAD_TABLE__[];\n\n");
-  fprintf (out, "static __attribute__((constructor)) void init (void)\n{\n");
-  fprintf (out, "  GOMP_offload_register (__OFFLOAD_TABLE__, %d,\n",
-	   GOMP_DEVICE_NVIDIA_PTX);
-  fprintf (out, "                         &target_data);\n");
-  fprintf (out, "};\n");
+
+  fprintf (out, "static __attribute__((constructor)) void init (void)\n"
+	   "{\n"
+	   "  GOMP_offload_register (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n"
+	   "                         &target_data);\n"
+	   "};\n", GOMP_DEVICE_NVIDIA_PTX);
+
+  fprintf (out, "static __attribute__((destructor)) void fini (void)\n"
+	   "{\n"
+	   "  GOMP_offload_unregister (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n"
+	   "                           &target_data);\n"
+	   "};\n", GOMP_DEVICE_NVIDIA_PTX);
 }
 
 static void
Index: libgomp/libgomp.h
===================================================================
--- libgomp/libgomp.h	(revision 226013)
+++ libgomp/libgomp.h	(working copy)
@@ -782,6 +782,7 @@ extern void gomp_unmap_vars (struct targ
 extern void gomp_init_device (struct gomp_device_descr *);
 extern void gomp_free_memmap (struct splay_tree_s *);
 extern void gomp_fini_device (struct gomp_device_descr *);
+extern void gomp_unload_device (struct gomp_device_descr *);
 
 /* work.c */
 
Index: libgomp/oacc-init.c
===================================================================
--- libgomp/oacc-init.c	(revision 226013)
+++ libgomp/oacc-init.c	(working copy)
@@ -252,6 +252,18 @@ acc_shutdown_1 (acc_device_t d)
   /* Get the base device for this device type.  */
   base_dev = resolve_device (d, true);
 
+  ndevs = base_dev->get_num_devices_func ();
+
+  /* Unload all the devices of this type that have been opened.  */
+  for (i = 0; i < ndevs; i++)
+    {
+      struct gomp_device_descr *acc_dev = &base_dev[i];
+
+      gomp_mutex_lock (&acc_dev->lock);
+      gomp_unload_device (acc_dev);
+      gomp_mutex_unlock (&acc_dev->lock);
+    }
+  
   gomp_mutex_lock (&goacc_thread_lock);
 
   /* Free target-specific TLS data and close all devices.  */
@@ -290,7 +302,6 @@ acc_shutdown_1 (acc_device_t d)
 
   gomp_mutex_unlock (&goacc_thread_lock);
 
-  ndevs = base_dev->get_num_devices_func ();
 
   /* Close all the devices of this type that have been opened.  */
   for (i = 0; i < ndevs; i++)
Index: libgomp/target.c
===================================================================
--- libgomp/target.c	(revision 226013)
+++ libgomp/target.c	(working copy)
@@ -638,12 +638,13 @@ gomp_update (struct gomp_device_descr *d
 
 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
    And insert to splay tree the mapping between addresses from HOST_TABLE and
-   from loaded target image.  */
+   from loaded target image.  We rely in the host and device compiler
+   emitting variable and functions in the same order.  */
 
 static void
-gomp_offload_image_to_device (struct gomp_device_descr *devicep,
-			      const void *host_table, const void *target_data,
-			      bool is_register_lock)
+gomp_load_image_to_device (struct gomp_device_descr *devicep,
+			   const void *host_table, const void *target_data,
+			   bool is_register_lock)
 {
   void **host_func_table = ((void ***) host_table)[0];
   void **host_funcs_end  = ((void ***) host_table)[1];
@@ -658,7 +659,8 @@ gomp_offload_image_to_device (struct gom
   /* Load image to device and get target addresses for the image.  */
   struct addr_pair *target_table = NULL;
   int i, num_target_entries
-    = devicep->load_image_func (devicep->target_id, target_data, &target_table);
+    = devicep->load_image_func (devicep->target_id, target_data,
+				&target_table);
 
   if (num_target_entries != num_funcs + num_vars)
     {
@@ -725,6 +727,59 @@ gomp_offload_image_to_device (struct gom
   free (target_table);
 }
 
+/* Unload the mappings described by target_data from device DEVICE_P.
+   The device must be locked.   */
+
+static void
+gomp_unload_image_from_device (struct gomp_device_descr *devicep,
+			       const void *host_table, const void *target_data)
+{
+  void **host_func_table = ((void ***) host_table)[0];
+  void **host_funcs_end  = ((void ***) host_table)[1];
+  void **host_var_table  = ((void ***) host_table)[2];
+  void **host_vars_end   = ((void ***) host_table)[3];
+
+  /* The func table contains only addresses, the var table contains addresses
+     and corresponding sizes.  */
+  int num_funcs = host_funcs_end - host_func_table;
+  int num_vars  = (host_vars_end - host_var_table) / 2;
+
+  unsigned j;
+  struct splay_tree_key_s k;
+  splay_tree_key node = NULL;
+
+  /* Find mapping at start of node array */
+  if (num_funcs || num_vars)
+    {
+      k.host_start = num_funcs ? (uintptr_t) host_func_table[0] : (uintptr_t) host_var_table[0];
+      k.host_end = k.host_start + 1;
+      node = splay_tree_lookup (&devicep->mem_map, &k);
+    }
+  
+  devicep->unload_image_func (devicep->target_id, target_data);
+
+  /* Remove mappings from splay tree.  */
+  for (j = 0; j < num_funcs; j++)
+    {
+      k.host_start = (uintptr_t) host_func_table[j];
+      k.host_end = k.host_start + 1;
+      splay_tree_remove (&devicep->mem_map, &k);
+    }
+
+  for (j = 0; j < num_vars; j++)
+    {
+      k.host_start = (uintptr_t) host_var_table[j * 2];
+      k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
+      splay_tree_remove (&devicep->mem_map, &k);
+    }
+
+  if (node)
+    {
+      free (node->tgt);
+      free (node);
+    }
+}
+
 /* This function should be called from every offload image while loading.
    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
    the target, and TARGET_DATA needed by target plugin.  */
@@ -742,7 +797,7 @@ GOMP_offload_register (const void *host_
       struct gomp_device_descr *devicep = &devices[i];
       gomp_mutex_lock (&devicep->lock);
       if (devicep->type == target_type && devicep->is_initialized)
-	gomp_offload_image_to_device (devicep, host_table, target_data, true);
+	gomp_load_image_to_device (devicep, host_table, target_data, true);
       gomp_mutex_unlock (&devicep->lock);
     }
 
@@ -767,69 +822,17 @@ void
 GOMP_offload_unregister (const void *host_table, int target_type,
 			 const void *target_data)
 {
-  void **host_func_table = ((void ***) host_table)[0];
-  void **host_funcs_end  = ((void ***) host_table)[1];
-  void **host_var_table  = ((void ***) host_table)[2];
-  void **host_vars_end   = ((void ***) host_table)[3];
   int i;
 
-  /* The func table contains only addresses, the var table contains addresses
-     and corresponding sizes.  */
-  int num_funcs = host_funcs_end - host_func_table;
-  int num_vars  = (host_vars_end - host_var_table) / 2;
-
   gomp_mutex_lock (&register_lock);
 
   /* Unload image from all initialized devices.  */
   for (i = 0; i < num_devices; i++)
     {
-      int j;
       struct gomp_device_descr *devicep = &devices[i];
       gomp_mutex_lock (&devicep->lock);
-      if (devicep->type != target_type || !devicep->is_initialized)
-	{
-	  gomp_mutex_unlock (&devicep->lock);
-	  continue;
-	}
-
-      devicep->unload_image_func (devicep->target_id, target_data);
-
-      /* Remove mapping from splay tree.  */
-      struct splay_tree_key_s k;
-      splay_tree_key node = NULL;
-      if (num_funcs > 0)
-	{
-	  k.host_start = (uintptr_t) host_func_table[0];
-	  k.host_end = k.host_start + 1;
-	  node = splay_tree_lookup (&devicep->mem_map, &k);
-	}
-      else if (num_vars > 0)
-	{
-	  k.host_start = (uintptr_t) host_var_table[0];
-	  k.host_end = k.host_start + (uintptr_t) host_var_table[1];
-	  node = splay_tree_lookup (&devicep->mem_map, &k);
-	}
-
-      for (j = 0; j < num_funcs; j++)
-	{
-	  k.host_start = (uintptr_t) host_func_table[j];
-	  k.host_end = k.host_start + 1;
-	  splay_tree_remove (&devicep->mem_map, &k);
-	}
-
-      for (j = 0; j < num_vars; j++)
-	{
-	  k.host_start = (uintptr_t) host_var_table[j * 2];
-	  k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
-	  splay_tree_remove (&devicep->mem_map, &k);
-	}
-
-      if (node)
-	{
-	  free (node->tgt);
-	  free (node);
-	}
-
+      if (devicep->type == target_type && devicep->is_initialized)
+	gomp_unload_image_from_device(devicep, host_table, target_data);
       gomp_mutex_unlock (&devicep->lock);
     }
 
@@ -858,13 +861,31 @@ gomp_init_device (struct gomp_device_des
     {
       struct offload_image_descr *image = &offload_images[i];
       if (image->type == devicep->type)
-	gomp_offload_image_to_device (devicep, image->host_table,
-				      image->target_data, false);
+	gomp_load_image_to_device (devicep, image->host_table,
+				   image->target_data, false);
     }
 
   devicep->is_initialized = true;
 }
 
+attribute_hidden void
+gomp_unload_device (struct gomp_device_descr *devicep)
+{
+  if (devicep->is_initialized)
+    {
+      unsigned i;
+      
+      /* Unload from device all images registered at the moment.  */
+      for (i = 0; i < num_offload_images; i++)
+	{
+	  struct offload_image_descr *image = &offload_images[i];
+	  if (image->type == devicep->type)
+	    gomp_unload_image_from_device (devicep, image->host_table,
+					   image->target_data);
+	}
+    }
+}
+
 /* Free address mapping tables.  MM must be locked on entry, and remains locked
    on return.  */
 

Reply via email to