Greetings,

the following patch (which is never meant to be committed, and is therefore
very ugly on purpose) is a proof of concept for an alternative to the recent
problematic feature proposal of returning failure status for gmond, which
is part of 3.1.3^H4^H5.

it has been tested on Linux amd64 and OpenBSD amd64 and applies to trunk
(includes reverting r2025 for simplicity).

it replaces apr_proc_detach with an inline plain-POSIX implementation of it,
which should most likely be just as portable (at least for the platforms we
care about). It intentionally doesn't include any error checking, to keep
the functionality obvious, and it was implemented by brute-force search and
replace, so it is definitely still missing several other interesting
failure paths.

Carlo

---
Index: lib/error_msg.c
===================================================================
--- lib/error_msg.c	(revision 2133)
+++ lib/error_msg.c	(working copy)
@@ -21,6 +21,7 @@
 int daemon_proc;    /* set nonzero by daemon_init() */
 
 int ganglia_quiet_errors = 0;
+int gmond_status = 0;
 
 static void err_doit (int, int, const char *, va_list);
 
@@ -121,7 +122,8 @@
    va_start (ap, fmt);
    err_doit (0, LOG_ERR, fmt, ap);
    va_end (ap);
-   exit (1);
+   gmond_status = 1;
+   exit (gmond_status);
 }
 
 /* Print a message and return to caller.
Index: gmond/gmond.c
===================================================================
--- gmond/gmond.c	(revision 2133)
+++ gmond/gmond.c	(working copy)
@@ -84,6 +84,9 @@
 /* The directory where DSO modules are located */
 char *module_dir = NULL;
 
+static int pipefd[2];
+extern int gmond_status;
+
 /* The array for outgoing UDP message channels */
 Ganglia_udp_send_channels udp_send_channels = NULL;
 
@@ -214,6 +217,13 @@
 char **gmond_argv;
 extern char **environ;
 
+void gmond_terminate()
+{
+  if (daemon_proc) {
+    write(pipefd[1], &gmond_status, sizeof(gmond_status));
+  }
+}
+
 /* apr_socket_send can't assure all characters in buf been sent. */
 static apr_status_t
 socket_send(apr_socket_t *sock, const char *buf, apr_size_t *len)
@@ -263,7 +273,8 @@
   exit(0);
 #endif
   err_msg("execve failed to reload %s: %s", gmond_bin, strerror(errno));
-  exit(1);
+  gmond_status = 1;
+  exit(gmond_status);
 }
 
 /* this is just a temporary function */
@@ -317,9 +328,25 @@
   if(!args_info.foreground_flag && should_daemonize && !debug_level)
     {
       char *cwd;
+      pid_t cpid;
 
       apr_filepath_get(&cwd, 0, global_context);
-      apr_proc_detach(1);
+      pipe(pipefd);
+      cpid = fork();
+      if (cpid > 0) {
+          close(pipefd[1]);
+          read(pipefd[0], &gmond_status, sizeof(gmond_status));
+          close(pipefd[0]);
+          _exit(gmond_status);
+      }
+      atexit(gmond_terminate);
+      close(pipefd[0]);
+      chdir("/");
+      setsid();
+      setpgid(0, 0);
+      freopen("/dev/null", "r", stdin);
+      freopen("/dev/null", "w", stdout);
+      freopen("/dev/null", "w", stderr);
       apr_filepath_set(cwd, global_context);
 
       /* enable errmsg logging to syslog */
@@ -359,7 +386,8 @@
   if(deaf && mute)
     {
       err_msg("Configured to run both deaf and mute. Nothing to do. Exiting.\n");
-      exit(1);
+      gmond_status = 1;
+      exit(gmond_status);
     }
 }
 
@@ -404,7 +432,8 @@
   if(!acl)
     {
       err_msg("Unable to allocate memory for ACL. Exiting.\n");
-      exit(1);
+      gmond_status = 1;
+      exit(gmond_status);
     }
 
   default_action = cfg_getstr( acl_config, "default");
@@ -419,7 +448,8 @@
   else
     {
       err_msg("Invalid default ACL '%s'. Exiting.\n", default_action);
-      exit(1);
+      gmond_status = 1;
+      exit(gmond_status);
     }
 
   /* Create an array to hold each of the access instructions */
@@ -427,7 +457,8 @@
   if(!acl->access_array)
     {
       err_msg("Unable to malloc access array. Exiting.\n");
-      exit(1);
+      gmond_status = 1;
+      exit(gmond_status);
     }
   for(i=0; i< num_access; i++)
     {
@@ -440,7 +471,8 @@
           /* This shouldn't happen unless maybe acl is empty and
            * the safest thing to do it exit */
           err_msg("Unable to process ACLs. Exiting.\n");
-          exit(1);
+          gmond_status = 1;
+          exit(gmond_status);
         }
 
       ip     = cfg_getstr( access_config, "ip");
@@ -449,7 +481,8 @@
       if(!ip && !mask && !action)
         {
           err_msg("An access record requires an ip, mask and action. Exiting.\n");
-          exit(1);
+          gmond_status = 1;
+          exit(gmond_status);
         }
 
       /* Process the action first */
@@ -464,7 +497,8 @@
       else
         {
           err_msg("ACL access entry has action '%s'. Must be deny|allow. Exiting.\n", action);
-          exit(1);
+          gmond_status = 1;
+          exit(gmond_status);
         }  
 
       /* Create the subnet */
@@ -473,7 +507,8 @@
       if(status != APR_SUCCESS)
         {
           err_msg("ACL access entry has invalid ip('%s')/mask('%s'). Exiting.\n", ip, mask);
-          exit(1);
+          gmond_status = 1;
+          exit(gmond_status);
         }
 
       /* Save this access entry to the acl */
@@ -525,12 +560,14 @@
       return APR_INET6;
 #else
       err_msg("IPv6 is not supported on this host. Exiting.\n");
-      exit(1);
+      gmond_status = 1;
+      exit(gmond_status);
 #endif
     }
 
   err_msg("Unknown family '%s'. Try inet4|inet6. Exiting.\n", family);
-  exit(1);
+  gmond_status = 1;
+  exit(gmond_status);
   /* shouldn't get here */
   return APR_UNSPEC;
 }
@@ -561,7 +598,8 @@
           char apr_err[512];
           apr_strerror(status, apr_err, 511);
           err_msg("apr_pollset_create failed: %s", apr_err);
-          exit(1);
+          gmond_status = 1;
+          exit(gmond_status);
         }
     }
 
@@ -572,7 +610,8 @@
           char apr_err[512];
           apr_strerror(status, apr_err, 511);
           err_msg("apr_pcalloc failed: %s", apr_err);
-          exit(1);
+          gmond_status = 1;
+          exit(gmond_status);
         }
     }
 
@@ -617,7 +656,8 @@
             {
               err_msg("Error creating multicast server mcast_join=%s port=%d mcast_if=%s family='%s'. Exiting.\n",
                   mcast_join? mcast_join: "NULL", port, mcast_if? mcast_if:"NULL",family);
-              exit(1);
+              gmond_status = 1;
+              exit(gmond_status);
             }
         }
       else
@@ -632,7 +672,8 @@
             {
               err_msg("Error creating UDP server on port %d bind=%s. Exiting.\n",
                   port, bindaddr? bindaddr: "unspecified");
-              exit(1);
+              gmond_status = 1;
+              exit(gmond_status);
             }
         }
 
@@ -647,7 +688,8 @@
       if(!channel)
         {
           err_msg("Unable to malloc memory for channel.  Exiting. \n");
-          exit(1);
+          gmond_status = 1;
+          exit(gmond_status);
         }
 
       /* Mark this channel as a udp_recv_channel */
@@ -668,7 +710,8 @@
       if(status != APR_SUCCESS)
         {
           err_msg("Failed to add socket to pollset. Exiting.\n");
-          exit(1);
+          gmond_status = 1;
+          exit(gmond_status);
         }
     }
 
@@ -677,7 +720,8 @@
         char apr_err[512];
         apr_strerror(status, apr_err, 511);
         err_msg("apr_pcalloc failed: %s", apr_err);
-        exit(1);
+        gmond_status = 1;
+        exit(gmond_status);
       }
 
   /* Process all the tcp_accept_channels */ 
@@ -711,7 +755,8 @@
       if(!socket)
         {
           err_msg("Unable to create tcp_accept_channel. Exiting.\n");
-          exit(1);
+          gmond_status = 1;
+          exit(gmond_status);
         }
 
       tcp_sockets[i] = socket;
@@ -725,7 +770,8 @@
       if(!channel)
         {
           err_msg("Unable to malloc data for channel. Exiting.\n");
-          exit(1);
+          gmond_status = 1;
+          exit(gmond_status);
         }
       
       channel->type = TCP_ACCEPT_CHANNEL;
@@ -744,7 +790,8 @@
       if(status != APR_SUCCESS)
          {
             err_msg("Failed to add socket to pollset. Exiting.\n");
-            exit(1);
+            gmond_status = 1;
+            exit(gmond_status);
          }
     }
 }
@@ -2150,7 +2197,8 @@
       if(!group)
         {
           err_msg("Unable to malloc memory for collection group. Exiting.\n");
-          exit(1);
+          gmond_status = 1;
+          exit(gmond_status);
         }
 
       group_conf  = cfg_getnsec( config_file, "collection_group", i);
@@ -2197,7 +2245,8 @@
               if((pcre_re = pcre_compile(name_match, 0, &pcre_err_ptr, &pcre_err_offset, NULL)) == NULL)
                 {
                   err_msg ("pcre_compile failed on %s\n", name_match);
-                  exit (1);
+                  gmond_status = 1;
+                  exit (gmond_status);
                 }
 
 
@@ -2205,7 +2254,8 @@
               if(apr_pool_create(&p, global_context) != APR_SUCCESS)
                 {
                   err_msg("pool creation failed\n");
-                  exit(1);
+                  gmond_status = 1;
+                  exit(gmond_status);
                 }
 
               for(hi = apr_hash_first(p, metric_callbacks);
@@ -2226,7 +2276,8 @@
                           default:
                             /* unexpected error */
                             err_msg ("unexpected pcre_exec error\n");
-                            exit (1);
+                            gmond_status = 1;
+                            exit (gmond_status);
                         }
                     }
                   else
@@ -2243,7 +2294,8 @@
                           if((ptrs = apr_pcalloc(p, strlen(title_tmpl) * sizeof(struct iovec))) == NULL)
                             {
                               err_msg("apr_pcalloc failed\n");
-                              exit(1);
+                              gmond_status = 1;
+                              exit(gmond_status);
                             }
                           for (i = 0; title_tmpl[i] != 0; i++)
                             {
@@ -2264,7 +2316,8 @@
                                       if(index < 1 || index > PCRE_MAX_SUBPATTERNS)
                                         {
                                           err_msg("title [%s] contains invalid reference to subpattern\n", title_tmpl);
-                                          exit(1);
+                                          gmond_status = 1;
+                                          exit(gmond_status);
                                         }
                                       pos1 = pcre_ovector[index * 2];
                                       pos2 = pcre_ovector[index * 2 + 1];
@@ -2778,8 +2831,10 @@
 
   gmond_argv = argv;
 
-  if (cmdline_parser (argc, argv, &args_info) != 0)
-      exit(1) ;
+  if (cmdline_parser (argc, argv, &args_info) != 0) {
+      gmond_status = 1;
+      exit(gmond_status);
+  }
 
   if(args_info.convert_given)
     {
@@ -2833,7 +2888,8 @@
       host_location = args_info.location_arg;
     }
 
-  
+  daemonize_if_necessary( argv );
+
   /* Collect my hostname */
   apr_gethostname( myname, APRMAXHOSTLEN+1, global_context);
 
@@ -2882,16 +2938,9 @@
   /* Initialize time variables */
   udp_last_heard = last_cleanup = next_collection = now = apr_time_now();
 
-  /* Daemonizing is one of the last things we do - if any of the preceding
-     steps have failed, the foreground phase has exited with a non-zero
-     status and the caller is hopefully aware that remediation is required.
-     Any non-zero return status after daemonizing is not passed to the caller,
-     as the caller will receive a return status of 0 at the moment we daemonize. */
-  daemonize_if_necessary( argv );
-  if (args_info.pid_file_given)
-    {
-      update_pidfile (args_info.pid_file_arg);
-    }
+  /* notify success to parent */
+  write(pipefd[1], &gmond_status, sizeof(gmond_status));
+  close(pipefd[1]);
 
   /* Loop */
   for(;!done;)
------------------------------------------------------------------------------
Join us December 9, 2009 for the Red Hat Virtual Experience,
a free event focused on virtualization and cloud computing. 
Attend in-depth sessions from your desk. Your couch. Anywhere.
http://p.sf.net/sfu/redhat-sfdev2dev
_______________________________________________
Ganglia-developers mailing list
Ganglia-developers@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/ganglia-developers

Reply via email to