The branch, 1.2.40 has been updated
       via  a17316ab3a5ee3cd748b26863c55e82937edf013 (commit)
       via  dc2bf2f7d27913f29f5b6c92e41c6b59fa31d163 (commit)
      from  7c4998cad4d3debb06f62108fb099b6427310419 (commit)

http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=1.2.40


- Log -----------------------------------------------------------------
commit a17316ab3a5ee3cd748b26863c55e82937edf013
Author: Amitay Isaacs <ami...@gmail.com>
Date:   Wed Sep 12 15:02:30 2012 +1000

    New version 1.2.50
    
    Signed-off-by: Amitay Isaacs <ami...@gmail.com>

commit dc2bf2f7d27913f29f5b6c92e41c6b59fa31d163
Author: Martin Schwenke <mar...@meltin.net>
Date:   Thu Sep 6 20:22:38 2012 +1000

    common: Debug ctdb_addr_to_str() using new function ctdb_external_trace()
    
    We've seen this function report "Unknown family, 0" and then CTDB
    disappeared without a trace.  If we can reproduce it then this might
    help us to debug it.
    
    The idea is that you do something like the following in /etc/sysconfig/ctdb:
    
      export CTDB_EXTERNAL_TRACE="/etc/ctdb/config/gcore_trace.sh"
    
    When we hit this error then we call out to gcore to get a core file so
    we can do forensics.  This might block CTDB for a few seconds.
    
    Signed-off-by: Martin Schwenke <mar...@meltin.net>

-----------------------------------------------------------------------

Summary of changes:
 Makefile.in                |    1 +
 common/ctdb_util.c         |   25 +++++++++++++++++++++++++
 config/gcore_trace.sh      |    3 +++
 include/ctdb_private.h     |    1 +
 packaging/RPM/ctdb.spec.in |    5 ++++-
 5 files changed, 34 insertions(+), 1 deletions(-)
 create mode 100755 config/gcore_trace.sh


Changeset truncated at 500 lines:

diff --git a/Makefile.in b/Makefile.in
index c97a9ca..e3d6d5b 100755
--- a/Makefile.in
+++ b/Makefile.in
@@ -291,6 +291,7 @@ install: all
        if [ -f doc/ltdbtool.1 ]; then ${INSTALLCMD} -m 644 doc/ltdbtool.1 
$(DESTDIR)$(mandir)/man1; fi
        if [ ! -f $(DESTDIR)$(etcdir)/ctdb/notify.sh ];then ${INSTALLCMD} -m 
755 config/notify.sh $(DESTDIR)$(etcdir)/ctdb; fi
        if [ ! -f $(DESTDIR)$(etcdir)/ctdb/ctdb-crash-cleanup.sh ];then 
${INSTALLCMD} -m 755 config/ctdb-crash-cleanup.sh $(DESTDIR)$(etcdir)/ctdb; fi
+       if [ ! -f $(DESTDIR)$(etcdir)/ctdb/gcore_trace.sh ];then ${INSTALLCMD} 
-m 755 config/gcore_trace.sh $(DESTDIR)$(etcdir)/ctdb; fi
 
 test: all
        tests/run_tests.sh
diff --git a/common/ctdb_util.c b/common/ctdb_util.c
index 061c16d..bb212f5 100644
--- a/common/ctdb_util.c
+++ b/common/ctdb_util.c
@@ -60,6 +60,30 @@ void ctdb_fatal(struct ctdb_context *ctdb, const char *msg)
        abort();
 }
 
+/* Invoke an external program to do some sort of tracing on the CTDB
+ * process.  This might block for a little while.  The external
+ * program is specified by the environment variable
+ * CTDB_EXTERNAL_TRACE.  This program should take one argument: the
+ * pid of the process to trace.  Commonly, the program would be a
+ * wrapper script around gcore.
+ */
+void ctdb_external_trace(void)
+{
+
+       const char * t = getenv("CTDB_EXTERNAL_TRACE");
+       char * cmd;
+
+       if (t == NULL) {
+               return;
+       }
+
+       cmd = talloc_asprintf(NULL, "%s %lu", t, (unsigned long) getpid());
+       DEBUG(DEBUG_WARNING,("begin external trace: %s\n", cmd));
+       system(cmd);
+       DEBUG(DEBUG_WARNING,("end external trace: %s\n", cmd));
+       talloc_free(cmd);
+}
+
 /*
   parse a IP:port pair
 */
@@ -574,6 +598,7 @@ char *ctdb_addr_to_str(ctdb_sock_addr *addr)
                break;
        default:
                DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", 
addr->sa.sa_family));
+               ctdb_external_trace();
        }
 
        return cip;
diff --git a/config/gcore_trace.sh b/config/gcore_trace.sh
new file mode 100755
index 0000000..4d3e1d1
--- /dev/null
+++ b/config/gcore_trace.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+gcore -o "/var/log/core" "$1" 2>&1 | logger -t "ctdb:gcore_trace"
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 3a5d3cf..b5bd45c 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -655,6 +655,7 @@ struct ctdb_fetch_handle {
 /* internal prototypes */
 void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) 
PRINTF_ATTRIBUTE(2,3);
 void ctdb_fatal(struct ctdb_context *ctdb, const char *msg);
+void ctdb_external_trace(void);
 bool ctdb_same_address(struct ctdb_address *a1, struct ctdb_address *a2);
 int ctdb_parse_address(struct ctdb_context *ctdb,
                       TALLOC_CTX *mem_ctx, const char *str,
diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in
index 2d744b2..2548eba 100644
--- a/packaging/RPM/ctdb.spec.in
+++ b/packaging/RPM/ctdb.spec.in
@@ -3,7 +3,7 @@ Name: ctdb
 Summary: Clustered TDB
 Vendor: Samba Team
 Packager: Samba Team <sa...@samba.org>
-Version: 1.2.49
+Version: 1.2.50
 Release: 1GITHASH
 Epoch: 0
 License: GNU GPL version 3
@@ -89,6 +89,7 @@ rm -rf $RPM_BUILD_ROOT
 %config(noreplace) %{_sysconfdir}/sysconfig/ctdb
 %config(noreplace) %{_sysconfdir}/ctdb/notify.sh
 %config(noreplace) %{_sysconfdir}/ctdb/ctdb-crash-cleanup.sh
+%config(noreplace) %{_sysconfdir}/ctdb/gcore_trace.sh
 %config(noreplace) %{_sysconfdir}/ctdb/functions
 %attr(755,root,root) %{initdir}/ctdb
 
@@ -145,6 +146,8 @@ development libraries for ctdb
 
 %changelog
 
+* Wed Sep 12 2012 : Version 1.2.50
+  - Add utility function to dump core and use it for "unknown family, 0" error
 * Tue Aug 21 2012 : Version 1.2.49
  - logging: Close unix socket /tmp/ctdb.socket in syslogd process
  - Initscript: Kill any existing ctdbd processes if the ping succeeds


-- 
CTDB repository

Reply via email to