The branch, 1.2.40 has been updated via a17316ab3a5ee3cd748b26863c55e82937edf013 (commit) via dc2bf2f7d27913f29f5b6c92e41c6b59fa31d163 (commit) from 7c4998cad4d3debb06f62108fb099b6427310419 (commit)
http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=1.2.40 - Log ----------------------------------------------------------------- commit a17316ab3a5ee3cd748b26863c55e82937edf013 Author: Amitay Isaacs <ami...@gmail.com> Date: Wed Sep 12 15:02:30 2012 +1000 New version 1.2.50 Signed-off-by: Amitay Isaacs <ami...@gmail.com> commit dc2bf2f7d27913f29f5b6c92e41c6b59fa31d163 Author: Martin Schwenke <mar...@meltin.net> Date: Thu Sep 6 20:22:38 2012 +1000 common: Debug ctdb_addr_to_str() using new function ctdb_external_trace() We've seen this function report "Unknown family, 0" and then CTDB disappeared without a trace. If we can reproduce it then this might help us to debug it. The idea is that you do something like the following in /etc/sysconfig/ctdb: export CTDB_EXTERNAL_TRACE="/etc/ctdb/config/gcore_trace.sh" When we hit this error then we call out to gcore to get a core file so we can do forensics. This might block CTDB for a few seconds. Signed-off-by: Martin Schwenke <mar...@meltin.net> ----------------------------------------------------------------------- Summary of changes: Makefile.in | 1 + common/ctdb_util.c | 25 +++++++++++++++++++++++++ config/gcore_trace.sh | 3 +++ include/ctdb_private.h | 1 + packaging/RPM/ctdb.spec.in | 5 ++++- 5 files changed, 34 insertions(+), 1 deletions(-) create mode 100755 config/gcore_trace.sh Changeset truncated at 500 lines: diff --git a/Makefile.in b/Makefile.in index c97a9ca..e3d6d5b 100755 --- a/Makefile.in +++ b/Makefile.in @@ -291,6 +291,7 @@ install: all if [ -f doc/ltdbtool.1 ]; then ${INSTALLCMD} -m 644 doc/ltdbtool.1 $(DESTDIR)$(mandir)/man1; fi if [ ! -f $(DESTDIR)$(etcdir)/ctdb/notify.sh ];then ${INSTALLCMD} -m 755 config/notify.sh $(DESTDIR)$(etcdir)/ctdb; fi if [ ! -f $(DESTDIR)$(etcdir)/ctdb/ctdb-crash-cleanup.sh ];then ${INSTALLCMD} -m 755 config/ctdb-crash-cleanup.sh $(DESTDIR)$(etcdir)/ctdb; fi + if [ ! 
-f $(DESTDIR)$(etcdir)/ctdb/gcore_trace.sh ];then ${INSTALLCMD} -m 755 config/gcore_trace.sh $(DESTDIR)$(etcdir)/ctdb; fi test: all tests/run_tests.sh diff --git a/common/ctdb_util.c b/common/ctdb_util.c index 061c16d..bb212f5 100644 --- a/common/ctdb_util.c +++ b/common/ctdb_util.c @@ -60,6 +60,30 @@ void ctdb_fatal(struct ctdb_context *ctdb, const char *msg) abort(); } +/* Invoke an external program to do some sort of tracing on the CTDB + * process. This might block for a little while. The external + * program is specified by the environment variable + * CTDB_EXTERNAL_TRACE. This program should take one argument: the + * pid of the process to trace. Commonly, the program would be a + * wrapper script around gcore. + */ +void ctdb_external_trace(void) +{ + + const char * t = getenv("CTDB_EXTERNAL_TRACE"); + char * cmd; + + if (t == NULL) { + return; + } + + cmd = talloc_asprintf(NULL, "%s %lu", t, (unsigned long) getpid()); + DEBUG(DEBUG_WARNING,("begin external trace: %s\n", cmd)); + system(cmd); + DEBUG(DEBUG_WARNING,("end external trace: %s\n", cmd)); + talloc_free(cmd); +} + /* parse a IP:port pair */ @@ -574,6 +598,7 @@ char *ctdb_addr_to_str(ctdb_sock_addr *addr) break; default: DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family)); + ctdb_external_trace(); } return cip; diff --git a/config/gcore_trace.sh b/config/gcore_trace.sh new file mode 100755 index 0000000..4d3e1d1 --- /dev/null +++ b/config/gcore_trace.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +gcore -o "/var/log/core" "$1" 2>&1 | logger -t "ctdb:gcore_trace" diff --git a/include/ctdb_private.h b/include/ctdb_private.h index 3a5d3cf..b5bd45c 100644 --- a/include/ctdb_private.h +++ b/include/ctdb_private.h @@ -655,6 +655,7 @@ struct ctdb_fetch_handle { /* internal prototypes */ void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) 
PRINTF_ATTRIBUTE(2,3); void ctdb_fatal(struct ctdb_context *ctdb, const char *msg); +void ctdb_external_trace(void); bool ctdb_same_address(struct ctdb_address *a1, struct ctdb_address *a2); int ctdb_parse_address(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, const char *str, diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in index 2d744b2..2548eba 100644 --- a/packaging/RPM/ctdb.spec.in +++ b/packaging/RPM/ctdb.spec.in @@ -3,7 +3,7 @@ Name: ctdb Summary: Clustered TDB Vendor: Samba Team Packager: Samba Team <sa...@samba.org> -Version: 1.2.49 +Version: 1.2.50 Release: 1GITHASH Epoch: 0 License: GNU GPL version 3 @@ -89,6 +89,7 @@ rm -rf $RPM_BUILD_ROOT %config(noreplace) %{_sysconfdir}/sysconfig/ctdb %config(noreplace) %{_sysconfdir}/ctdb/notify.sh %config(noreplace) %{_sysconfdir}/ctdb/ctdb-crash-cleanup.sh +%config(noreplace) %{_sysconfdir}/ctdb/gcore_trace.sh %config(noreplace) %{_sysconfdir}/ctdb/functions %attr(755,root,root) %{initdir}/ctdb @@ -145,6 +146,8 @@ development libraries for ctdb %changelog +* Wed Sep 12 2012 : Version 1.2.50 + - Add utility function to dump core and use it for "unknown family, 0" error * Tue Aug 21 2012 : Version 1.2.49 - logging: Close unix socket /tmp/ctdb.socket in syslogd process - Initscript: Kill any existing ctdbd processes if the ping succeeds -- CTDB repository