The branch, master has been updated
       via  c0bb147ca09e82019b05ec22995623cffc3184e2 (commit)
       via  36de63843de10a1f2a9ccdbbee24cc1d08542984 (commit)
       via  ea5576071b22e1877903ec0921d375626a23e13b (commit)
       via  d8a76cf79f07dfb5a93c6c9a13f16e3268c7dd57 (commit)
       via  d4e206fb818048b7fab4797c877b854bdbb1ab70 (commit)
       via  8753a094b97340deb26dd44f6ea345ca0a642a95 (commit)
       via  4a388fc6bf54636b7e1f6da8e6aa451cddd574f7 (commit)
       via  16fcff0d1993b7a0479341862ea44d10bd5c6d6d (commit)
      from  09940255011b119dc6af3304f5d3e9568e6006fd (commit)
http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit c0bb147ca09e82019b05ec22995623cffc3184e2 Author: Martin Schwenke <mar...@meltin.net> Date: Mon Sep 9 16:16:24 2013 +1000 common: Make parse_ip() valgrind-clean Signed-off-by: Martin Schwenke <mar...@meltin.net> Pair-programmed-with: Amitay Isaacs <ami...@gmail.com> commit 36de63843de10a1f2a9ccdbbee24cc1d08542984 Author: Martin Schwenke <mar...@meltin.net> Date: Tue Aug 27 15:27:30 2013 +1000 recoverd: Remove an orphaned comment This should have been removed with the associated code in commit 14bd0b6961ef1294e9cba74ce875386b7dfbf446. Signed-off-by: Martin Schwenke <mar...@meltin.net> commit ea5576071b22e1877903ec0921d375626a23e13b Author: Martin Schwenke <mar...@meltin.net> Date: Tue Aug 27 15:24:17 2013 +1000 recoverd: Update a comment to use current terminology Signed-off-by: Martin Schwenke <mar...@meltin.net> commit d8a76cf79f07dfb5a93c6c9a13f16e3268c7dd57 Author: Martin Schwenke <mar...@meltin.net> Date: Tue Aug 27 15:16:51 2013 +1000 client: Remove unused function list_of_active_nodes_except_pnn() Signed-off-by: Martin Schwenke <mar...@meltin.net> commit d4e206fb818048b7fab4797c877b854bdbb1ab70 Author: Martin Schwenke <mar...@meltin.net> Date: Tue Aug 27 15:14:10 2013 +1000 tools/ctdb: list_of_active_nodes_except_pnn() -> list_of_nodes() list_of_active_nodes_except_pnn() is only used here and can be removed if we remove this call. Less is more... Signed-off-by: Martin Schwenke <mar...@meltin.net> commit 8753a094b97340deb26dd44f6ea345ca0a642a95 Author: Martin Schwenke <mar...@meltin.net> Date: Wed Aug 28 15:36:27 2013 +1000 tools/ctdb: Fix a memory leak in parse_nodestring() Signed-off-by: Martin Schwenke <mar...@meltin.net> commit 4a388fc6bf54636b7e1f6da8e6aa451cddd574f7 Author: Martin Schwenke <mar...@meltin.net> Date: Fri Sep 6 16:37:52 2013 +1000 tests/eventscripts: Tests for memory checking in 00.ctdb ... 
plus updates to test infrastructure to support. Signed-off-by: Martin Schwenke <mar...@meltin.net> commit 16fcff0d1993b7a0479341862ea44d10bd5c6d6d Author: Martin Schwenke <mar...@meltin.net> Date: Fri Sep 6 12:13:31 2013 +1000 eventscripts: Clean up monitoring of system memory in 00.ctdb Signed-off-by: Martin Schwenke <mar...@meltin.net> ----------------------------------------------------------------------- Summary of changes: client/ctdb_client.c | 8 --- common/ctdb_util.c | 2 + config/events.d/00.ctdb | 71 +++++++++++++++++------------ include/ctdb_client.h | 4 -- server/ctdb_takeover.c | 11 ++--- tests/eventscripts/00.ctdb.monitor.001.sh | 15 ++++++ tests/eventscripts/00.ctdb.monitor.002.sh | 15 ++++++ tests/eventscripts/00.ctdb.monitor.003.sh | 19 ++++++++ tests/eventscripts/00.ctdb.monitor.004.sh | 17 +++++++ tests/eventscripts/00.ctdb.monitor.005.sh | 21 +++++++++ tests/eventscripts/etc-ctdb/rc.local | 3 + tests/eventscripts/scripts/local.sh | 45 ++++++++++++++++++ tests/eventscripts/stubs/ctdb | 12 ++++- tests/eventscripts/stubs/free | 9 ++++ tests/eventscripts/stubs/ps | 12 +++++ tools/ctdb.c | 7 ++- 16 files changed, 217 insertions(+), 54 deletions(-) create mode 100755 tests/eventscripts/00.ctdb.monitor.001.sh create mode 100755 tests/eventscripts/00.ctdb.monitor.002.sh create mode 100755 tests/eventscripts/00.ctdb.monitor.003.sh create mode 100755 tests/eventscripts/00.ctdb.monitor.004.sh create mode 100755 tests/eventscripts/00.ctdb.monitor.005.sh create mode 100755 tests/eventscripts/stubs/free create mode 100755 tests/eventscripts/stubs/ps Changeset truncated at 500 lines: diff --git a/client/ctdb_client.c b/client/ctdb_client.c index 8bab9bb..77147c6 100644 --- a/client/ctdb_client.c +++ b/client/ctdb_client.c @@ -3521,14 +3521,6 @@ uint32_t *list_of_active_nodes(struct ctdb_context *ctdb, include_self ? 
-1 : ctdb->pnn); } -uint32_t *list_of_active_nodes_except_pnn(struct ctdb_context *ctdb, - struct ctdb_node_map *node_map, - TALLOC_CTX *mem_ctx, - uint32_t pnn) -{ - return list_of_nodes(ctdb, node_map, mem_ctx, NODE_FLAGS_INACTIVE, pnn); -} - uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *node_map, TALLOC_CTX *mem_ctx, diff --git a/common/ctdb_util.c b/common/ctdb_util.c index 0274a76..11f9d83 100644 --- a/common/ctdb_util.c +++ b/common/ctdb_util.c @@ -467,6 +467,8 @@ bool parse_ip(const char *addr, const char *ifaces, unsigned port, ctdb_sock_add char *p; bool ret; + ZERO_STRUCTP(saddr); /* valgrind :-) */ + /* now is this a ipv4 or ipv6 address ?*/ p = index(addr, ':'); if (p == NULL) { diff --git a/config/events.d/00.ctdb b/config/events.d/00.ctdb index d56c7c4..6bd35ed 100755 --- a/config/events.d/00.ctdb +++ b/config/events.d/00.ctdb @@ -136,6 +136,46 @@ set_ctdb_variables () { done } +monitor_system_memory () +{ + # If monitoring free memory then calculate how much there is + if [ -n "$CTDB_MONITOR_FREE_MEMORY_WARN" -o \ + -n "$CTDB_MONITOR_FREE_MEMORY" ] ; then + free_mem=$(free -m | awk '$2 == "buffers/cache:" { print $4 }') + fi + + # Shutdown CTDB when memory is below the configured limit + if [ -n "$CTDB_MONITOR_FREE_MEMORY" ] ; then + if [ $free_mem -le $CTDB_MONITOR_FREE_MEMORY ] ; then + echo "CRITICAL: OOM - ${free_mem}MB free <= ${CTDB_MONITOR_FREE_MEMORY}MB (CTDB threshold)" + echo "CRITICAL: Shutting down CTDB!!!" + get_proc "meminfo" + ps auxfww + set_proc "sysrq-trigger" "m" + ctdb disable + sleep 3 + ctdb shutdown + fi + fi + + # Warn when low on memory + if [ -n "$CTDB_MONITOR_FREE_MEMORY_WARN" ] ; then + if [ $free_mem -le $CTDB_MONITOR_FREE_MEMORY_WARN ] ; then + echo "WARNING: free memory is low - ${free_mem}MB free <= ${CTDB_MONITOR_FREE_MEMORY_WARN}MB (CTDB threshold)" + fi + fi + + # We should never enter swap, so SwapTotal == SwapFree. 
+ if [ "$CTDB_CHECK_SWAP_IS_NOT_USED" = "yes" ] ; then + set -- $(get_proc "meminfo" | awk '$1 ~ /Swap(Total|Free):/ { print $2 }') + if [ "$1" != "$2" ] ; then + echo We are swapping: + get_proc "meminfo" + ps auxfww + fi + fi +} + ############################################################ ctdb_check_args "$@" @@ -171,36 +211,7 @@ case "$1" in update_config_from_tdb & ;; monitor) - # We should never enter swap, so SwapTotal == SwapFree. - [ "$CTDB_CHECK_SWAP_IS_NOT_USED" = "yes" ] && { - if [ -n "`grep '^Swap\(Total\|Free\)' /proc/meminfo | uniq -s 10 -u`" ]; then - echo We are swapping: - cat /proc/meminfo - ps auxfww - fi - } - - # warn when we get low on memory - [ -z "$CTDB_MONITOR_FREE_MEMORY_WARN" ] || { - FREE_MEM=`free -m | grep "buffers/cache" | while read A B C D ;do echo -n $D ; done` - [ `expr "$FREE_MEM" "<" "$CTDB_MONITOR_FREE_MEMORY_WARN"` != "0" ] && { - echo "Running low on memory. Free:$FREE_MEM while CTDB treshold is $CTDB_MONITOR_FREE_MEMORY_WARN" - } - } - - # monitor that we are not running out of memory - [ -z "$CTDB_MONITOR_FREE_MEMORY" ] || { - FREE_MEM=`free -m | grep "buffers/cache" | while read A B C D ;do echo -n $D ; done` - [ `expr "$FREE_MEM" "<" "$CTDB_MONITOR_FREE_MEMORY"` != "0" ] && { - echo "OOM. 
Free:$FREE_MEM while CTDB treshold is $CTDB_MONITOR_FREE_MEMORY" - cat /proc/meminfo - ps auxfww - echo m > /proc/sysrq-trigger - ctdb disable - sleep 3 - ctdb shutdown - } - } + monitor_system_memory ;; *) diff --git a/include/ctdb_client.h b/include/ctdb_client.h index 8739923..28f0aae 100644 --- a/include/ctdb_client.h +++ b/include/ctdb_client.h @@ -528,10 +528,6 @@ uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb, struct ctdb_vnn_map *vnn_map, TALLOC_CTX *mem_ctx, bool include_self); -uint32_t *list_of_active_nodes_except_pnn(struct ctdb_context *ctdb, - struct ctdb_node_map *node_map, - TALLOC_CTX *mem_ctx, - uint32_t pnn); int ctdb_read_pnn_lock(int fd, int32_t pnn); diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c index f5d2d50..130df8a 100644 --- a/server/ctdb_takeover.c +++ b/server/ctdb_takeover.c @@ -2704,10 +2704,6 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, /* Do the IP reassignment calculations */ ctdb_takeover_run_core(ctdb, ipflags, &all_ips); - /* The IP flags need to be cleared because they should never - * be seen outside the IP allocation code. - */ - /* The recovery daemon does regular sanity checks of the IPs. * However, sometimes it is overzealous and thinks changes are * required when they're already underway. This stops the @@ -2721,9 +2717,10 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, DEBUG(DEBUG_INFO,("Failed to disable ip verification\n")); } - /* now tell all nodes to delete any alias that they should not - have. This will be a NOOP on nodes that don't currently - hold the given alias */ + /* Now tell all nodes to release any public IPs should not + * host. This will be a NOOP on nodes that don't currently + * hold the given IP. 
+ */ takeover_data = talloc_zero(tmp_ctx, struct takeover_callback_data); CTDB_NO_MEMORY_FATAL(ctdb, takeover_data); diff --git a/tests/eventscripts/00.ctdb.monitor.001.sh b/tests/eventscripts/00.ctdb.monitor.001.sh new file mode 100755 index 0000000..4290d13 --- /dev/null +++ b/tests/eventscripts/00.ctdb.monitor.001.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +. "${TEST_SCRIPTS_DIR}/unit.sh" + +define_test "Memory check, bad situation, no checks enabled" + +setup_memcheck "bad" + +CTDB_MONITOR_FREE_MEMORY="" +CTDB_MONITOR_FREE_MEMORY_WARN="" +CTDB_CHECK_SWAP_IS_NOT_USED="no" + +ok_null + +simple_test diff --git a/tests/eventscripts/00.ctdb.monitor.002.sh b/tests/eventscripts/00.ctdb.monitor.002.sh new file mode 100755 index 0000000..6e94012 --- /dev/null +++ b/tests/eventscripts/00.ctdb.monitor.002.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +. "${TEST_SCRIPTS_DIR}/unit.sh" + +define_test "Memory check, good situation, all enabled" + +setup_memcheck + +CTDB_MONITOR_FREE_MEMORY="500" +CTDB_MONITOR_FREE_MEMORY_WARN="1000" +CTDB_CHECK_SWAP_IS_NOT_USED="yes" + +ok_null + +simple_test diff --git a/tests/eventscripts/00.ctdb.monitor.003.sh b/tests/eventscripts/00.ctdb.monitor.003.sh new file mode 100755 index 0000000..9e63ab5 --- /dev/null +++ b/tests/eventscripts/00.ctdb.monitor.003.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +. "${TEST_SCRIPTS_DIR}/unit.sh" + +define_test "Memory check, bad situation, only swap check" + +setup_memcheck "bad" + +CTDB_MONITOR_FREE_MEMORY="" +CTDB_MONITOR_FREE_MEMORY_WARN="" +CTDB_CHECK_SWAP_IS_NOT_USED="yes" + +ok <<EOF +We are swapping: +$FAKE_PROC_MEMINFO +$(ps foobar) +EOF + +simple_test diff --git a/tests/eventscripts/00.ctdb.monitor.004.sh b/tests/eventscripts/00.ctdb.monitor.004.sh new file mode 100755 index 0000000..fdf2032 --- /dev/null +++ b/tests/eventscripts/00.ctdb.monitor.004.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +. 
"${TEST_SCRIPTS_DIR}/unit.sh" + +define_test "Memory check, bad situation, only memory warning" + +setup_memcheck "bad" + +CTDB_MONITOR_FREE_MEMORY="" +CTDB_MONITOR_FREE_MEMORY_WARN="500" +CTDB_CHECK_SWAP_IS_NOT_USED="no" + +ok <<EOF +WARNING: free memory is low - 468MB free <= ${CTDB_MONITOR_FREE_MEMORY_WARN}MB (CTDB threshold) +EOF + +simple_test diff --git a/tests/eventscripts/00.ctdb.monitor.005.sh b/tests/eventscripts/00.ctdb.monitor.005.sh new file mode 100755 index 0000000..a46851a --- /dev/null +++ b/tests/eventscripts/00.ctdb.monitor.005.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +. "${TEST_SCRIPTS_DIR}/unit.sh" + +define_test "Memory check, bad situation, only memory critical" + +setup_memcheck "bad" + +CTDB_MONITOR_FREE_MEMORY="500" +CTDB_MONITOR_FREE_MEMORY_WARN="" +CTDB_CHECK_SWAP_IS_NOT_USED="no" + +ok <<EOF +CRITICAL: OOM - 468MB free <= ${CTDB_MONITOR_FREE_MEMORY}MB (CTDB threshold) +CRITICAL: Shutting down CTDB!!! +$FAKE_PROC_MEMINFO +$(ps foobar) +CTDB says BYE! +EOF + +simple_test diff --git a/tests/eventscripts/etc-ctdb/rc.local b/tests/eventscripts/etc-ctdb/rc.local index b11c7ec..6052d87 100755 --- a/tests/eventscripts/etc-ctdb/rc.local +++ b/tests/eventscripts/etc-ctdb/rc.local @@ -39,6 +39,9 @@ get_proc () */stack) echo "[<ffffffff87654321>] fake_stack_trace_for_pid_${1}+0x0/0xff" ;; + meminfo) + echo "$FAKE_PROC_MEMINFO" + ;; *) echo "get_proc: \"$1\" not implemented" exit 1 diff --git a/tests/eventscripts/scripts/local.sh b/tests/eventscripts/scripts/local.sh index 00da773..a522e78 100644 --- a/tests/eventscripts/scripts/local.sh +++ b/tests/eventscripts/scripts/local.sh @@ -311,6 +311,51 @@ setup_ctdb () export CTDB_PARTIALLY_ONLINE_INTERFACES } +setup_memcheck () +{ + setup_ctdb + + _swap_total="5857276" + + if [ "$1" = "bad" ] ; then + _swap_free=" 4352" + _mem_cached=" 112" + _mem_free=" 468" + else + _swap_free="$_swap_total" + _mem_cached="1112" + _mem_free="1468" + fi + + export FAKE_PROC_MEMINFO="\ +MemTotal: 3940712 kB +MemFree: 225268 kB 
+Buffers: 146120 kB +Cached: 1139348 kB +SwapCached: 56016 kB +Active: 2422104 kB +Inactive: 1019928 kB +Active(anon): 1917580 kB +Inactive(anon): 523080 kB +Active(file): 504524 kB +Inactive(file): 496848 kB +Unevictable: 4844 kB +Mlocked: 4844 kB +SwapTotal: ${_swap_total} kB +SwapFree: ${_swap_free} kB +..." + + export FAKE_FREE_M="\ + total used free shared buffers cached +Mem: 3848 3634 213 0 142 ${_mem_cached} +-/+ buffers/cache: 2379 ${_mem_free} +Swap: 5719 246 5473" + + export CTDB_MONITOR_FREE_MEMORY + export CTDB_MONITOR_FREE_MEMORY_WARN + export CTDB_CHECK_SWAP_IS_NOT_USED +} + ctdb_get_interfaces () { # The echo/subshell forces all the output onto 1 line. diff --git a/tests/eventscripts/stubs/ctdb b/tests/eventscripts/stubs/ctdb index 58007f2..da84ed7 100755 --- a/tests/eventscripts/stubs/ctdb +++ b/tests/eventscripts/stubs/ctdb @@ -124,8 +124,8 @@ ip_reallocate () # Have non-zero flags _this=0 for _j in "$FAKE_CTDB_STATE/node-state/"*"/$_i" ; do - _t="${_j%/*}" # dirname - _f="${_t%/*}" # basename + _tf="${_j%/*}" # dirname + _f="${_tf##*/}" # basename _this=$(( $_this | $_f )) done else @@ -224,6 +224,13 @@ ctdb_disable () ###################################################################### +ctdb_shutdown () +{ + echo "CTDB says BYE!" 
+} + +###################################################################### + case "$1" in gettickles) setup_tickles @@ -322,5 +329,6 @@ case "$1" in enable) ctdb_enable "$@";; disable) ctdb_disable "$@";; moveip) ctdb_moveip "$@";; + shutdown) ctdb_shutdown "$@";; *) not_implemented "$1" ;; esac diff --git a/tests/eventscripts/stubs/free b/tests/eventscripts/stubs/free new file mode 100755 index 0000000..6453509 --- /dev/null +++ b/tests/eventscripts/stubs/free @@ -0,0 +1,9 @@ +#!/bin/sh + +if [ "$1" = "-m" ] ; then + echo "$FAKE_FREE_M" + exit 0 +else + echo "free: not implemented - $*" + exit 1 +fi diff --git a/tests/eventscripts/stubs/ps b/tests/eventscripts/stubs/ps new file mode 100755 index 0000000..5abeaf9 --- /dev/null +++ b/tests/eventscripts/stubs/ps @@ -0,0 +1,12 @@ +#!/bin/sh + +cat <<EOF +USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND +root 2 0.0 0.0 0 0 ? S Aug28 0:00 [kthreadd] +root 3 0.0 0.0 0 0 ? S Aug28 0:43 \_ [ksoftirqd/0] +... +root 1 0.0 0.0 2976 624 ? Ss Aug28 0:07 init [2] +root 495 0.0 0.0 3888 1640 ? Ss Aug28 0:00 udevd --daemon +... +[MORE FAKE ps OUTPUT] +EOF diff --git a/tools/ctdb.c b/tools/ctdb.c index f851c9e..6ca3407 100644 --- a/tools/ctdb.c +++ b/tools/ctdb.c @@ -176,6 +176,7 @@ static bool parse_nodestring(struct ctdb_context *ctdb, int n; uint32_t i; struct ctdb_node_map *nodemap; + bool ret = true; *nodes = NULL; @@ -255,13 +256,13 @@ static bool parse_nodestring(struct ctdb_context *ctdb, if (!ctdb_getpnn(ctdb_connection, current_pnn, &((*nodes)[0]))) { - return false; + ret = false; } } ctdb_free_nodemap(nodemap); - return true; + return ret; } /* @@ -1606,7 +1607,7 @@ static int move_ip(struct ctdb_context *ctdb, ctdb_sock_addr *addr, uint32_t pnn return ret; } - nodes = list_of_active_nodes_except_pnn(ctdb, nodemap, tmp_ctx, pnn); + nodes = list_of_nodes(ctdb, nodemap, tmp_ctx, NODE_FLAGS_INACTIVE, pnn); ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_RELEASE_IP, nodes, 0, LONGTIMELIMIT(), -- CTDB repository