16.01.2012 09:20, Andrew Beekhof wrote:
[snip]
>>> At the same time, stonith_admin -B succeeds.
>>> The main difference I see is st_opt_sync_call in the latter case.
>>> Will try to experiment with it.
>>
>> Yeeeesssss!!!
>>
>> Now I see following:
>> Dec 19 11:53:34 vd01-a cluster-dlm: [2474]: info:
>> pacemaker_terminate_member: Requesting that node 1090782474/vd01-b be fenced
> 
> So the important question... what did you change?

Nice you're back ;)

+ rc = st->cmds->fence(st, *st_opt_sync_call*, node_uname, "reboot", 120);

I'm attaching my resulting version of pacemaker.c (it is still messy
because of the different approaches I tried before getting this result,
and it needs a cleanup). The function to look at is
pacemaker_terminate_member(), which is almost a one-to-one copy of
crm_terminate_member_no_mainloop(), except for a variable rename to
compile without warnings and the change of the ->fence() arguments.
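
For reference, the fix in the fencing path boils down to that one call (a
sketch; the surrounding error handling is the same as in
crm_terminate_member_no_mainloop(), and the explanation in the comment is
my reading of the behaviour rather than something taken from the sources):

    /* With st_opt_sync_call the library waits until stonith-ng reports
     * the outcome of the operation, so rc is the real fencing result.
     * Without it the call only submits the request, and cluster-dlm has
     * no mainloop that could pick up the asynchronous reply. */
    rc = st->cmds->fence(st, st_opt_sync_call, node_uname, "reboot", 120);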

> 
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info:
>> initiate_remote_stonith_op: Initiating remote operation reboot for
>> vd01-b: 21425fc0-4311-40fa-9647-525c3f258471
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: crm_get_peer: Node
>> vd01-c now has id: 1107559690
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: stonith_command:
>> Processed st_query from vd01-c: rc=0
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: crm_get_peer: Node
>> vd01-d now has id: 1124336906
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: stonith_command:
>> Processed st_query from vd01-d: rc=0
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: stonith_command:
>> Processed st_query from vd01-a: rc=0
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: call_remote_stonith:
>> Requesting that vd01-c perform op reboot vd01-b
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: crm_get_peer: Node
>> vd01-b now has id: 1090782474
>> ...
>> Dec 19 11:53:40 vd01-a stonith-ng: [1905]: info: stonith_command:
>> Processed st_fence_history from cluster-dlm: rc=0
>> Dec 19 11:53:40 vd01-a crmd: [1910]: info: tengine_stonith_notify: Peer
>> vd01-b was terminated (reboot) by vd01-c for vd01-a
>> (ref=21425fc0-4311-40fa-9647-525c3f258471): OK
>>
>> But then I see a minor issue: the node is marked to be fenced again:
>> Dec 19 11:53:40 vd01-a pengine: [1909]: WARN: pe_fence_node: Node vd01-b
>> will be fenced because it is un-expectedly down
> 
> Do you have logs for that?
> tengine_stonith_notify() got called, that should have been enough to
> get the node cleaned up in the cib.

Ugh, it seems so, but they have already been archived. I will restore
them to the nodes and try to compose an hb_report from them (but the pe
inputs are already lost; do you still need the logs without them?)

> 
>> ...
>> Dec 19 11:53:40 vd01-a pengine: [1909]: WARN: stage6: Scheduling Node
>> vd01-b for STONITH
>> ...
>> Dec 19 11:53:40 vd01-a crmd: [1910]: info: te_fence_node: Executing
>> reboot fencing operation (249) on vd01-b (timeout=60000)
>> ...
>> Dec 19 11:53:40 vd01-a stonith-ng: [1905]: info: call_remote_stonith:
>> Requesting that vd01-c perform op reboot vd01-b
>>
>> And so on.
>>
>> I can't investigate this one in more depth, because I use fence_xvm in
>> this testing cluster, and it has issues when running more than one
>> stonith resource on a node. Also, my RA (in the cluster where this
>> testing cluster runs) undefines the VM after a failure, so fence_xvm
>> does not see the fencing victim in qpid and is unable to fence it again.
>>
>> Maybe it is possible to check whether the node was just fenced and skip
>> the redundant fencing?
> 
> If the callbacks are being used correctly, it shouldn't be required.
#include <syslog.h>

#include "config.h"
#include "dlm_daemon.h"

#include <glib.h>
#include <bzlib.h>
#include <heartbeat/ha_msg.h>

#include <pacemaker/crm_config.h>

#include <pacemaker/crm/crm.h>
#include <pacemaker/crm/ais.h>
#include <pacemaker/crm/attrd.h>
/* heartbeat support is irrelevant here */
#undef SUPPORT_HEARTBEAT 
#define SUPPORT_HEARTBEAT 0
#include <pacemaker/crm/common/cluster.h>
#include <pacemaker/crm/cluster/stack.h>
#include <pacemaker/crm/common/ipc.h>
#include <pacemaker/crm/msg_xml.h>
#include <pacemaker/crm/cib.h>
#include <pacemaker/crm/stonith-ng.h>

#define COMMS_DIR     "/sys/kernel/config/dlm/cluster/comms"

int setup_ccs(void)
{
    /* To avoid creating an additional place for the dlm to be configured,
     * only allow configuration from the command-line until CoroSync is stable
     * enough to be used with Pacemaker
     */
    cfgd_groupd_compat = 0; /* always use libcpg and disable backward compat */
    return 0;
}

void close_ccs(void) { return; }
int get_weight(int nodeid, char *lockspace) { return 1; }

/* TODO: Make this configurable
 * Can't use logging.c as-is as whitetank exposes a different logging API
 */
void init_logging(void) {
    openlog("cluster-dlm", LOG_PERROR|LOG_PID|LOG_CONS|LOG_NDELAY, LOG_DAEMON);
    /* cl_log_enable_stderr(TRUE); */
}

void setup_logging(void) { return; }
void close_logging(void) {
    closelog();
}

extern int ais_fd_async;

char *local_node_uname = NULL;
void dlm_process_node(gpointer key, gpointer value, gpointer user_data);

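/* Connect to Pacemaker's AIS plugin, subscribe to membership updates and
 * request the current list of known nodes.  Returns the async fd that
 * dlm_controld should poll, or -1 if the connection failed. */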
int setup_cluster(void)
{
    ais_fd_async = -1;
    crm_log_init("cluster-dlm", LOG_INFO, FALSE, TRUE, 0, NULL);

    if(init_ais_connection(NULL, NULL, NULL, &local_node_uname, &our_nodeid) == FALSE) {
        log_error("Connection to our AIS plugin failed");
        return -1;
    }

    /* Sign up for membership updates */
    send_ais_text(crm_class_notify, "true", TRUE, NULL, crm_msg_ais);

    /* Requesting the current list of known nodes */
    send_ais_text(crm_class_members, __FUNCTION__, TRUE, NULL, crm_msg_ais);

    return ais_fd_async;
}

void update_cluster(void)
{
    static uint64_t last_membership = 0;
    cluster_quorate = crm_have_quorum;
    if(last_membership < crm_peer_seq) {
        log_debug("Processing membership %llu", crm_peer_seq);
        g_hash_table_foreach(crm_peer_id_cache, dlm_process_node, &last_membership);
        last_membership = crm_peer_seq;
    }
}

void process_cluster(int ci)
{
    ais_dispatch(ais_fd_async, NULL);
    update_cluster();
}

void close_cluster(void) {
    terminate_ais_connection();
}

#include <arpa/inet.h>
#include <corosync/totem/totemip.h>

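/* Mirror one entry of Pacemaker's peer cache into the dlm comms configfs
 * directory: drop the entry for a node that is no longer an active member,
 * and add the totem address(es) of an active member that is missing. */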
void dlm_process_node(gpointer key, gpointer value, gpointer user_data)
{
    int rc = 0;
    struct stat tmp;
    char path[PATH_MAX];
    crm_node_t *node = value;
    uint64_t *last = user_data;
    const char *action = "Skipped";

    gboolean do_add = FALSE;
    gboolean do_remove = FALSE;
    gboolean is_active = FALSE;

    memset(path, 0, PATH_MAX);
    snprintf(path, PATH_MAX, "%s/%d", COMMS_DIR, node->id);

    rc = stat(path, &tmp);
    is_active = crm_is_member_active(node);

    if(rc == 0 && is_active) {
        /* nothing to do?
         * maybe the node left and came back...
         */
    } else if(rc == 0) {
        do_remove = TRUE;

    } else if(is_active && node->addr) {
        do_add = TRUE;
    }

    if(do_remove) {
        action = "Removed";
        del_configfs_node(node->id);
    }

    if(do_add) {
        char *addr_copy = strdup(node->addr);
        char *addr_top = addr_copy;
        char *addr = NULL;

        if(do_remove) {
            action = "Re-added";
        } else {
            action = "Added";
        }

        do {
            char ipaddr[1024];
            int addr_family = AF_INET;
            int cna_len = 0;
            struct sockaddr_storage cna_addr;
            struct totem_ip_address totem_addr;

            addr = strsep(&addr_copy, " ");
            if(addr == NULL) {
                break;
            }

            /* do_cmd_get_node_addrs */
            if(strstr(addr, "ip(") == NULL) {
                continue;

            } else if(strchr(addr, ':')) {
                rc = sscanf(addr, "ip(%[0-9A-Fa-f:])", ipaddr);
                if(rc != 1) {
                    log_error("Could not extract IPv6 address from '%s'", addr);
                    continue;
                }
                addr_family = AF_INET6;

            } else {
                rc = sscanf(addr, "ip(%[0-9.]) ", ipaddr);
                if(rc != 1) {
                    log_error("Could not extract IPv4 address from '%s'", addr);
                    continue;
                }
            }

            rc = inet_pton(addr_family, ipaddr, &totem_addr);
            if(rc != 1) {
                log_error("Could not parse '%s' as an IPv%c address", ipaddr, (addr_family==AF_INET)?'4':'6');
                continue;
            }

            rc = totemip_parse(&totem_addr, ipaddr, addr_family);
            if(rc != 0) {
                log_error("Could not convert '%s' into a totem address", ipaddr);
                continue;
            }

            rc = totemip_totemip_to_sockaddr_convert(&totem_addr, 0, &cna_addr, &cna_len);
            if(rc != 0) {
                log_error("Could not convert totem address for '%s' into sockaddr", ipaddr);
                continue;
            }

            log_debug("Adding address %s to configfs for node %u", addr, node->id);
            add_configfs_node(node->id, ((char*)&cna_addr), cna_len, (node->id == our_nodeid));

        } while(addr != NULL);
        free(addr_top);
    }

    log_debug("%s %sctive node %u: born-on=%llu, last-seen=%llu, this-event=%llu, last-event=%llu",
               action, crm_is_member_active(value)?"a":"ina",
               node->id, node->born, node->last_seen,
               crm_peer_seq, (unsigned long long)*last);
}

int is_cluster_member(int nodeid)
{
    crm_node_t *node = crm_get_peer(nodeid, NULL);
    return crm_is_member_active(node);
}

char *nodeid2name(int nodeid) {
    crm_node_t *node = crm_get_peer(nodeid, NULL);
    if(node->uname == NULL) {
        return NULL;
    }
    return strdup(node->uname);
}

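/* Ask stonith-ng to fence the given node.  Essentially a copy of Pacemaker's
 * crm_terminate_member_no_mainloop(), but the fence() call is made with
 * st_opt_sync_call so that it blocks until stonith-ng reports a result.
 * Returns 0 on success, -1 if the node is unknown or the stonith API object
 * cannot be created, and 1 if connecting or fencing failed. */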
static int
pacemaker_terminate_member(int nodeid)
{
    int rc = stonith_ok;
    const char *node_uname = NULL;
    stonith_t *st = NULL;

    if (node_uname == NULL) {
        crm_node_t *node = crm_get_peer(nodeid, node_uname);
        if (node) {
            node_uname = node->uname;
        }
    }

    if (node_uname == NULL) {
        crm_err("Nothing known about node id=%d", nodeid);
        return -1;
    } else {
        st = stonith_api_new();
    }

    if (st) {
        rc = st->cmds->connect(st, crm_system_name, NULL);
    } else {
        crm_err("Could not connect to stonith subsystem");
        return -1;
    }

    if (rc == stonith_ok) {
        /* The default Pacemaker fencing action is "reboot"; the admin may remap it to "off" */
        crm_info("Requesting that node %d/%s be fenced", nodeid, node_uname);
        rc = st->cmds->fence(st, st_opt_sync_call, node_uname, "reboot", 120);
    }

    st->cmds->disconnect(st);
    stonith_api_delete(st);

    if (rc < stonith_ok) {
        crm_err("Could not fence node %d/%s: %s", nodeid, crm_str(node_uname), stonith_error2string(rc));
        rc = 1;
    } else {
        rc = 0;
    }

    return rc;
}

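/* dlm_controld hook: request that Pacemaker fence the given node and log the
 * outcome. */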
void kick_node_from_cluster(int nodeid)
{
    int rc = pacemaker_terminate_member(nodeid);
    switch(rc) {
        case 0:
            log_debug("Requested that node %d be kicked from the cluster", nodeid);
            break;
        case -1:
            log_error("Don't know how to kick node %d from the cluster", nodeid);
            break;
        case 1:
            log_error("Could not kick node %d from the cluster", nodeid);
            break;
        default:
            log_error("Unknown result when kicking node %d from the cluster", nodeid);
            break;
    }
    return;
}

int fence_in_progress(int *in_progress)
{
    *in_progress = 0;
    return 0;
}

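/* dlm_controld hook: report when the given node was last fenced, based on the
 * stonith-ng fencing history.  Sets *last_fenced_time and returns 0 if a
 * completed fencing operation is found (or if one is still in progress or is
 * being re-requested after a failure); returns 1 if there is no sign that the
 * node has been fenced. */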
int fence_node_time(int nodeid, uint64_t *last_fenced_time)
{
    int rc = 0;
    const char *node_uname = NULL;
    crm_node_t *node = crm_get_peer(nodeid, node_uname);
    stonith_history_t *history = NULL, *hp = NULL;
    stonith_t *st = NULL;
    int fencing_requested = 0;
    int refence = 0;

    if(last_fenced_time) {
        *last_fenced_time = 0;
    }

    if (node && node->uname) {
        node_uname = node->uname;
        st = stonith_api_new();

    } else {
        crm_err("Nothing known about node id=%d", nodeid);
        return 0;
    }

    if(st) {
        rc = st->cmds->connect(st, crm_system_name, NULL);
    }

    if(rc == stonith_ok) {
        int i = 0;
        st->cmds->history(st, st_opt_sync_call, node_uname, &history, 120);
        for(hp = history; hp; hp = hp->next, i++) {
            if(hp->state == st_done) {
                log_debug("Stonith history[%d]: Node %d/%s fenced at %llu", i, nodeid, node_uname, (unsigned long long)hp->completed);
                if(last_fenced_time) {
                    *last_fenced_time = hp->completed;
                }
                fencing_requested = 0;
            } else if (hp->state == st_failed) {
                log_debug("Stonith history[%d]: Fencing of node %d/%s failed, requesting it again", i, nodeid, node_uname);
                kick_node_from_cluster(nodeid);
                refence = 1;
                fencing_requested = 0;
            } else if (hp->state == st_exec) {
                log_debug("Stonith history[%d]: Fencing of node %d/%s is in progress", i, nodeid, node_uname);
                fencing_requested = 1;
                rc = 2;
            } else {
                log_debug("Stonith history[%d]: Node %d/%s state %d at %llu", i, nodeid, node_uname, hp->state, (unsigned long long)hp->completed);
            }
        }
    }

    rc = 0;
    if(last_fenced_time && *last_fenced_time != 0) {
        log_debug("Node %d/%s was last shot at: %llu", nodeid, node_uname, (unsigned long long)*last_fenced_time);
    } else {
        if(!fencing_requested && !refence) {
            log_debug("It does not appear node %d/%s has been shot", nodeid, node_uname);
            rc = 1;
        }
    }

    if(st) {
        st->cmds->disconnect(st);
        stonith_api_delete(st);
    }

    return rc;
}