CVSROOT:        /cvs/cluster
Module name:    cluster
Branch:         RHEL5
Changes by:     [EMAIL PROTECTED]       2008-02-06 23:03:05

Modified files:
        cmirror/src    : clogd.c cluster.c functions.c functions.h 
                         local.c queues.c queues.h 

Log message:
        - change verbosity of various messages
        - fix bug where simultaneous mirror creations could result in checkpoint
        collisions

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/clogd.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.5&r2=1.1.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/cluster.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.14&r2=1.1.2.15
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/functions.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.12&r2=1.1.2.13
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/functions.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/local.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.12&r2=1.1.2.13
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/queues.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.5&r2=1.1.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/queues.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.3&r2=1.1.2.4

--- cluster/cmirror/src/Attic/clogd.c   2008/01/23 21:21:06     1.1.2.5
+++ cluster/cmirror/src/Attic/clogd.c   2008/02/06 23:03:05     1.1.2.6
@@ -120,9 +120,13 @@
        case SIGQUIT:
        case SIGTERM:
        case SIGHUP:
-               r = queue_status();
-               r += log_status();
+               r = queue_status(0);
+               r += log_status(0);
                break;
+       case SIGUSR1:
+               queue_status(1);
+               log_status(1);
+               return;
        default:
                LOG_PRINT("Unknown signal received... ignoring");
                return;
@@ -229,6 +233,7 @@
        signal(SIGTERM, &sig_handler);
        signal(SIGHUP, &sig_handler);
        signal(SIGPIPE, SIG_IGN);
+       signal(SIGUSR1, &sig_handler);
        sigemptyset(&signal_mask);
        signal_received = 0;
 }
--- cluster/cmirror/src/Attic/cluster.c 2008/02/05 23:25:51     1.1.2.14
+++ cluster/cmirror/src/Attic/cluster.c 2008/02/06 23:03:05     1.1.2.15
@@ -23,7 +23,7 @@
 #define DM_CLOG_CHECKPOINT_READY ((uint32_t)-1)
 
 static uint32_t my_cluster_id = 0xDEAD;
-static SaCkptHandleT ckpt_handle;
+static SaCkptHandleT ckpt_handle = 0;
 static SaCkptCallbacksT callbacks = { 0, 0 };
 static SaVersionT version = { 'B', 1, 1 };
 
@@ -212,7 +212,7 @@
                list_for_each_safe(p, n, &l) {
                        list_del_init(p);
                        t = (struct clog_tfr *)p;
-                       LOG_ERROR("[%s]  %s:%llu", SHORT_UUID(t->uuid),
+                       LOG_ERROR("   [%s]  %s:%llu", SHORT_UUID(t->uuid),
                                  RQ_TYPE(t->request_type),
                                  (unsigned long long)t->seq);
                        queue_add(t, cluster_queue);
@@ -330,7 +330,8 @@
        ENTER();
        LOG_DBG("Sending checkpointed data to %u", cp->requester);
 
-       len = snprintf((char *)(name.value), SA_MAX_NAME_LENGTH, "bitmaps_%u", 
cp->requester);
+       len = snprintf((char *)(name.value), SA_MAX_NAME_LENGTH, 
"bitmaps_%s_%u",
+                      SHORT_UUID(cp->uuid), cp->requester);
        name.length = len;
 
        attr.creationFlags = SA_CKPT_WR_ALL_REPLICAS;
@@ -465,7 +466,8 @@
        if (!bitmap)
                return -ENOMEM;
 
-       len = snprintf((char *)(name.value), SA_MAX_NAME_LENGTH, "bitmaps_%u", 
my_cluster_id);
+       len = snprintf((char *)(name.value), SA_MAX_NAME_LENGTH, 
"bitmaps_%s_%u",
+                      SHORT_UUID(entry->name.value), my_cluster_id);
        name.length = len;
 
 open_retry:
@@ -620,7 +622,8 @@
                         */
                        switch (export_checkpoint(cp)) {
                        case -EEXIST:
-                               LOG_DBG("Checkpoint already handled by someone 
else");
+                               LOG_DBG("[%s] Checkpoint for %u already handled 
by someone else",
+                                       SHORT_UUID(entry->name.value), 
cp->requester);
                        case 0:
                                entry->checkpoint_list = cp->next;
                                free_checkpoint(cp);
@@ -663,13 +666,13 @@
 
        if (my_cluster_id == 0xDEAD) {
                LOG_DBG("Message before init... ignoring.\n");
-               goto out;
+               return;
        }
 
        match = find_clog_cpg(handle);
        if (!match) {
                LOG_ERROR("Unable to find clog_cpg for cluster message");
-               goto out;
+               return;
        }
        i_am_server = (my_cluster_id == match->lowest_id) ? 1 : 0;
 
@@ -681,7 +684,8 @@
                                /* Could we retry? */
                                goto out;
                        } else if (!match->valid) {
-                               LOG_DBG("Checkpoint data recieved.  Log is now 
valid");
+                               LOG_DBG("[%s] Checkpoint data recieved.  Log is 
now valid",
+                                       SHORT_UUID(match->name.value));
                                match->valid = 1;
                                while ((startup_tfr = 
queue_remove(match->startup_queue))) {
                                        LOG_DBG("Processing delayed request %d: 
%s",
@@ -757,9 +761,12 @@
        }
 
 out:
-       if (r)
-               LOG_ERROR("Error while processing CPG message");
-
+       if (r) {
+               LOG_ERROR("[%s] Error while processing CPG message, %s: %d",
+                         SHORT_UUID(tfr->uuid),
+                         RQ_TYPE(tfr->request_type & ~DM_CLOG_RESPONSE),
+                         r);
+       }
        EXIT();
 }
 
@@ -772,6 +779,7 @@
        int my_pid = getpid();
        int found = 0;
        struct clog_cpg *match, *tmp;
+       uint32_t lowest;
 
        ENTER();
 
@@ -844,8 +852,14 @@
 
                        cpg_finalize(match->handle);
 
-                       if (match->startup_queue->count)
-                               LOG_ERROR("Startup items remain in cluster 
log");
+                       if (match->startup_queue->count) {
+                               LOG_ERROR("%d startup items remain in cluster 
log",
+                                         match->startup_queue->count);
+                               while (!queue_empty(match->startup_queue)) {
+                                       tfr = 
queue_remove(match->startup_queue);
+                                       queue_add(tfr, free_queue);
+                               }
+                       }
 
                        free(match->startup_queue);
                        match->free_me = 1;
@@ -857,8 +871,8 @@
        if (!left_list_entries &&
            (member_list_entries == 1) && (joined_list_entries == 1) &&
            (member_list[0].nodeid == joined_list[0].nodeid)) {
-               LOG_DBG("I am the log server (and first to join) for %s",
-                       match->name.value);
+               LOG_DBG("[%s]  I am the log server (and first to join)",
+                       SHORT_UUID(match->name.value));
                match->lowest_id = my_cluster_id = joined_list[0].nodeid;
                match->valid = 1;
                goto out;
@@ -880,13 +894,16 @@
                }
        }
 
+       lowest = match->lowest_id;
        /* Find the lowest_id, i.e. the server */
        for (i = 0, match->lowest_id = member_list[0].nodeid;
             i < member_list_entries; i++)
                if (match->lowest_id > member_list[i].nodeid)
                        match->lowest_id = member_list[i].nodeid;
 
-       LOG_DBG("Server is now %u", match->lowest_id);
+       if (lowest != match->lowest_id)
+               LOG_DBG("[%s]  Server is now %u", SHORT_UUID(match->name.value),
+                       match->lowest_id);
 
        /*
         * If I am part of the joining list, I do not send checkpoints
--- cluster/cmirror/src/Attic/functions.c       2008/02/05 22:12:54     1.1.2.12
+++ cluster/cmirror/src/Attic/functions.c       2008/02/06 23:03:05     1.1.2.13
@@ -432,7 +432,7 @@
 
        if ((dup = get_log(lc->uuid)) ||
            (dup = get_pending_log(lc->uuid))) {
-               LOG_PRINT("[%s] Inc reference count on cluster log",
+               LOG_DBG("[%s] Inc reference count on cluster log",
                          SHORT_UUID(lc->uuid));
                free(lc);
                dup->ref_count++;
@@ -574,7 +574,7 @@
        if (r)
                LOG_ERROR("Failed to create cluster log (%s)", tfr->uuid);
        else
-               LOG_PRINT("[%s] Cluster log created",
+               LOG_DBG("[%s] Cluster log created",
                          SHORT_UUID(tfr->uuid));
 
        return r;
@@ -608,12 +608,12 @@
        }
 
        if (lc->ref_count) {
-               LOG_PRINT("[%s] Dec reference count on cluster log",
+               LOG_DBG("[%s] Dec reference count on cluster log",
                          SHORT_UUID(lc->uuid));
                return 0;
        }
 
-       LOG_PRINT("[%s] Cluster log removed", SHORT_UUID(lc->uuid));
+       LOG_DBG("[%s] Cluster log removed", SHORT_UUID(lc->uuid));
 
        list_del_init(&lc->list);
        if (lc->disk_fd != -1)
@@ -660,7 +660,7 @@
        if (!lc)
                return -EINVAL;
 
-       LOG_PRINT("[%s] clog_postsuspend: leaving CPG", SHORT_UUID(lc->uuid));
+       LOG_DBG("[%s] clog_postsuspend: leaving CPG", SHORT_UUID(lc->uuid));
        destroy_cluster_cpg(tfr->uuid);
 
        return 0;
@@ -678,7 +678,7 @@
        if (!lc)
                return -EINVAL;
 
-       LOG_PRINT("[%s] clog_postsuspend: finalizing", SHORT_UUID(lc->uuid));
+       LOG_DBG("[%s] clog_postsuspend: finalizing", SHORT_UUID(lc->uuid));
        lc->resume_override = 0;
 
        /* move log to pending list */
@@ -712,7 +712,7 @@
                          SHORT_UUID(tfr->uuid));
                return 0;
        case 0:
-               LOG_PRINT("[%s] Master resume: reading disk log",
+               LOG_DBG("[%s] Master resume: reading disk log",
                          SHORT_UUID(lc->uuid));
                lc->resume_override = 1000;
                break;
@@ -723,7 +723,7 @@
                LOG_ERROR("Error:: partial bit loading (just clean_bits)");
                return -EINVAL;
        case 3:
-               LOG_PRINT("[%s] Non-master resume: bits pre-loaded",
+               LOG_DBG("[%s] Non-master resume: bits pre-loaded",
                          SHORT_UUID(lc->uuid));
                lc->resume_override = 1000;
                lc->sync_count = count_bits32(lc->clean_bits, 
lc->bitset_uint32_count);
@@ -1545,7 +1545,7 @@
        for (i = 0; i < size; i++) {
                if (!(i % 16)) {
                        if (outbuf[0] != '\0')
-                               LOG_PRINT("%s", outbuf);
+                               LOG_DBG("%s", outbuf);
                        memset(outbuf, 0, sizeof(outbuf));
                        sprintf(outbuf, "[%3d - %3d]", i, i+15);
                }
@@ -1636,36 +1636,42 @@
        return lc->state;
 }
 
-int log_status(void)
+int log_status(int output_wanted)
 {
        int found = 0;
        struct list_head *l;
        struct log_c *lc;
 
        /* FIXME: Need prefetch to do this right */
-       LOG_DBG("Official log list:");
+       if (output_wanted)
+               LOG_PRINT("Official log list:");
        __list_for_each(l, &log_list) {
                found = 1;
                lc = list_entry(l, struct log_c, list);
-               LOG_DBG("%s", lc->uuid);
-               LOG_DBG("sync_bits:");
-               print_bits((char *)lc->sync_bits,
-                          lc->bitset_uint32_count * sizeof(*lc->sync_bits));
-               LOG_DBG("clean_bits:");
-               print_bits((char *)lc->clean_bits,
-                          lc->bitset_uint32_count * sizeof(*lc->clean_bits));
+               if (output_wanted) {
+                       LOG_PRINT("%s", lc->uuid);
+                       LOG_DBG("sync_bits:");
+                       print_bits((char *)lc->sync_bits,
+                                  lc->bitset_uint32_count * 
sizeof(*lc->sync_bits));
+                       LOG_DBG("clean_bits:");
+                       print_bits((char *)lc->clean_bits,
+                                  lc->bitset_uint32_count * 
sizeof(*lc->clean_bits));
+               }
        }
-       LOG_DBG("Pending log list:");
+       if (output_wanted)
+               LOG_PRINT("Pending log list:");
        __list_for_each(l, &log_pending_list) {
                found = 1;
                lc = list_entry(l, struct log_c, list);
-               LOG_DBG("%s", lc->uuid);
-               LOG_DBG("sync_bits:");
-               print_bits((char *)lc->sync_bits,
-                          lc->bitset_uint32_count * sizeof(*lc->sync_bits));
-               LOG_DBG("clean_bits:");
-               print_bits((char *)lc->clean_bits,
-                          lc->bitset_uint32_count * sizeof(*lc->clean_bits));
+               if (output_wanted) {
+                       LOG_PRINT("%s", lc->uuid);
+                       LOG_DBG("sync_bits:");
+                       print_bits((char *)lc->sync_bits,
+                                  lc->bitset_uint32_count * 
sizeof(*lc->sync_bits));
+                       LOG_DBG("clean_bits:");
+                       print_bits((char *)lc->clean_bits,
+                                  lc->bitset_uint32_count * 
sizeof(*lc->clean_bits));
+               }
        }
        return found;
 }
--- cluster/cmirror/src/Attic/functions.h       2008/01/14 22:52:17     1.1.2.3
+++ cluster/cmirror/src/Attic/functions.h       2008/02/06 23:03:05     1.1.2.4
@@ -13,5 +13,5 @@
 int store_bits(const char *uuid, const char *which, char **buf);
 int load_bits(const char *uuid, const char *which, char *buf, int size);
 int log_get_state(struct clog_tfr *tfr);
-int log_status(void);
+int log_status(int);
 #endif /* __CLOG_FUNCTIONS_DOT_H__ */
--- cluster/cmirror/src/Attic/local.c   2008/02/05 22:12:54     1.1.2.12
+++ cluster/cmirror/src/Attic/local.c   2008/02/06 23:03:05     1.1.2.13
@@ -309,6 +309,9 @@
                return EXIT_KERNEL_TFR_SOCKET;
        }
 
+       /* memset to fix valgrind complaint */
+       memset(&addr, 0, sizeof(struct sockaddr_nl));
+
        addr.nl_family = AF_NETLINK;
        addr.nl_groups = 0x4;
        addr.nl_pid = 0;
--- cluster/cmirror/src/Attic/queues.c  2008/01/25 16:24:47     1.1.2.5
+++ cluster/cmirror/src/Attic/queues.c  2008/02/06 23:03:05     1.1.2.6
@@ -79,21 +79,23 @@
        return 0;
 }
 
-int queue_status(void)
+int queue_status(int output_wanted)
 {
        int i=1;
        struct clog_tfr *tfr;
        struct list_head *p, *n;
 
-       LOG_DBG("cluster_queue: %d", cluster_queue->count);
-       list_for_each_safe(p, n, &cluster_queue->list) {
-               tfr = (struct clog_tfr *)p;
-               LOG_DBG("  %d) %s, originator = %u",
-                       i++, RQ_TYPE(tfr->request_type),
-                       tfr->originator);
-       }
+       if (output_wanted) {
+               LOG_PRINT("cluster_queue: %d", cluster_queue->count);
+               list_for_each_safe(p, n, &cluster_queue->list) {
+                       tfr = (struct clog_tfr *)p;
+                       LOG_PRINT("  %d) %s, originator = %u",
+                                 i++, RQ_TYPE(tfr->request_type),
+                                 tfr->originator);
+               }
                
-       LOG_DBG("free_queue   : %d", free_queue->count);
+               LOG_PRINT("free_queue   : %d", free_queue->count);
+       }
 
        return cluster_queue->count;
 }
--- cluster/cmirror/src/Attic/queues.h  2008/01/23 21:21:06     1.1.2.3
+++ cluster/cmirror/src/Attic/queues.h  2008/02/06 23:03:05     1.1.2.4
@@ -17,7 +17,7 @@
 
 int init_queues(void);
 void cleanup_queues(void);
-int queue_status(void);
+int queue_status(int);
 void queue_add_tail(struct clog_tfr *tfr, struct queue *q);
 void queue_add(struct clog_tfr *tfr, struct queue *q);
 struct clog_tfr *queue_remove(struct queue *q);

Reply via email to