CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL5 Changes by: [EMAIL PROTECTED] 2008-02-06 23:03:05
Modified files: cmirror/src : clogd.c cluster.c functions.c functions.h local.c queues.c queues.h Log message: - change verbosity of various messages - fix bug where simultaneous mirror creations could result in checkpoint collisions Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/clogd.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.5&r2=1.1.2.6 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/cluster.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.14&r2=1.1.2.15 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/functions.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.12&r2=1.1.2.13 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/functions.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.3&r2=1.1.2.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/local.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.12&r2=1.1.2.13 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/queues.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.5&r2=1.1.2.6 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/queues.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.3&r2=1.1.2.4 --- cluster/cmirror/src/Attic/clogd.c 2008/01/23 21:21:06 1.1.2.5 +++ cluster/cmirror/src/Attic/clogd.c 2008/02/06 23:03:05 1.1.2.6 @@ -120,9 +120,13 @@ case SIGQUIT: case SIGTERM: case SIGHUP: - r = queue_status(); - r += log_status(); + r = queue_status(0); + r += log_status(0); break; + case SIGUSR1: + queue_status(1); + log_status(1); + return; default: LOG_PRINT("Unknown signal received... 
ignoring"); return; @@ -229,6 +233,7 @@ signal(SIGTERM, &sig_handler); signal(SIGHUP, &sig_handler); signal(SIGPIPE, SIG_IGN); + signal(SIGUSR1, &sig_handler); sigemptyset(&signal_mask); signal_received = 0; } --- cluster/cmirror/src/Attic/cluster.c 2008/02/05 23:25:51 1.1.2.14 +++ cluster/cmirror/src/Attic/cluster.c 2008/02/06 23:03:05 1.1.2.15 @@ -23,7 +23,7 @@ #define DM_CLOG_CHECKPOINT_READY ((uint32_t)-1) static uint32_t my_cluster_id = 0xDEAD; -static SaCkptHandleT ckpt_handle; +static SaCkptHandleT ckpt_handle = 0; static SaCkptCallbacksT callbacks = { 0, 0 }; static SaVersionT version = { 'B', 1, 1 }; @@ -212,7 +212,7 @@ list_for_each_safe(p, n, &l) { list_del_init(p); t = (struct clog_tfr *)p; - LOG_ERROR("[%s] %s:%llu", SHORT_UUID(t->uuid), + LOG_ERROR(" [%s] %s:%llu", SHORT_UUID(t->uuid), RQ_TYPE(t->request_type), (unsigned long long)t->seq); queue_add(t, cluster_queue); @@ -330,7 +330,8 @@ ENTER(); LOG_DBG("Sending checkpointed data to %u", cp->requester); - len = snprintf((char *)(name.value), SA_MAX_NAME_LENGTH, "bitmaps_%u", cp->requester); + len = snprintf((char *)(name.value), SA_MAX_NAME_LENGTH, "bitmaps_%s_%u", + SHORT_UUID(cp->uuid), cp->requester); name.length = len; attr.creationFlags = SA_CKPT_WR_ALL_REPLICAS; @@ -465,7 +466,8 @@ if (!bitmap) return -ENOMEM; - len = snprintf((char *)(name.value), SA_MAX_NAME_LENGTH, "bitmaps_%u", my_cluster_id); + len = snprintf((char *)(name.value), SA_MAX_NAME_LENGTH, "bitmaps_%s_%u", + SHORT_UUID(entry->name.value), my_cluster_id); name.length = len; open_retry: @@ -620,7 +622,8 @@ */ switch (export_checkpoint(cp)) { case -EEXIST: - LOG_DBG("Checkpoint already handled by someone else"); + LOG_DBG("[%s] Checkpoint for %u already handled by someone else", + SHORT_UUID(entry->name.value), cp->requester); case 0: entry->checkpoint_list = cp->next; free_checkpoint(cp); @@ -663,13 +666,13 @@ if (my_cluster_id == 0xDEAD) { LOG_DBG("Message before init... 
ignoring.\n"); - goto out; + return; } match = find_clog_cpg(handle); if (!match) { LOG_ERROR("Unable to find clog_cpg for cluster message"); - goto out; + return; } i_am_server = (my_cluster_id == match->lowest_id) ? 1 : 0; @@ -681,7 +684,8 @@ /* Could we retry? */ goto out; } else if (!match->valid) { - LOG_DBG("Checkpoint data recieved. Log is now valid"); + LOG_DBG("[%s] Checkpoint data recieved. Log is now valid", + SHORT_UUID(match->name.value)); match->valid = 1; while ((startup_tfr = queue_remove(match->startup_queue))) { LOG_DBG("Processing delayed request %d: %s", @@ -757,9 +761,12 @@ } out: - if (r) - LOG_ERROR("Error while processing CPG message"); - + if (r) { + LOG_ERROR("[%s] Error while processing CPG message, %s: %d", + SHORT_UUID(tfr->uuid), + RQ_TYPE(tfr->request_type & ~DM_CLOG_RESPONSE), + r); + } EXIT(); } @@ -772,6 +779,7 @@ int my_pid = getpid(); int found = 0; struct clog_cpg *match, *tmp; + uint32_t lowest; ENTER(); @@ -844,8 +852,14 @@ cpg_finalize(match->handle); - if (match->startup_queue->count) - LOG_ERROR("Startup items remain in cluster log"); + if (match->startup_queue->count) { + LOG_ERROR("%d startup items remain in cluster log", + match->startup_queue->count); + while (!queue_empty(match->startup_queue)) { + tfr = queue_remove(match->startup_queue); + queue_add(tfr, free_queue); + } + } free(match->startup_queue); match->free_me = 1; @@ -857,8 +871,8 @@ if (!left_list_entries && (member_list_entries == 1) && (joined_list_entries == 1) && (member_list[0].nodeid == joined_list[0].nodeid)) { - LOG_DBG("I am the log server (and first to join) for %s", - match->name.value); + LOG_DBG("[%s] I am the log server (and first to join)", + SHORT_UUID(match->name.value)); match->lowest_id = my_cluster_id = joined_list[0].nodeid; match->valid = 1; goto out; @@ -880,13 +894,16 @@ } } + lowest = match->lowest_id; /* Find the lowest_id, i.e. 
the server */ for (i = 0, match->lowest_id = member_list[0].nodeid; i < member_list_entries; i++) if (match->lowest_id > member_list[i].nodeid) match->lowest_id = member_list[i].nodeid; - LOG_DBG("Server is now %u", match->lowest_id); + if (lowest != match->lowest_id) + LOG_DBG("[%s] Server is now %u", SHORT_UUID(match->name.value), + match->lowest_id); /* * If I am part of the joining list, I do not send checkpoints --- cluster/cmirror/src/Attic/functions.c 2008/02/05 22:12:54 1.1.2.12 +++ cluster/cmirror/src/Attic/functions.c 2008/02/06 23:03:05 1.1.2.13 @@ -432,7 +432,7 @@ if ((dup = get_log(lc->uuid)) || (dup = get_pending_log(lc->uuid))) { - LOG_PRINT("[%s] Inc reference count on cluster log", + LOG_DBG("[%s] Inc reference count on cluster log", SHORT_UUID(lc->uuid)); free(lc); dup->ref_count++; @@ -574,7 +574,7 @@ if (r) LOG_ERROR("Failed to create cluster log (%s)", tfr->uuid); else - LOG_PRINT("[%s] Cluster log created", + LOG_DBG("[%s] Cluster log created", SHORT_UUID(tfr->uuid)); return r; @@ -608,12 +608,12 @@ } if (lc->ref_count) { - LOG_PRINT("[%s] Dec reference count on cluster log", + LOG_DBG("[%s] Dec reference count on cluster log", SHORT_UUID(lc->uuid)); return 0; } - LOG_PRINT("[%s] Cluster log removed", SHORT_UUID(lc->uuid)); + LOG_DBG("[%s] Cluster log removed", SHORT_UUID(lc->uuid)); list_del_init(&lc->list); if (lc->disk_fd != -1) @@ -660,7 +660,7 @@ if (!lc) return -EINVAL; - LOG_PRINT("[%s] clog_postsuspend: leaving CPG", SHORT_UUID(lc->uuid)); + LOG_DBG("[%s] clog_postsuspend: leaving CPG", SHORT_UUID(lc->uuid)); destroy_cluster_cpg(tfr->uuid); return 0; @@ -678,7 +678,7 @@ if (!lc) return -EINVAL; - LOG_PRINT("[%s] clog_postsuspend: finalizing", SHORT_UUID(lc->uuid)); + LOG_DBG("[%s] clog_postsuspend: finalizing", SHORT_UUID(lc->uuid)); lc->resume_override = 0; /* move log to pending list */ @@ -712,7 +712,7 @@ SHORT_UUID(tfr->uuid)); return 0; case 0: - LOG_PRINT("[%s] Master resume: reading disk log", + LOG_DBG("[%s] Master resume: 
reading disk log", SHORT_UUID(lc->uuid)); lc->resume_override = 1000; break; @@ -723,7 +723,7 @@ LOG_ERROR("Error:: partial bit loading (just clean_bits)"); return -EINVAL; case 3: - LOG_PRINT("[%s] Non-master resume: bits pre-loaded", + LOG_DBG("[%s] Non-master resume: bits pre-loaded", SHORT_UUID(lc->uuid)); lc->resume_override = 1000; lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count); @@ -1545,7 +1545,7 @@ for (i = 0; i < size; i++) { if (!(i % 16)) { if (outbuf[0] != '\0') - LOG_PRINT("%s", outbuf); + LOG_DBG("%s", outbuf); memset(outbuf, 0, sizeof(outbuf)); sprintf(outbuf, "[%3d - %3d]", i, i+15); } @@ -1636,36 +1636,42 @@ return lc->state; } -int log_status(void) +int log_status(int output_wanted) { int found = 0; struct list_head *l; struct log_c *lc; /* FIXME: Need prefetch to do this right */ - LOG_DBG("Official log list:"); + if (output_wanted) + LOG_PRINT("Official log list:"); __list_for_each(l, &log_list) { found = 1; lc = list_entry(l, struct log_c, list); - LOG_DBG("%s", lc->uuid); - LOG_DBG("sync_bits:"); - print_bits((char *)lc->sync_bits, - lc->bitset_uint32_count * sizeof(*lc->sync_bits)); - LOG_DBG("clean_bits:"); - print_bits((char *)lc->clean_bits, - lc->bitset_uint32_count * sizeof(*lc->clean_bits)); + if (output_wanted) { + LOG_PRINT("%s", lc->uuid); + LOG_DBG("sync_bits:"); + print_bits((char *)lc->sync_bits, + lc->bitset_uint32_count * sizeof(*lc->sync_bits)); + LOG_DBG("clean_bits:"); + print_bits((char *)lc->clean_bits, + lc->bitset_uint32_count * sizeof(*lc->clean_bits)); + } } - LOG_DBG("Pending log list:"); + if (output_wanted) + LOG_PRINT("Pending log list:"); __list_for_each(l, &log_pending_list) { found = 1; lc = list_entry(l, struct log_c, list); - LOG_DBG("%s", lc->uuid); - LOG_DBG("sync_bits:"); - print_bits((char *)lc->sync_bits, - lc->bitset_uint32_count * sizeof(*lc->sync_bits)); - LOG_DBG("clean_bits:"); - print_bits((char *)lc->clean_bits, - lc->bitset_uint32_count * sizeof(*lc->clean_bits)); + if 
(output_wanted) { + LOG_PRINT("%s", lc->uuid); + LOG_DBG("sync_bits:"); + print_bits((char *)lc->sync_bits, + lc->bitset_uint32_count * sizeof(*lc->sync_bits)); + LOG_DBG("clean_bits:"); + print_bits((char *)lc->clean_bits, + lc->bitset_uint32_count * sizeof(*lc->clean_bits)); + } } return found; } --- cluster/cmirror/src/Attic/functions.h 2008/01/14 22:52:17 1.1.2.3 +++ cluster/cmirror/src/Attic/functions.h 2008/02/06 23:03:05 1.1.2.4 @@ -13,5 +13,5 @@ int store_bits(const char *uuid, const char *which, char **buf); int load_bits(const char *uuid, const char *which, char *buf, int size); int log_get_state(struct clog_tfr *tfr); -int log_status(void); +int log_status(int); #endif /* __CLOG_FUNCTIONS_DOT_H__ */ --- cluster/cmirror/src/Attic/local.c 2008/02/05 22:12:54 1.1.2.12 +++ cluster/cmirror/src/Attic/local.c 2008/02/06 23:03:05 1.1.2.13 @@ -309,6 +309,9 @@ return EXIT_KERNEL_TFR_SOCKET; } + /* memset to fix valgrind complaint */ + memset(&addr, 0, sizeof(struct sockaddr_nl)); + addr.nl_family = AF_NETLINK; addr.nl_groups = 0x4; addr.nl_pid = 0; --- cluster/cmirror/src/Attic/queues.c 2008/01/25 16:24:47 1.1.2.5 +++ cluster/cmirror/src/Attic/queues.c 2008/02/06 23:03:05 1.1.2.6 @@ -79,21 +79,23 @@ return 0; } -int queue_status(void) +int queue_status(int output_wanted) { int i=1; struct clog_tfr *tfr; struct list_head *p, *n; - LOG_DBG("cluster_queue: %d", cluster_queue->count); - list_for_each_safe(p, n, &cluster_queue->list) { - tfr = (struct clog_tfr *)p; - LOG_DBG(" %d) %s, originator = %u", - i++, RQ_TYPE(tfr->request_type), - tfr->originator); - } + if (output_wanted) { + LOG_PRINT("cluster_queue: %d", cluster_queue->count); + list_for_each_safe(p, n, &cluster_queue->list) { + tfr = (struct clog_tfr *)p; + LOG_PRINT(" %d) %s, originator = %u", + i++, RQ_TYPE(tfr->request_type), + tfr->originator); + } - LOG_DBG("free_queue : %d", free_queue->count); + LOG_PRINT("free_queue : %d", free_queue->count); + } return cluster_queue->count; } --- 
cluster/cmirror/src/Attic/queues.h 2008/01/23 21:21:06 1.1.2.3 +++ cluster/cmirror/src/Attic/queues.h 2008/02/06 23:03:05 1.1.2.4 @@ -17,7 +17,7 @@ int init_queues(void); void cleanup_queues(void); -int queue_status(void); +int queue_status(int); void queue_add_tail(struct clog_tfr *tfr, struct queue *q); void queue_add(struct clog_tfr *tfr, struct queue *q); struct clog_tfr *queue_remove(struct queue *q);