Author: vlendec Date: 2007-08-29 11:46:44 +0000 (Wed, 29 Aug 2007) New Revision: 24773
WebSVN: http://websvn.samba.org/cgi-bin/viewcvs.cgi?view=rev&root=samba&rev=24773 Log: Fix a ctdb connection lockup The lockup could happen when packet_read_sync() gets two packets in a row, the first one being an async message, and the second one being the response to a ctdb request. Also add some debug msg to ctdb_conn.c, and cut off the "locking key" messages to only dump 20 hex chars at debug level 10. >10 will dump everything. Modified: branches/SAMBA_3_2/source/lib/ctdbd_conn.c branches/SAMBA_3_2/source/lib/dbwrap_ctdb.c branches/SAMBA_3_2/source/lib/dbwrap_tdb.c branches/SAMBA_3_2_0/source/lib/ctdbd_conn.c branches/SAMBA_3_2_0/source/lib/dbwrap_ctdb.c branches/SAMBA_3_2_0/source/lib/dbwrap_tdb.c Changeset: Modified: branches/SAMBA_3_2/source/lib/ctdbd_conn.c =================================================================== --- branches/SAMBA_3_2/source/lib/ctdbd_conn.c 2007-08-29 11:04:49 UTC (rev 24772) +++ branches/SAMBA_3_2/source/lib/ctdbd_conn.c 2007-08-29 11:46:44 UTC (rev 24773) @@ -61,6 +61,20 @@ } /* + * + */ +static void ctdb_packet_dump(struct ctdb_req_header *hdr) +{ + if (DEBUGLEVEL < 10) { + return; + } + DEBUGADD(10, ("len=%d, magic=%x, vers=%d, gen=%d, op=%d, reqid=%d\n", + (int)hdr->length, (int)hdr->ctdb_magic, + (int)hdr->ctdb_version, (int)hdr->generation, + (int)hdr->operation, (int)hdr->reqid)); +} + +/* * Register a srvid with ctdbd */ static NTSTATUS register_with_ctdbd(struct ctdbd_connection *conn, @@ -249,6 +263,11 @@ &blob, result, result, (ndr_pull_flags_fn_t)ndr_pull_messaging_rec); + if (DEBUGLEVEL >= 10) { + DEBUG(10, ("ctdb_pull_messaging_rec:\n")); + NDR_PRINT_DEBUG(messaging_rec, result); + } + if (!NT_STATUS_IS_OK(status)) { DEBUG(0, ("ndr_pull_struct_blob failed: %s\n", nt_errstr(status))); @@ -288,6 +307,8 @@ cluster_fatal("ctdbd died\n"); } + next_pkt: + ZERO_STRUCT(state); state.mem_ctx = mem_ctx; @@ -296,6 +317,7 @@ /* * Not enough data */ + DEBUG(10, ("not enough data from ctdb socket, retrying\n")); goto again; } @@ -306,6 +328,9 @@ hdr = (struct ctdb_req_header *)state.req.data; + DEBUG(10, ("Received ctdb packet\n")); + ctdb_packet_dump(hdr); + if (hdr->operation == CTDB_REQ_MESSAGE) { struct timed_event *evt; struct deferred_msg_state *msg_state; @@ -315,7 +340,7 @@ DEBUG(1, ("Got a message without having a msg ctx, " "dropping msg %llu\n", (long long unsigned)msg->srvid)); - goto again; + goto next_pkt; } if ((conn->release_ip_handler != NULL) @@ -325,13 +350,13 @@ conn->release_ip_handler((const char *)msg->data, conn->release_ip_priv); TALLOC_FREE(hdr); - goto again; + goto next_pkt; } if (!(msg_state = TALLOC_P(NULL, struct deferred_msg_state))) { DEBUG(0, ("talloc failed\n")); TALLOC_FREE(hdr); - goto again; + goto next_pkt; } if (!(msg_state->rec = ctdb_pull_messaging_rec( @@ -339,7 +364,7 @@ DEBUG(0, ("ctdbd_pull_messaging_rec failed\n")); TALLOC_FREE(msg_state); TALLOC_FREE(hdr); - goto again; + goto next_pkt; } TALLOC_FREE(hdr); @@ -360,10 +385,10 @@ DEBUG(0, ("event_add_timed failed\n")); TALLOC_FREE(msg_state); TALLOC_FREE(hdr); - goto again; + goto next_pkt; } - goto again; + goto next_pkt; } if (hdr->reqid != reqid) { @@ -622,6 +647,9 @@ r.srvid = dst_srvid; r.datalen = blob.length; + DEBUG(10, ("ctdbd_messaging_send: Sending ctdb packet\n")); + ctdb_packet_dump(&r.hdr); + status = packet_send( conn->pkt, 2, data_blob_const(&r, offsetof(struct ctdb_req_message, data)), @@ -682,6 +710,9 @@ req.srvid = srvid; req.datalen = data.dsize; + DEBUG(10, ("ctdbd_control: Sending ctdb packet\n")); + ctdb_packet_dump(&req.hdr); + status = packet_send( conn->pkt, 2, data_blob_const(&req, offsetof(struct ctdb_req_control, data)), @@ -850,6 +881,9 @@ req.db_id = db_id; req.keylen = key.dsize; + DEBUG(10, ("ctdbd_migrate: Sending ctdb packet\n")); + ctdb_packet_dump(&req.hdr); + status = packet_send( conn->pkt, 2, data_blob_const(&req, offsetof(struct ctdb_req_call, data)), Modified: branches/SAMBA_3_2/source/lib/dbwrap_ctdb.c =================================================================== --- branches/SAMBA_3_2/source/lib/dbwrap_ctdb.c 2007-08-29 11:04:49 UTC (rev 24772) +++ branches/SAMBA_3_2/source/lib/dbwrap_ctdb.c 2007-08-29 11:46:44 UTC (rev 24773) @@ -85,7 +85,9 @@ struct db_ctdb_rec *crec = talloc_get_type_abort( data->private_data, struct db_ctdb_rec); - DEBUG(10, ("Unlocking key %s\n", + DEBUG(10, (DEBUGLEVEL > 10 + ? "Unlocking db %u key %s\n" : "Unlocking db %u key %20s\n", + (int)crec->ctdb_ctx->db_id, hex_encode(data, (unsigned char *)data->key.dptr, data->key.dsize))); @@ -135,9 +137,14 @@ */ again: - DEBUG(10, ("Locking key %s\n", - hex_encode(result, (unsigned char *)key.dptr, - key.dsize))); + if (DEBUGLEVEL >= 10) { + char *keystr = hex_encode(result, key.dptr, key.dsize); + DEBUG(10, (DEBUGLEVEL > 10 + ? "Locking db %u key %s\n" + : "Locking db %u key %20s\n", + (int)crec->ctdb_ctx->db_id, keystr)); + TALLOC_FREE(keystr); + } if (tdb_chainlock(ctx->wtdb->tdb, key) != 0) { DEBUG(3, ("tdb_chainlock failed\n")); Modified: branches/SAMBA_3_2/source/lib/dbwrap_tdb.c =================================================================== --- branches/SAMBA_3_2/source/lib/dbwrap_tdb.c 2007-08-29 11:04:49 UTC (rev 24772) +++ branches/SAMBA_3_2/source/lib/dbwrap_tdb.c 2007-08-29 11:46:44 UTC (rev 24773) @@ -31,7 +31,8 @@ struct db_tdb_ctx *ctx = talloc_get_type_abort(data->private_data, struct db_tdb_ctx); - DEBUG(10, ("Unlocking key %s\n", + DEBUG(10, (DEBUGLEVEL > 10 + ? "Unlocking key %s\n" : "Unlocking key %20s\n", hex_encode(data, (unsigned char *)data->key.dptr, data->key.dsize))); @@ -70,10 +71,12 @@ result->store = db_tdb_store; result->delete_rec = db_tdb_delete; - { + if (DEBUGLEVEL > 10) { char *keystr = hex_encode(NULL, (unsigned char *)key.dptr, key.dsize); - DEBUG(10, ("Locking key %s\n", keystr)); + DEBUG(10, (DEBUGLEVEL > 10 + ? "Locking key %s\n" : "Locking key %20s\n", + keystr)); TALLOC_FREE(keystr); } Modified: branches/SAMBA_3_2_0/source/lib/ctdbd_conn.c =================================================================== --- branches/SAMBA_3_2_0/source/lib/ctdbd_conn.c 2007-08-29 11:04:49 UTC (rev 24772) +++ branches/SAMBA_3_2_0/source/lib/ctdbd_conn.c 2007-08-29 11:46:44 UTC (rev 24773) @@ -61,6 +61,20 @@ } /* + * + */ +static void ctdb_packet_dump(struct ctdb_req_header *hdr) +{ + if (DEBUGLEVEL < 10) { + return; + } + DEBUGADD(10, ("len=%d, magic=%x, vers=%d, gen=%d, op=%d, reqid=%d\n", + (int)hdr->length, (int)hdr->ctdb_magic, + (int)hdr->ctdb_version, (int)hdr->generation, + (int)hdr->operation, (int)hdr->reqid)); +} + +/* * Register a srvid with ctdbd */ static NTSTATUS register_with_ctdbd(struct ctdbd_connection *conn, @@ -249,6 +263,11 @@ &blob, result, result, (ndr_pull_flags_fn_t)ndr_pull_messaging_rec); + if (DEBUGLEVEL >= 10) { + DEBUG(10, ("ctdb_pull_messaging_rec:\n")); + NDR_PRINT_DEBUG(messaging_rec, result); + } + if (!NT_STATUS_IS_OK(status)) { DEBUG(0, ("ndr_pull_struct_blob failed: %s\n", nt_errstr(status))); @@ -288,6 +307,8 @@ cluster_fatal("ctdbd died\n"); } + next_pkt: + ZERO_STRUCT(state); state.mem_ctx = mem_ctx; @@ -296,6 +317,7 @@ /* * Not enough data */ + DEBUG(10, ("not enough data from ctdb socket, retrying\n")); goto again; } @@ -306,6 +328,9 @@ hdr = (struct ctdb_req_header *)state.req.data; + DEBUG(10, ("Received ctdb packet\n")); + ctdb_packet_dump(hdr); + if (hdr->operation == CTDB_REQ_MESSAGE) { struct timed_event *evt; struct deferred_msg_state *msg_state; @@ -315,7 +340,7 @@ DEBUG(1, ("Got a message without having a msg ctx, " "dropping msg %llu\n", (long long unsigned)msg->srvid)); - goto again; + goto next_pkt; } if ((conn->release_ip_handler != NULL) @@ -325,13 +350,13 @@ conn->release_ip_handler((const char *)msg->data, conn->release_ip_priv); TALLOC_FREE(hdr); - goto again; + goto next_pkt; } if (!(msg_state = TALLOC_P(NULL, struct deferred_msg_state))) { DEBUG(0, ("talloc failed\n")); TALLOC_FREE(hdr); - goto again; + goto next_pkt; } if (!(msg_state->rec = ctdb_pull_messaging_rec( @@ -339,7 +364,7 @@ DEBUG(0, ("ctdbd_pull_messaging_rec failed\n")); TALLOC_FREE(msg_state); TALLOC_FREE(hdr); - goto again; + goto next_pkt; } TALLOC_FREE(hdr); @@ -360,10 +385,10 @@ DEBUG(0, ("event_add_timed failed\n")); TALLOC_FREE(msg_state); TALLOC_FREE(hdr); - goto again; + goto next_pkt; } - goto again; + goto next_pkt; } if (hdr->reqid != reqid) { @@ -622,6 +647,9 @@ r.srvid = dst_srvid; r.datalen = blob.length; + DEBUG(10, ("ctdbd_messaging_send: Sending ctdb packet\n")); + ctdb_packet_dump(&r.hdr); + status = packet_send( conn->pkt, 2, data_blob_const(&r, offsetof(struct ctdb_req_message, data)), @@ -682,6 +710,9 @@ req.srvid = srvid; req.datalen = data.dsize; + DEBUG(10, ("ctdbd_control: Sending ctdb packet\n")); + ctdb_packet_dump(&req.hdr); + status = packet_send( conn->pkt, 2, data_blob_const(&req, offsetof(struct ctdb_req_control, data)), @@ -850,6 +881,9 @@ req.db_id = db_id; req.keylen = key.dsize; + DEBUG(10, ("ctdbd_migrate: Sending ctdb packet\n")); + ctdb_packet_dump(&req.hdr); + status = packet_send( conn->pkt, 2, data_blob_const(&req, offsetof(struct ctdb_req_call, data)), Modified: branches/SAMBA_3_2_0/source/lib/dbwrap_ctdb.c =================================================================== --- branches/SAMBA_3_2_0/source/lib/dbwrap_ctdb.c 2007-08-29 11:04:49 UTC (rev 24772) +++ branches/SAMBA_3_2_0/source/lib/dbwrap_ctdb.c 2007-08-29 11:46:44 UTC (rev 24773) @@ -85,7 +85,9 @@ struct db_ctdb_rec *crec = talloc_get_type_abort( data->private_data, struct db_ctdb_rec); - DEBUG(10, ("Unlocking key %s\n", + DEBUG(10, (DEBUGLEVEL > 10 + ? "Unlocking db %u key %s\n" : "Unlocking db %u key %20s\n", + (int)crec->ctdb_ctx->db_id, hex_encode(data, (unsigned char *)data->key.dptr, data->key.dsize))); @@ -135,9 +137,14 @@ */ again: - DEBUG(10, ("Locking key %s\n", - hex_encode(result, (unsigned char *)key.dptr, - key.dsize))); + if (DEBUGLEVEL >= 10) { + char *keystr = hex_encode(result, key.dptr, key.dsize); + DEBUG(10, (DEBUGLEVEL > 10 + ? "Locking db %u key %s\n" + : "Locking db %u key %20s\n", + (int)crec->ctdb_ctx->db_id, keystr)); + TALLOC_FREE(keystr); + } if (tdb_chainlock(ctx->wtdb->tdb, key) != 0) { DEBUG(3, ("tdb_chainlock failed\n")); Modified: branches/SAMBA_3_2_0/source/lib/dbwrap_tdb.c =================================================================== --- branches/SAMBA_3_2_0/source/lib/dbwrap_tdb.c 2007-08-29 11:04:49 UTC (rev 24772) +++ branches/SAMBA_3_2_0/source/lib/dbwrap_tdb.c 2007-08-29 11:46:44 UTC (rev 24773) @@ -31,7 +31,8 @@ struct db_tdb_ctx *ctx = talloc_get_type_abort(data->private_data, struct db_tdb_ctx); - DEBUG(10, ("Unlocking key %s\n", + DEBUG(10, (DEBUGLEVEL > 10 + ? "Unlocking key %s\n" : "Unlocking key %20s\n", hex_encode(data, (unsigned char *)data->key.dptr, data->key.dsize))); @@ -70,9 +71,11 @@ result->store = db_tdb_store; result->delete_rec = db_tdb_delete; - { + if (DEBUGLEVEL > 10) { char *keystr = hex_encode(NULL, key.dptr, key.dsize); - DEBUG(10, ("Locking key %s\n", keystr)); + DEBUG(10, (DEBUGLEVEL > 10 + ? "Locking key %s\n" : "Locking key %20s\n", + keystr)); TALLOC_FREE(keystr); }