Hello,

this patch adds deadlock detection (based on simple timeout) to current code. If (probable) deadlock is detected, current action is stopped with proper error.

It properly detects "Simo's deadlock" with 'connections' parameter == 1. (Described in https://fedorahosted.org/bind-dyndb-ldap/ticket/66)

Deadlock itself will be fixed by separate patch.

Petr^2 Spacek
>From ea961e11cbf67f5493f95ef47d317ad0c90ac0ba Mon Sep 17 00:00:00 2001
From: Petr Spacek <pspa...@redhat.com>
Date: Tue, 24 Apr 2012 15:09:32 +0200
Subject: [PATCH] Add simple semaphore deadlock detection logic.
 Signed-off-by: Petr Spacek <pspa...@redhat.com>

---
 src/ldap_helper.c |   61 +++++++++++++++++++++++++++++++---------------------
 src/semaphore.c   |   26 +++++++++++++++++++---
 src/semaphore.h   |    6 ++++-
 3 files changed, 63 insertions(+), 30 deletions(-)

diff --git a/src/ldap_helper.c b/src/ldap_helper.c
index 47c0559..9379c9d 100644
--- a/src/ldap_helper.c
+++ b/src/ldap_helper.c
@@ -296,7 +296,8 @@ ldap_delete_zone2(ldap_instance_t *inst, dns_name_t *name, isc_boolean_t lock);
 static isc_result_t ldap_pool_create(isc_mem_t *mctx, unsigned int connections,
 		ldap_pool_t **poolp);
 static void ldap_pool_destroy(ldap_pool_t **poolp);
-static ldap_connection_t * ldap_pool_getconnection(ldap_pool_t *pool);
+static isc_result_t ldap_pool_getconnection(ldap_pool_t *pool,
+		ldap_connection_t ** conn);
 static void ldap_pool_putconnection(ldap_pool_t *pool,
 		ldap_connection_t *ldap_conn);
 static isc_result_t ldap_pool_connect(ldap_pool_t *pool,
@@ -402,6 +403,10 @@ new_ldap_instance(isc_mem_t *mctx, const char *db_name,
 	ldap_settings[i++].target = &ldap_inst->dyn_update;
 	CHECK(set_settings(ldap_settings, argv));
 
+	/* Set timer for deadlock detection inside semaphore_wait_timed. */
+	if (semaphore_wait_timeout.seconds < ldap_inst->timeout+SEM_WAIT_TIMEOUT_ADD)
+		semaphore_wait_timeout.seconds = ldap_inst->timeout+SEM_WAIT_TIMEOUT_ADD;
+
 	/* Validate and check settings. */
 	str_toupper(ldap_inst->sasl_mech);
 	if (ldap_inst->connections < 1) {
@@ -1089,7 +1094,7 @@ isc_result_t
 refresh_zones_from_ldap(ldap_instance_t *ldap_inst)
 {
 	isc_result_t result = ISC_R_SUCCESS;
-	ldap_connection_t *ldap_conn;
+	ldap_connection_t *ldap_conn = NULL;
 	int zone_count = 0;
 	ldap_entry_t *entry;
 	dns_rbt_t *rbt = NULL;
@@ -1114,7 +1119,7 @@ refresh_zones_from_ldap(ldap_instance_t *ldap_inst)
 
 	log_debug(2, "refreshing list of zones for %s", ldap_inst->db_name);
 
-	ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
+	CHECK(ldap_pool_getconnection(ldap_inst->pool, &ldap_conn));
 
 	CHECK(ldap_query(ldap_inst, ldap_conn, str_buf(ldap_inst->base),
 			 LDAP_SCOPE_SUBTREE, config_attrs, 0,
@@ -1381,7 +1386,7 @@ ldapdb_nodelist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *nam
 		     dns_name_t *origin, ldapdb_nodelist_t *nodelist)
 {
 	isc_result_t result;
-	ldap_connection_t *ldap_conn;
+	ldap_connection_t *ldap_conn = NULL;
 	ldap_entry_t *entry;
 	ld_string_t *string = NULL;
 	ldapdb_node_t *node;
@@ -1392,7 +1397,7 @@ ldapdb_nodelist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *nam
 	REQUIRE(nodelist != NULL);
 
 	/* RRs aren't in the cache, perform ordinary LDAP query */
-	ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
+	CHECK(ldap_pool_getconnection(ldap_inst->pool, &ldap_conn));
 
 	INIT_LIST(*nodelist);
 	CHECK(str_new(mctx, &string));
@@ -1450,7 +1455,7 @@ ldapdb_rdatalist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *na
 		     dns_name_t *origin, ldapdb_rdatalist_t *rdatalist)
 {
 	isc_result_t result;
-	ldap_connection_t *ldap_conn;
+	ldap_connection_t *ldap_conn = NULL;
 	ldap_entry_t *entry;
 	ld_string_t *string = NULL;
 	ldap_cache_t *cache;
@@ -1468,12 +1473,11 @@ ldapdb_rdatalist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *na
 		return result;
 
 	/* RRs aren't in the cache, perform ordinary LDAP query */
-	ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
-
 	INIT_LIST(*rdatalist);
 	CHECK(str_new(mctx, &string));
 	CHECK(dnsname_to_dn(ldap_inst->zone_register, name, string));
 
+	CHECK(ldap_pool_getconnection(ldap_inst->pool, &ldap_conn));
 	CHECK(ldap_query(ldap_inst, ldap_conn, str_buf(string),
 			 LDAP_SCOPE_BASE, NULL, 0, "(objectClass=idnsRecord)"));
 
@@ -2250,7 +2254,7 @@ modify_ldap_common(dns_name_t *owner, ldap_instance_t *ldap_inst,
 		zone_dn += 1; /* skip whitespace */
 	}
 
-	ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
+	CHECK(ldap_pool_getconnection(ldap_inst->pool, &ldap_conn));
 	CHECK(ldap_query(ldap_inst, ldap_conn, zone_dn,
 					 LDAP_SCOPE_BASE, attrs, 0,
 					 "(&(objectClass=idnsZone)(idnsZoneActive=TRUE))"));
@@ -2481,9 +2485,7 @@ modify_ldap_common(dns_name_t *owner, ldap_instance_t *ldap_inst,
 	}
 	
 cleanup:
-	if (ldap_conn != NULL)
-		ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
-
+	ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
 	str_destroy(&owner_dn_ptr);
 	str_destroy(&owner_dn);
 	free_ldapmod(mctx, &change[0]);
@@ -2565,15 +2567,18 @@ ldap_pool_destroy(ldap_pool_t **poolp)
 	MEM_PUT_AND_DETACH(pool);
 }
 
-static ldap_connection_t *
-ldap_pool_getconnection(ldap_pool_t *pool)
+static isc_result_t
+ldap_pool_getconnection(ldap_pool_t *pool, ldap_connection_t ** conn)
 {
 	ldap_connection_t *ldap_conn = NULL;
 	unsigned int i;
+	isc_result_t result;
 
 	REQUIRE(pool != NULL);
+	REQUIRE(conn != NULL && *conn == NULL);
+	ldap_conn = *conn;
 
-	semaphore_wait(&pool->conn_semaphore);
+	CHECK(semaphore_wait_timed(&pool->conn_semaphore));
 	for (i = 0; i < pool->connections; i++) {
 		ldap_conn = pool->conns[i];
 		if (isc_mutex_trylock(&ldap_conn->lock) == ISC_R_SUCCESS)
@@ -2583,13 +2588,22 @@ ldap_pool_getconnection(ldap_pool_t *pool)
 	RUNTIME_CHECK(ldap_conn != NULL);
 
 	INIT_LIST(ldap_conn->ldap_entries);
+	*conn = ldap_conn;
 
-	return ldap_conn;
+cleanup:
+	if (result != ISC_R_SUCCESS) {
+		log_error("timeout in ldap_pool_getconnection(): try to raise "
+				"'connections' parameter; potential deadlock?");
+	}
+	return result;
 }
 
 static void
 ldap_pool_putconnection(ldap_pool_t *pool, ldap_connection_t *ldap_conn)
 {
+	if (ldap_conn == NULL)
+		return;
+
 	ldap_query_free(ldap_conn);
 	UNLOCK(&ldap_conn->lock);
 	semaphore_signal(&pool->conn_semaphore);
@@ -2719,7 +2733,7 @@ update_action(isc_task_t *task, isc_event_t *event)
 	ldap_psearchevent_t *pevent = (ldap_psearchevent_t *)event;
 	isc_result_t result ;
 	ldap_instance_t *inst = NULL;
-	ldap_connection_t *conn;
+	ldap_connection_t *conn = NULL;
 	ldap_entry_t *entry;
 	isc_boolean_t delete = ISC_TRUE;
 	isc_mem_t *mctx;
@@ -2736,7 +2750,7 @@ update_action(isc_task_t *task, isc_event_t *event)
 	if (result != ISC_R_SUCCESS)
 		goto cleanup;
 
-	conn = ldap_pool_getconnection(inst->pool);
+	CHECK(ldap_pool_getconnection(inst->pool, &conn));
 
 	CHECK(ldap_query(inst, conn, pevent->dn,
 			 LDAP_SCOPE_BASE, attrs, 0,
@@ -2754,14 +2768,13 @@ update_action(isc_task_t *task, isc_event_t *event)
 	if (delete)
 		CHECK(ldap_delete_zone(inst, pevent->dn, ISC_TRUE));
 
-        ldap_pool_putconnection(inst->pool, conn);
-
 cleanup:
 	if (result != ISC_R_SUCCESS)
 		log_error("update_action (psearch) failed for %s. "
 			  "Zones can be outdated, run `rndc reload`",
 			  pevent->dn);
 
+	ldap_pool_putconnection(inst->pool, conn);
 	isc_mem_free(mctx, pevent->dbname);
 	isc_mem_free(mctx, pevent->dn);
 	isc_mem_detach(&mctx);
@@ -2790,7 +2803,7 @@ update_config(isc_task_t *task, isc_event_t *event)
 	if (result != ISC_R_SUCCESS)
 		goto cleanup;
 
-	conn = ldap_pool_getconnection(inst->pool);
+	CHECK(ldap_pool_getconnection(inst->pool, &conn));
 
 	CHECK(ldap_query(inst, conn, pevent->dn,
 			 LDAP_SCOPE_BASE, attrs, 0,
@@ -2809,14 +2822,12 @@ update_config(isc_task_t *task, isc_event_t *event)
 
 
 cleanup:
-	if (conn != NULL)
-		ldap_pool_putconnection(inst->pool, conn);
-
 	if (result != ISC_R_SUCCESS)
 		log_error("update_config (psearch) failed for %s. "
 			  "Configuration can be outdated, run `rndc reload`",
 			  pevent->dn);
 
+	ldap_pool_putconnection(inst->pool, conn);
 	isc_mem_free(mctx, pevent->dbname);
 	isc_mem_free(mctx, pevent->dn);
 	isc_mem_detach(&mctx);
@@ -3079,7 +3090,7 @@ ldap_psearch_watcher(isc_threadarg_t arg)
 	tv.tv_usec = 0;
 
 	/* Pick connection, one is reserved purely for this thread */
-	conn = ldap_pool_getconnection(inst->pool);
+	CHECK(ldap_pool_getconnection(inst->pool, &conn));
 
 	/* Try to connect. */
 	while (conn->handle == NULL) {
diff --git a/src/semaphore.c b/src/semaphore.c
index 41d6a30..76e6aa2 100644
--- a/src/semaphore.c
+++ b/src/semaphore.c
@@ -27,8 +27,19 @@
 #include <isc/condition.h>
 #include <isc/result.h>
 #include <isc/util.h>
+#include <isc/time.h>
 
 #include "semaphore.h"
+#include "util.h"
+
+/*
+ * Timer setting for deadlock detection. Format: seconds, nanoseconds.
+ * These values will be overwriten during initialization
+ * from set_settings() with max(setting+SEM_WAIT_TIMEOUT_ADD, curr_value).
+ *
+ * Initial value can be useful in early phases of initialization.
+ */
+isc_interval_t semaphore_wait_timeout = { 3, 0 };
 
 /*
  * Initialize a semaphore.
@@ -74,20 +85,27 @@ semaphore_destroy(semaphore_t *sem)
 /*
  * Wait on semaphore. This operation will try to acquire a lock on the
  * semaphore. If the semaphore is already acquired as many times at it allows,
- * the function will block until someone releases the lock.
+ * the function will block until someone releases the lock OR timeout expire.
+ *
+ * @return ISC_R_SUCCESS or ISC_R_TIMEDOUT or other errors from ISC libs
  */
-void
-semaphore_wait(semaphore_t *sem)
+isc_result_t
+semaphore_wait_timed(semaphore_t *sem)
 {
+	isc_result_t result;
+	isc_time_t abs_timeout;
 	REQUIRE(sem != NULL);
 
 	LOCK(&sem->mutex);
+	CHECK(isc_time_nowplusinterval(&abs_timeout, &semaphore_wait_timeout));
 
 	while (sem->value <= 0)
-		WAIT(&sem->cond, &sem->mutex);
+		CHECK(WAITUNTIL(&sem->cond, &sem->mutex, &abs_timeout));
 	sem->value--;
 
+cleanup:
 	UNLOCK(&sem->mutex);
+	return result;
 }
 
 /*
diff --git a/src/semaphore.h b/src/semaphore.h
index 4ca4f65..5aef5d2 100644
--- a/src/semaphore.h
+++ b/src/semaphore.h
@@ -24,6 +24,10 @@
 #include <isc/condition.h>
 #include <isc/mutex.h>
 
+/* How many seconds will be added to user-defined connection parameter 'timeout'. */
+#define SEM_WAIT_TIMEOUT_ADD 2 /* seconds */
+extern isc_interval_t semaphore_wait_timeout;
+
 /*
  * Semaphore can be "acquired" multiple times. However, it has a maximum
  * number of times someone can acquire him. If a semaphore is already acquired
@@ -40,7 +44,7 @@ typedef struct semaphore	semaphore_t;
 /* Public functions. */
 isc_result_t	semaphore_init(semaphore_t *sem, int value);
 void		semaphore_destroy(semaphore_t *sem);
-void		semaphore_wait(semaphore_t *sem);
+isc_result_t	semaphore_wait_timed(semaphore_t *sem);
 void		semaphore_signal(semaphore_t *sem);
 
 #endif /* !_LD_SEMAPHORE_H_ */
-- 
1.7.7.6

_______________________________________________
Freeipa-devel mailing list
Freeipa-devel@redhat.com
https://www.redhat.com/mailman/listinfo/freeipa-devel

Reply via email to