Re: [Pacemaker] [patch] Seeking suggestions for cluster configuration of HA iSCSI target and initiators]

Phil Frost Mon, 16 Jul 2012 12:24:45 -0700

On 07/16/2012 01:34 PM, Phil Frost wrote:

I've been doing some study of the iscsi RA since my first post, and it seems to me now that the "failure" in the monitor action isn't actually in the monitor action at all. Rather, it appears that for *all* actions, the RA does a "discovery" step, and that's what is failing. I'm not really sure what this is, or why I need it. Is it simply to find an unspecified portal for a given IQN? Is it therefore useless in my case, since I've explicitly specified the portal in the resource parameters?
If I were to disable the "discovery" step, what are people's thoughts on the case where the target is operational, but the initiator for some reason (network failure) can't reach it? In this case, assume Pacemaker knows the target is up; is there a way to encourage it to decide to attempt migrating the initiator to another node?

Well, after reading through the iscsi RA a dozen times, I could not formulate any reasonable idea of why the discovery step might be necessary. The portal parameter is required, so it couldn't be to locate the portal. And, there is logic in the discovery function to handle the case when a target returns multiple portals for the same target -- by finding the one that was specified in the portal parameter. So it can't really be discovering anything. It does raise an error in this case if the portal parameter isn't specified, but then the portal parameter isn't optional, so that case could never occur. It smelled like rotten code to me.

So, given all that, and given how it introduces a nasty race condition in the case that the target isn't running (or is just in the process of migrating to another node), I decided it was better to just get rid of it. Patch attached. I suppose I've introduced a different failure in that an initiator that can't contact a running target won't be migrated, but I'd rather have one of my VMs trying to run, unsuccessfully, and able to automatically recover when the fault is cleared, than have an entire VM host shot in the head on the basis of a race condition in non-failure situations.

One minor nastiness was observed with my patch: if the portal isn't specified exactly as udev will format it, then the RA will wait forever for the device node to appear, expecting the wrong device filename. Maybe canonicalizing the portal was one useful function of the discovery function, but in my opinion, not worth the other problems.

--- heartbeat/iscsi	2012-07-16 13:10:14.000000000 -0400
+++ macpros/iscsi	2012-07-16 14:50:57.000000000 -0400
@@ -31,7 +31,6 @@
 #	OCF_RESKEY_portal: the iSCSI portal address or host name (required)
 #	OCF_RESKEY_target: the iSCSI target (required)
 #	OCF_RESKEY_iscsiadm: iscsiadm program path (optional)
-#	OCF_RESKEY_discovery_type: discovery type (optional; default: sendtargets)
 #
 # Initialization:
 
@@ -41,11 +40,9 @@
 # Defaults
 OCF_RESKEY_udev_default="yes"
 OCF_RESKEY_iscsiadm_default="iscsiadm"
-OCF_RESKEY_discovery_type_default="sendtargets"
 
 : ${OCF_RESKEY_udev=${OCF_RESKEY_udev_default}}
 : ${OCF_RESKEY_iscsiadm=${OCF_RESKEY_iscsiadm_default}}
-: ${OCF_RESKEY_discovery_type=${OCF_RESKEY_discovery_type_default}}
 
 usage() {
   methods=`iscsi_methods`
@@ -96,15 +93,6 @@
 <content type="string" />
 </parameter>
 
-<parameter name="discovery_type" unique="0" required="0">
-<longdesc lang="en">
-Target discovery type. Check the open-iscsi documentation for
-supported discovery types.
-</longdesc>
-<shortdesc lang="en">Target discovery type</shortdesc>
-<content type="string" default="${OCF_RESKEY_discovery_type_default}" />
-</parameter>
-
 <parameter name="iscsiadm" unique="0" required="0">
 <longdesc lang="en">
 open-iscsi administration utility binary.
@@ -128,8 +116,8 @@
 </parameters>
 
 <actions>
-<action name="start" timeout="120" />
-<action name="stop" timeout="120" />
+<action name="start" timeout="60" />
+<action name="stop" timeout="60" />
 <action name="status" timeout="30" />
 <action name="monitor" depth="0" timeout="30" interval="120" />
 <action name="validate-all" timeout="5" />
@@ -166,7 +154,6 @@
 	fi
 }
 open_iscsi_setup() {
-	discovery=open_iscsi_discovery
 	add_disk=open_iscsi_add
 	remove_disk=open_iscsi_remove
 	disk_status=open_iscsi_status
@@ -179,72 +166,6 @@
 		return $OCF_ERR_INSTALLED
 }
 
-#
-# discovery return codes:
-#   0: ok (variable portal set)
-#   1: target not found
-#   2: target found but can't connect it unambigously
-#   3: iscsiadm returned error
-#
-# open-iscsi >= "2.0-872" changed discovery semantics
-# see http://www.mail-archive.com/open-iscsi@googlegroups.com/msg04883.html
-# there's a new discoverydb command which should be used instead discovery
- 
-open_iscsi_discovery() {
-	local output
-	local severity=err
-	local discovery_variant="discovery"
-	local options=""
-	local cmd
-	local version=`$iscsiadm --version | awk '{print $3}'`
-
-	ocf_version_cmp "$version" "2.0-871"
-	if [ $? -eq 2 ]; then # newer than 2.0-871?
-		discovery_variant="discoverydb"
-		[ "$discovery_type" = "sendtargets" ] &&
-			options="-D"
-	fi
-	cmd="$iscsiadm -m $discovery_variant -p $OCF_RESKEY_portal -t $discovery_type $options"
-	ocf_is_probe && severity=info
-	output=`$cmd`
-	if [ $? -ne 0 -o x = "x$output" ]; then
-		[ x != "x$output" ] && {
-			ocf_log $severity "$cmd FAILED"
-			echo "$output"
-		}
-		return 3
-	fi
-	portal=`echo "$output" |
-		awk -v target="$OCF_RESKEY_target" '
-		$NF==target{
-			if( NF==3 ) portal=$2; # sles compat mode
-			else portal=$1;
-			sub(",.*","",portal);
-			print portal;
-		}'`
-
-	case `echo "$portal" | wc -w` in
-	0) #target not found
-		echo "$output"
-		ocf_log $severity "target $OCF_RESKEY_target not found at portal $OCF_RESKEY_portal"
-		return 1
-	;;
-	1) #we're ok
-		return 0
-	;;
-	*) # handle multihome hosts reporting multiple portals
-		for p in $portal; do
-			if [ "$OCF_RESKEY_portal" = "$p" ]; then
-				portal="$OCF_RESKEY_portal"
-				return 0
-			fi
-		done
-		echo "$output"
-		ocf_log err "sorry, can't handle multihomed hosts unless you specify the portal exactly"
-		return 2
-	;;
-	esac
-}
 open_iscsi_add() {
 	$iscsiadm -m node -p $1 -T $2 -l
 }
@@ -259,7 +180,7 @@
 # NB: this is udev specific!
 #
 wait_for_udev() {
-	dev=/dev/disk/by-path/ip-$portal-iscsi-$OCF_RESKEY_target
+	dev=/dev/disk/by-path/ip-${OCF_RESKEY_portal}-iscsi-$OCF_RESKEY_target
 	while :; do
 		ls $dev* >/dev/null 2>&1 && break
 		ocf_log warning "waiting for udev to create $dev" 
@@ -267,7 +188,7 @@
 	done
 }
 iscsi_status() {
-	if $disk_status $portal $OCF_RESKEY_target; then
+	if $disk_status ${OCF_RESKEY_portal} $OCF_RESKEY_target; then
 		return $OCF_SUCCESS
 	else
 		return $OCF_NOT_RUNNING
@@ -275,10 +196,10 @@
 }
 iscsi_start() {
 	if iscsi_status; then
-		ocf_log info "iscsi $portal $OCF_RESKEY_target already running"
+		ocf_log info "iscsi ${OCF_RESKEY_portal} $OCF_RESKEY_target already running"
 		return $OCF_SUCCESS
 	else
-		$add_disk $portal $OCF_RESKEY_target ||
+		$add_disk ${OCF_RESKEY_portal} $OCF_RESKEY_target ||
 			return $OCF_ERR_GENERIC
 		case "$udev" in
 		[Yy]es) wait_for_udev ||
@@ -295,7 +216,7 @@
 }
 iscsi_stop() {
 	if iscsi_status; then
-		$remove_disk $portal $OCF_RESKEY_target ||
+		$remove_disk ${OCF_RESKEY_portal} $OCF_RESKEY_target ||
 			return $OCF_ERR_GENERIC
 		if iscsi_status; then
 			return $OCF_ERR_GENERIC
@@ -303,13 +224,13 @@
 			return $OCF_SUCCESS
 		fi
 	else
-		ocf_log info "iscsi $portal $OCF_RESKEY_target already stopped"
+		ocf_log info "iscsi ${OCF_RESKEY_portal} $OCF_RESKEY_target already stopped"
 		return $OCF_SUCCESS
 	fi
 }
 
 iscsi_monitor() {
-	if $disk_status $portal $OCF_RESKEY_target; then
+	if $disk_status ${OCF_RESKEY_portal} $OCF_RESKEY_target; then
 		return $OCF_SUCCESS
     else
 		return $OCF_NOT_RUNNING
@@ -371,9 +292,7 @@
 	exit $OCF_ERR_PERM
 fi
 
-discovery_type=${OCF_RESKEY_discovery_type}
 udev=${OCF_RESKEY_udev}
-$discovery  # discover and setup the real portal string (address)
 case $? in
 0) ;;
 1) [ "$1" = stop ] && exit $OCF_SUCCESS

_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker

Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://bugs.clusterlabs.org

Re: [Pacemaker] [patch] Seeking suggestions for cluster configuration of HA iSCSI target and initiators]

Reply via email to