Roy Golan has uploaded a new change for review.

Change subject: core: Monitoring - refactor Host newtork error handling
......................................................................

core: Monitoring - refactor Host newtork error handling

Host VDS network error should trigger a state change
and a treatment for auto-fencing as outlined in [1]

[1] http://www.ovirt.org/Automatic_Fencing

This handling is done at a single place [2] and is only called
from a single place, the VdsEventListener [3].

[2] VdsManager.handleNetworkException

[3] void onError(@Observes VdsNetworkException networkException)

*All* vds broker commands in action that throw VDS network exception is also
firing and event with the Exception as payload.

The event it caught by VdsEventListener and it invokes the handling on
using the assigned VdsManager in a separate thread.

Mo more VDSNetwork Exception throwing and re-throwing for flow control.

Change-Id: I34004b370740eabbfd2563473eaab82615ceb250
Bug-Url: https://bugzilla.redhat.com/??????
Signed-off-by: Roy Golan <[email protected]>
---
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/HostMonitoring.java
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
M 
backend/manager/modules/vdsbroker/src/test/java/org/ovirt/engine/core/vdsbroker/HostMonitoringTest.java
3 files changed, 60 insertions(+), 88 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/28/35628/1

diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/HostMonitoring.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/HostMonitoring.java
index cb0c6cd..dcd4ef0 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/HostMonitoring.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/HostMonitoring.java
@@ -43,7 +43,6 @@
 import org.ovirt.engine.core.utils.transaction.TransactionMethod;
 import org.ovirt.engine.core.utils.transaction.TransactionSupport;
 import org.ovirt.engine.core.vdsbroker.irsbroker.IRSErrorException;
-import org.ovirt.engine.core.vdsbroker.vdsbroker.VDSNetworkException;
 import org.ovirt.engine.core.vdsbroker.vdsbroker.VDSRecoveringException;
 import org.ovirt.engine.core.vdsbroker.vdsbroker.entities.VmInternalData;
 import org.slf4j.Logger;
@@ -136,9 +135,6 @@
                 
ResourceManager.getInstance().runVdsCommand(VDSCommandType.SetVdsStatus,
                         new SetVdsStatusVDSCommandParameters(vds.getId(), 
VDSStatus.Error));
             }
-        } catch (VDSNetworkException e) {
-            saveVdsDynamic = vdsManager.handleNetworkException(e);
-            throw e;
         } catch (Throwable t) {
             log.error("Failure to refresh Vds runtime info: {}", 
t.getMessage());
             log.error("Exception", t);
@@ -475,21 +471,9 @@
         getResourceManager().getEventListener().updateSchedulingStats(vds);
         if (!statsReturnValue.getSucceeded()
                 && statsReturnValue.getExceptionObject() != null) {
-            VDSNetworkException ex =
-                    (VDSNetworkException) 
((statsReturnValue.getExceptionObject() instanceof VDSNetworkException)
-                            ? statsReturnValue.getExceptionObject()
-                            : null);
-            if (ex != null) {
-                if (vdsManager.handleNetworkException(ex)) {
-                    saveVdsDynamic = true;
-                }
-                log.error("vds::refreshVdsStats Failed getVdsStats,  
vds='{}'({}): {}",
-                        vds.getName(), vds.getId(), ex.getMessage());
-            } else {
-                log.error("vds::refreshVdsStats Failed getVdsStats,  
vds='{}'({}): {}",
-                        vds.getName(), vds.getId(), 
statsReturnValue.getExceptionString());
-            }
-            throw statsReturnValue.getExceptionObject();
+            log.error(" Failed getting vds stats,  vds='{}'({}): {}",
+                    vds.getName(), vds.getId(), 
statsReturnValue.getExceptionString());
+            return;
         }
         // save also dynamic because vm_count data and image_check getting with
         // statistics data
diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
index 32a24fd..ffaab89 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
@@ -661,14 +661,6 @@
 
             return returnStatus;
         } else if (caps.getExceptionObject() != null) {
-            // if exception is VDSNetworkException then call to
-            // handleNetworkException
-            if (caps.getExceptionObject() instanceof VDSNetworkException
-                    && handleNetworkException((VDSNetworkException) caps
-                            .getExceptionObject())) {
-                updateDynamicData(vds.getDynamicData());
-                updateStatisticsData(vds.getStatisticsData());
-            }
             throw caps.getExceptionObject();
         } else {
             log.error("refreshCapabilities:GetCapabilitiesVDSCommand failed 
with no exception!");
@@ -696,56 +688,73 @@
     }
 
     /**
-     * Handle network exception, return true if save vdsDynamic to DB is 
needed.
+     * Handle network exception
      *
-     * @param ex
+     * @param ex exception to handle
      * @return
      */
-    public boolean handleNetworkException(VDSNetworkException ex) {
+    public void handleNetworkException(VDSNetworkException ex) {
+        boolean saveToDb = true;
         if (cachedVds.getStatus() != VDSStatus.Down) {
             long timeoutToFence = calcTimeoutToFence(cachedVds.getVmCount(), 
cachedVds.getSpmStatus());
-            log.warn("Host '{}' is not responding. It will stay in Connecting 
state for a grace period " +
-                            "of {} seconds and after that an attempt to fence 
the host will be issued.",
-                    cachedVds.getName(),
-                    TimeUnit.MILLISECONDS.toSeconds(timeoutToFence));
-            AuditLogableBase logable = new AuditLogableBase();
-            logable.setVdsId(cachedVds.getId());
-            logable.addCustomValue("Seconds", 
Long.toString(TimeUnit.MILLISECONDS.toSeconds(timeoutToFence)));
-            AuditLogDirector.log(logable, 
AuditLogType.VDS_HOST_NOT_RESPONDING_CONNECTING);
-            if (mUnrespondedAttempts.get() < 
Config.<Integer>getValue(ConfigValues.VDSAttemptsToResetCount)
-                    || (lastUpdate + timeoutToFence) > 
System.currentTimeMillis()) {
-                boolean result = false;
-                if (cachedVds.getStatus() != VDSStatus.Connecting && 
cachedVds.getStatus() != VDSStatus.PreparingForMaintenance
+            logHostNonResponding(timeoutToFence);
+            if (dontFenceYet(timeoutToFence)) {
+                if (cachedVds.getStatus() != VDSStatus.Connecting
+                        && cachedVds.getStatus() != 
VDSStatus.PreparingForMaintenance
                         && cachedVds.getStatus() != VDSStatus.NonResponsive) {
                     setStatus(VDSStatus.Connecting, cachedVds);
-                    result = true;
+                } else {
+                    saveToDb = false;
                 }
                 mUnrespondedAttempts.incrementAndGet();
-                return result;
+            } else {
+                if (cachedVds.getStatus() == VDSStatus.NonResponsive
+                        || cachedVds.getStatus() == VDSStatus.Maintenance) {
+                    setStatus(VDSStatus.NonResponsive, cachedVds);
+                } else {
+                    setStatus(VDSStatus.NonResponsive, cachedVds);
+                    moveVMsToUnknown();
+                    logHostFailToResponde(ex, timeoutToFence);
+                    
ResourceManager.getInstance().getEventListener().vdsNotResponding(
+                            cachedVds,
+                            !sshSoftFencingExecuted.getAndSet(true),
+                            lastUpdate);
+                }
             }
-
-            if (cachedVds.getStatus() == VDSStatus.NonResponsive || 
cachedVds.getStatus() == VDSStatus.Maintenance) {
-                setStatus(VDSStatus.NonResponsive, cachedVds);
-                return true;
-            }
-            setStatus(VDSStatus.NonResponsive, cachedVds);
-            moveVMsToUnknown();
-            log.info(
-                    "Server failed to respond, vds_id='{}', vds_name='{}', 
vm_count={}, " +
-                            "spm_status='{}', non-responsive_timeout 
(seconds)={}, error: {}",
-                    cachedVds.getId(), cachedVds.getName(), 
cachedVds.getVmCount(), cachedVds.getSpmStatus(),
-                    TimeUnit.MILLISECONDS.toSeconds(timeoutToFence), 
ex.getMessage());
-
-            logable = new AuditLogableBase(cachedVds.getId());
-            logable.updateCallStackFromThrowable(ex);
-            AuditLogDirector.log(logable, AuditLogType.VDS_FAILURE);
-            boolean executeSshSoftFencing = false;
-            if (!sshSoftFencingExecuted.getAndSet(true)) {
-                executeSshSoftFencing = true;
-            }
-            
ResourceManager.getInstance().getEventListener().vdsNotResponding(cachedVds, 
executeSshSoftFencing, lastUpdate);
         }
-        return true;
+        if (saveToDb) {
+            updateDynamicData(cachedVds.getDynamicData());
+            updateStatisticsData(cachedVds.getStatisticsData());
+        }
+    }
+
+    private boolean dontFenceYet(long timeoutToFence) {
+        return mUnrespondedAttempts.get() < 
Config.<Integer>getValue(ConfigValues.VDSAttemptsToResetCount)
+                || (lastUpdate + timeoutToFence) > System.currentTimeMillis();
+    }
+
+    private void logHostFailToResponde(VDSNetworkException ex, long 
timeoutToFence) {
+        log.info(
+                "Server failed to respond, vds_id='{}', vds_name='{}', 
vm_count={}, " +
+                        "spm_status='{}', non-responsive_timeout (seconds)={}, 
error: {}",
+                cachedVds.getId(), cachedVds.getName(), 
cachedVds.getVmCount(), cachedVds.getSpmStatus(),
+                TimeUnit.MILLISECONDS.toSeconds(timeoutToFence), 
ex.getMessage());
+
+        AuditLogableBase logable;
+        logable = new AuditLogableBase(cachedVds.getId());
+        logable.updateCallStackFromThrowable(ex);
+        AuditLogDirector.log(logable, AuditLogType.VDS_FAILURE);
+    }
+
+    private void logHostNonResponding(long timeoutToFence) {
+        log.warn("Host '{}' is not responding. It will stay in Connecting 
state for a grace period " +
+                        "of {} seconds and after that an attempt to fence the 
host will be issued.",
+                cachedVds.getName(),
+                TimeUnit.MILLISECONDS.toSeconds(timeoutToFence));
+        AuditLogableBase logable = new AuditLogableBase();
+        logable.setVdsId(cachedVds.getId());
+        logable.addCustomValue("Seconds", 
Long.toString(TimeUnit.MILLISECONDS.toSeconds(timeoutToFence)));
+        AuditLogDirector.log(logable, 
AuditLogType.VDS_HOST_NOT_RESPONDING_CONNECTING);
     }
 
     public void dispose() {
diff --git 
a/backend/manager/modules/vdsbroker/src/test/java/org/ovirt/engine/core/vdsbroker/HostMonitoringTest.java
 
b/backend/manager/modules/vdsbroker/src/test/java/org/ovirt/engine/core/vdsbroker/HostMonitoringTest.java
index 651a6cc..a4c0ccf 100644
--- 
a/backend/manager/modules/vdsbroker/src/test/java/org/ovirt/engine/core/vdsbroker/HostMonitoringTest.java
+++ 
b/backend/manager/modules/vdsbroker/src/test/java/org/ovirt/engine/core/vdsbroker/HostMonitoringTest.java
@@ -12,6 +12,7 @@
 import org.ovirt.engine.core.common.businessentities.IVdsEventListener;
 import org.ovirt.engine.core.common.businessentities.VDS;
 import org.ovirt.engine.core.common.businessentities.VDSGroup;
+import org.ovirt.engine.core.common.businessentities.VDSStatus;
 import 
org.ovirt.engine.core.common.businessentities.network.VdsNetworkInterface;
 import org.ovirt.engine.core.common.config.ConfigValues;
 import org.ovirt.engine.core.common.vdscommands.VDSCommandType;
@@ -111,27 +112,5 @@
         vds = new VDS();
         vds.setId(new Guid("00000000-0000-0000-0000-000000000012"));
         vds.setVdsGroupCompatibilityVersion(Version.v3_4);
-    }
-
-    /**
-     * test that a network error is handled correctly and is triggering a state
-     * change and a save to the DB
-     */
-    @Test
-    public void testErrorHandling() {
-        VDSReturnValue value = new VDSReturnValue();
-        value.setSucceeded(false);
-        value.setExceptionObject(new VDSNetworkException("unknown host"));
-        when(resourceManager.getEventListener()).thenReturn(vdsEventlistener);
-        when(resourceManager.runVdsCommand(any(VDSCommandType.class),
-                any(VDSParametersBase.class))).thenReturn(value);
-        when(updater.getResourceManager()).thenReturn(resourceManager);
-        try {
-            updater.refreshVdsStats();
-        } catch (Exception e) {
-            
verify(vdsManager).handleNetworkException(any(VDSNetworkException.class));
-            verify(vdsManager).updateDynamicData(vds.getDynamicData());
-        }
-
     }
 }


-- 
To view, visit http://gerrit.ovirt.org/35628
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I34004b370740eabbfd2563473eaab82615ceb250
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine
Gerrit-Branch: master
Gerrit-Owner: Roy Golan <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to