Eli Mesika has uploaded a new change for review.

Change subject: core: Skip fencing if host has connectivity issues
......................................................................

core: Skip fencing if host has connectivity issues

Change-Id: I7a9c7db43b50421414ce9596137767b00cbfc2ae
Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1120829
Signed-off-by: Eli Mesika <[email protected]>
---
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
M 
backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java
M 
backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/FencingPolicy.java
M 
backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dao/VdsGroupDAODbFacadeImpl.java
M 
backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties
M backend/manager/modules/dal/src/test/resources/fixtures.xml
A 
packaging/dbscripts/upgrade/03_05_0900_skip_fencing_when_connectivity_broken.sql
M packaging/dbscripts/vds_groups_sp.sql
8 files changed, 114 insertions(+), 8 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/15/31615/1

diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
index 7f797b4..5d79940 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
@@ -1,6 +1,7 @@
 package org.ovirt.engine.core.bll;
 
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
 
@@ -17,10 +18,18 @@
 import org.ovirt.engine.core.common.action.VdcReturnValueBase;
 import org.ovirt.engine.core.common.businessentities.FencingPolicy;
 import org.ovirt.engine.core.common.businessentities.StoragePoolStatus;
+import org.ovirt.engine.core.common.businessentities.VDS;
+import org.ovirt.engine.core.common.businessentities.VDSGroup;
+import org.ovirt.engine.core.common.businessentities.VDSStatus;
 import org.ovirt.engine.core.common.businessentities.VdsSpmStatus;
 import org.ovirt.engine.core.common.config.Config;
 import org.ovirt.engine.core.common.config.ConfigValues;
+import org.ovirt.engine.core.dal.dbbroker.DbFacade;
+import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogDirector;
+import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogableBase;
 import org.ovirt.engine.core.utils.ThreadUtils;
+import org.ovirt.engine.core.utils.linq.LinqUtils;
+import org.ovirt.engine.core.utils.linq.Predicate;
 
 /**
  * @see RestartVdsCommand on why this command is requiring a lock
@@ -70,7 +79,9 @@
             getReturnValue().setSucceeded(false);
             return;
         }
-
+        if (isConnectivityBrokenThresholdReached(getVds())) {
+            return;
+        }
         VdsValidator validator = new VdsValidator(getVds());
         boolean shouldBeFenced = validator.shouldVdsBeFenced();
         if (shouldBeFenced) {
@@ -160,4 +171,56 @@
         }
         return jobProperties;
     }
+
+    /**
+     * Decides whether fencing of the given host must be skipped because the share of hosts with broken
+     * connectivity in its cluster has reached the threshold configured in the cluster's fencing policy.
+     * An alert is logged whenever the threshold is reached.
+     *
+     * @param vds
+     *            the non responding host that is about to be fenced
+     * @return {@code true} if the policy is enabled and the threshold is reached, {@code false} otherwise
+     */
+    private boolean isConnectivityBrokenThresholdReached(VDS vds) {
+        VDSGroup cluster = DbFacade.getInstance().getVdsGroupDao().get(vds.getVdsGroupId());
+        // No cluster or policy switched off: nothing to check, continue with the regular fencing flow.
+        if (cluster == null || !cluster.getFencingPolicy().isSkipFencingIfConnectivityBroken()) {
+            return false;
+        }
+        List<VDS> hosts = DbFacade.getInstance().getVdsDao().getAllForVdsGroup(cluster.getId());
+        // Guard the division below; an empty cluster can never reach the threshold.
+        if (hosts.isEmpty()) {
+            return false;
+        }
+        List<VDS> hostsWithBrokenConnectivity = LinqUtils.filter(hosts,
+                new Predicate<VDS>() {
+                    @Override
+                    public boolean eval(VDS a) {
+                        return a.getStatus() == VDSStatus.Connecting || a.getStatus() == VDSStatus.NonResponsive;
+                    }
+                });
+        // Integer arithmetic keeps the comparison exact: with doubles 29 of 100 hosts evaluates to
+        // 28.999999999999996 and would miss a threshold of 29.
+        int percents = hostsWithBrokenConnectivity.size() * 100 / hosts.size();
+        boolean result = percents >= cluster.getFencingPolicy().getHostsWithBrokenConnectivityThreshold();
+        if (result) {
+            logAlert(vds, percents);
+        }
+        return result;
+    }
+
+    /**
+     * Logs the alert telling that fencing of the given host was skipped.
+     *
+     * @param host
+     *            the host that was not fenced
+     * @param percents
+     *            percentage of hosts in the cluster that have connectivity issues
+     */
+    private void logAlert(VDS host, int percents) {
+        AuditLogableBase auditLogable = new AuditLogableBase();
+        auditLogable.addCustomValue("Percents", String.valueOf(percents));
+        auditLogable.setVdsId(host.getId());
+        AuditLogDirector.log(auditLogable, AuditLogType.VDS_ALERT_FENCE_OPERATION_SKIPPED_BROKEN_CONNECTIVITY);
+    }
 }
diff --git 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java
 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java
index ff56c1c..5387baf 100644
--- 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java
+++ 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java
@@ -902,6 +902,7 @@
     VDS_ALERT_PM_HEALTH_CHECK_FAILED_FOR_SEQ_SECONDARY_AGENT(9010, 
AuditLogSeverity.ALERT),
     VDS_ALERT_PM_HEALTH_CHECK_FAILED_FOR_CON_PRIMARY_AGENT(9011, 
AuditLogSeverity.ALERT),
     VDS_ALERT_PM_HEALTH_CHECK_FAILED_FOR_CON_SECONDARY_AGENT(9012, 
AuditLogSeverity.ALERT),
+    VDS_ALERT_FENCE_OPERATION_SKIPPED_BROKEN_CONNECTIVITY(9013, 
AuditLogSeverity.ALERT),
 
     TASK_STOPPING_ASYNC_TASK(9500, AuditLogTimeInterval.MINUTE.getValue()),
     TASK_CLEARING_ASYNC_TASK(9501, AuditLogTimeInterval.MINUTE.getValue()),
diff --git a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/FencingPolicy.java b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/FencingPolicy.java
index 7f90645..edf0055 100644
--- a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/FencingPolicy.java
+++ b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/FencingPolicy.java
@@ -11,16 +11,33 @@
      * Skip fencing of host of it's connected to at least one storage domain.
      */
     private boolean skipFencingIfSDActive;
+
+    /**
+     * Skip fencing of a host if the percentage of hosts with broken connectivity in its cluster reaches
+     * {@link #hostsWithBrokenConnectivityThreshold}.
+     */
+    private boolean skipFencingIfConnectivityBroken;
+
+    /**
+     * Percentage of hosts with broken connectivity in the cluster at which fencing is skipped.
+     */
+    private int hostsWithBrokenConnectivityThreshold;
 
     public FencingPolicy() {
         skipFencingIfSDActive = false;
+        skipFencingIfConnectivityBroken = false;
+        hostsWithBrokenConnectivityThreshold = 50;
     }
 
     public FencingPolicy(FencingPolicy fencingPolicy) {
         if (fencingPolicy == null) {
             skipFencingIfSDActive = false;
+            skipFencingIfConnectivityBroken = false;
+            hostsWithBrokenConnectivityThreshold = 50;
         } else {
             skipFencingIfSDActive = fencingPolicy.skipFencingIfSDActive;
+            skipFencingIfConnectivityBroken = fencingPolicy.skipFencingIfConnectivityBroken;
+            hostsWithBrokenConnectivityThreshold = fencingPolicy.hostsWithBrokenConnectivityThreshold;
         }
     }
 
@@ -32,6 +49,22 @@
         this.skipFencingIfSDActive = skipFencingIfSDActive;
     }
 
+    public boolean isSkipFencingIfConnectivityBroken() {
+        return skipFencingIfConnectivityBroken;
+    }
+
+    public void setSkipFencingIfConnectivityBroken(boolean skipFencingIfConnectivityBroken) {
+        this.skipFencingIfConnectivityBroken = skipFencingIfConnectivityBroken;
+    }
+
+    public int getHostsWithBrokenConnectivityThreshold() {
+        return hostsWithBrokenConnectivityThreshold;
+    }
+
+    public void setHostsWithBrokenConnectivityThreshold(int hostsWithBrokenConnectivityThreshold) {
+        this.hostsWithBrokenConnectivityThreshold = hostsWithBrokenConnectivityThreshold;
+    }
+
     @Override
     public boolean equals(Object obj) {
         if (this == obj) {
@@ -42,7 +75,9 @@
         }
         FencingPolicy other = (FencingPolicy) obj;
 
-        return skipFencingIfSDActive == other.skipFencingIfSDActive;
+        return skipFencingIfSDActive == other.skipFencingIfSDActive &&
+                skipFencingIfConnectivityBroken == other.skipFencingIfConnectivityBroken &&
+                hostsWithBrokenConnectivityThreshold == other.hostsWithBrokenConnectivityThreshold;
     }
 
     @Override
@@ -50,6 +85,8 @@
         final int prime = 31;
         int result = 1;
         result = prime * result + (skipFencingIfSDActive ? 1231 : 1237);
+        result = prime * result + (skipFencingIfConnectivityBroken ? 1231 : 1237);
+        result = prime * result + hostsWithBrokenConnectivityThreshold;
         return result;
     }
 
@@ -57,6 +94,10 @@
     public String toString() {
         StringBuilder sb = new StringBuilder("{ skipFencingIfSDActive=");
         sb.append(skipFencingIfSDActive);
+        sb.append(", skipFencingIfConnectivityBroken=");
+        sb.append(skipFencingIfConnectivityBroken);
+        sb.append(", hostsWithBrokenConnectivityThreshold=");
+        sb.append(hostsWithBrokenConnectivityThreshold);
         sb.append(" }");
         return sb.toString();
     }
diff --git 
a/backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dao/VdsGroupDAODbFacadeImpl.java
 
b/backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dao/VdsGroupDAODbFacadeImpl.java
index abfc3f2..7c6d778 100644
--- 
a/backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dao/VdsGroupDAODbFacadeImpl.java
+++ 
b/backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dao/VdsGroupDAODbFacadeImpl.java
@@ -229,7 +229,9 @@
                 .addValue("spice_proxy", group.getSpiceProxy())
                 .addValue("serial_number_policy", 
group.getSerialNumberPolicy() == null ? null : 
group.getSerialNumberPolicy().getValue())
                 .addValue("custom_serial_number", 
group.getCustomSerialNumber())
-                .addValue("skip_fencing_if_sd_active", 
group.getFencingPolicy().isSkipFencingIfSDActive());
+                .addValue("skip_fencing_if_sd_active", 
group.getFencingPolicy().isSkipFencingIfSDActive())
+                .addValue("skip_fencing_if_connectivity_broken", 
group.getFencingPolicy().isSkipFencingIfConnectivityBroken())
+                .addValue("hosts_with_broken_connectivity_threshold", 
group.getFencingPolicy().getHostsWithBrokenConnectivityThreshold());
 
         return parameterSource;
     }
@@ -291,6 +293,8 @@
             entity.setSerialNumberPolicy(SerialNumberPolicy.forValue((Integer) 
rs.getObject("serial_number_policy")));
             entity.setCustomSerialNumber(rs.getString("custom_serial_number"));
             
entity.getFencingPolicy().setSkipFencingIfSDActive(rs.getBoolean("skip_fencing_if_sd_active"));
+            
entity.getFencingPolicy().setSkipFencingIfConnectivityBroken(rs.getBoolean("skip_fencing_if_connectivity_broken"));
+            
entity.getFencingPolicy().setHostsWithBrokenConnectivityThreshold(rs.getInt("hosts_with_broken_connectivity_threshold"));
 
             return entity;
         }
diff --git 
a/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties
 
b/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties
index 0dcf127..c621aa0 100644
--- 
a/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties
+++ 
b/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties
@@ -617,6 +617,7 @@
 VDS_ALERT_PM_HEALTH_CHECK_FAILED_FOR_SEQ_PRIMARY_AGENT=Health check failed on 
Host ${VdsName} primary sequential agent, future fence operations may fail is 
secondary agent if not defined properly.
 VDS_ALERT_PM_HEALTH_CHECK_FAILED_FOR_CON_SECONDARY_AGENT=Health check failed 
on Host ${VdsName} secondary concurrent agent, future fence operations may fail 
on this Host.
 VDS_ALERT_PM_HEALTH_CHECK_FAILED_FOR_SEQ_SECONDARY_AGENT=Health check failed 
on Host ${VdsName} secondary sequential agent, future fence operations may fail 
is primary agent if not defined properly.
+VDS_ALERT_FENCE_OPERATION_SKIPPED_BROKEN_CONNECTIVITY=Host ${VdsName} became non responsive and was not restarted due to Fencing Policy: ${Percents} percent of the Hosts in the Cluster have connectivity issues.
 VDS_HOST_NOT_RESPONDING_CONNECTING=Host ${VdsName} is not responding. It will 
stay in Connecting state for a grace period of ${Seconds} seconds and after 
that an attempt to fence the host will be issued.
 TASK_STOPPING_ASYNC_TASK=Stopping async task ${CommandName} that started at 
${Date}
 REFRESH_REPOSITORY_IMAGE_LIST_FAILED=Refresh image list failed for domain(s): 
${imageDomains}. Please check domain activity.
diff --git a/backend/manager/modules/dal/src/test/resources/fixtures.xml 
b/backend/manager/modules/dal/src/test/resources/fixtures.xml
index 2f218e3..f02e928 100644
--- a/backend/manager/modules/dal/src/test/resources/fixtures.xml
+++ b/backend/manager/modules/dal/src/test/resources/fixtures.xml
@@ -669,6 +669,8 @@
         <column>custom_serial_number</column>
         <column>required_rng_sources</column>
         <column>skip_fencing_if_sd_active</column>
+        <column>skip_fencing_if_connectivity_broken</column>
+        <column>hosts_with_broken_connectivity_threshold</column>
         <row>
             <value>b399944a-81ab-4ec5-8266-e19ba7c3c9d1</value>
             <value>rhel6.iscsi</value>
@@ -695,6 +697,8 @@
             <null />
             <value>RANDOM</value>
             <value>false</value>
+            <value>false</value>
+            <value>50</value>
         </row>
         <row>
             <value>b399944a-81ab-4ec5-8266-e19ba7c3c9d2</value>
@@ -722,6 +726,8 @@
             <null />
             <value></value>
             <value>true</value>
+            <value>false</value>
+            <value>50</value>
         </row>
         <row>
             <value>b399944a-81ab-4ec5-8266-e19ba7c3c9d3</value>
@@ -749,6 +755,8 @@
             <null />
             <value>HWRNG</value>
             <value>false</value>
+            <value>false</value>
+            <value>50</value>
         </row>
         <row>
             <value>0e57070e-2469-4b38-84a2-f111aaabd49d</value>
@@ -776,6 +784,8 @@
             <value>my custom serial number</value>
             <value>HWRNG,RANDOM</value>
             <value>true</value>
+            <value>false</value>
+            <value>50</value>
         </row>
         <row>
             <value>eba797fb-8e3b-4777-b63c-92e7a5957d7c</value>
@@ -803,6 +813,8 @@
             <null />
             <value>HWRNG,RANDOM</value>
             <value>false</value>
+            <value>false</value>
+            <value>50</value>
         </row>
         <row>
             <value>eba797fb-8e3b-4777-b63c-92e7a5957d7d</value>
@@ -830,6 +842,8 @@
             <null />
             <value>HWRNG,RANDOM</value>
             <value>false</value>
+            <value>false</value>
+            <value>50</value>
         </row>
         <row>
             <value>eba797fb-8e3b-4777-b63c-92e7a5957d7e</value>
@@ -857,6 +871,8 @@
             <null />
             <value>RANDOM</value>
             <value>true</value>
+            <value>false</value>
+            <value>50</value>
         </row>
         <row>
             <value>eba797fb-8e3b-4777-b63c-92e7a5957d7f</value>
@@ -884,6 +900,8 @@
             <null />
             <value>RANDOM</value>
             <value>false</value>
+            <value>false</value>
+            <value>50</value>
         </row>
         <row>
             <value>ae956031-6be2-43d6-bb8f-5191c9253314</value>
@@ -911,6 +929,8 @@
             <null />
             <value>RANDOM</value>
             <value>true</value>
+            <value>true</value>
+            <value>50</value>
         </row>
     </table>
 
diff --git 
a/packaging/dbscripts/upgrade/03_05_0900_skip_fencing_when_connectivity_broken.sql
 
b/packaging/dbscripts/upgrade/03_05_0900_skip_fencing_when_connectivity_broken.sql
new file mode 100644
index 0000000..dc7f07a
--- /dev/null
+++ 
b/packaging/dbscripts/upgrade/03_05_0900_skip_fencing_when_connectivity_broken.sql
@@ -0,0 +1,2 @@
+SELECT fn_db_add_column('vds_groups', 'skip_fencing_if_connectivity_broken', 
'boolean DEFAULT false');
+SELECT fn_db_add_column('vds_groups', 
'hosts_with_broken_connectivity_threshold', 'smallint DEFAULT 50');
diff --git a/packaging/dbscripts/vds_groups_sp.sql 
b/packaging/dbscripts/vds_groups_sp.sql
index 8831824..aae26fe 100644
--- a/packaging/dbscripts/vds_groups_sp.sql
+++ b/packaging/dbscripts/vds_groups_sp.sql
@@ -37,17 +37,19 @@
         v_serial_number_policy SMALLINT,
         v_custom_serial_number VARCHAR(255),
         v_required_rng_sources varchar(255),
-        v_skip_fencing_if_sd_active BOOLEAN
+        v_skip_fencing_if_sd_active BOOLEAN,
+        v_skip_fencing_if_connectivity_broken BOOLEAN,
+        v_hosts_with_broken_connectivity_threshold SMALLINT
 )
 RETURNS VOID
    AS $procedure$
 BEGIN
       INSERT INTO vds_groups(vds_group_id,description, name, 
free_text_comment, cpu_name, storage_pool_id,  max_vds_memory_over_commit, 
count_threads_as_cores, compatibility_version,
         transparent_hugepages, migrate_on_error, virt_service, 
gluster_service, tunnel_migration, emulated_machine, detect_emulated_machine, 
trusted_service, ha_reservation, optional_reason, cluster_policy_id,
-        cluster_policy_custom_properties, enable_balloon, architecture, 
optimization_type, spice_proxy, enable_ksm, serial_number_policy, 
custom_serial_number, required_rng_sources, skip_fencing_if_sd_active)
+        cluster_policy_custom_properties, enable_balloon, architecture, 
optimization_type, spice_proxy, enable_ksm, serial_number_policy, 
custom_serial_number, required_rng_sources, skip_fencing_if_sd_active, 
skip_fencing_if_connectivity_broken, hosts_with_broken_connectivity_threshold)
        VALUES(v_vds_group_id,v_description, v_name, v_free_text_comment, 
v_cpu_name, v_storage_pool_id,  v_max_vds_memory_over_commit, 
v_count_threads_as_cores, v_compatibility_version,
     v_transparent_hugepages, v_migrate_on_error, v_virt_service, 
v_gluster_service, v_tunnel_migration, v_emulated_machine, 
v_detect_emulated_machine, v_trusted_service, v_ha_reservation, 
v_optional_reason, v_cluster_policy_id, v_cluster_policy_custom_properties, 
v_enable_balloon,
-    v_architecture, v_optimization_type, v_spice_proxy, v_enable_ksm, 
v_serial_number_policy, v_custom_serial_number, v_required_rng_sources, 
v_skip_fencing_if_sd_active);
+    v_architecture, v_optimization_type, v_spice_proxy, v_enable_ksm, 
v_serial_number_policy, v_custom_serial_number, v_required_rng_sources, 
v_skip_fencing_if_sd_active, v_skip_fencing_if_connectivity_broken, 
v_hosts_with_broken_connectivity_threshold);
 END; $procedure$
 LANGUAGE plpgsql;
 
@@ -84,7 +86,9 @@
         v_serial_number_policy SMALLINT,
         v_custom_serial_number VARCHAR(255),
         v_required_rng_sources varchar(255),
-        v_skip_fencing_if_sd_active BOOLEAN
+        v_skip_fencing_if_sd_active BOOLEAN,
+        v_skip_fencing_if_connectivity_broken BOOLEAN,
+        v_hosts_with_broken_connectivity_threshold SMALLINT
 )
 RETURNS VOID
 
@@ -104,7 +108,9 @@
       optimization_type = v_optimization_type, spice_proxy = v_spice_proxy, 
enable_ksm = v_enable_ksm,
       serial_number_policy = v_serial_number_policy, custom_serial_number = 
v_custom_serial_number,
       required_rng_sources = v_required_rng_sources,
-      skip_fencing_if_sd_active = v_skip_fencing_if_sd_active
+      skip_fencing_if_sd_active = v_skip_fencing_if_sd_active,
+      skip_fencing_if_connectivity_broken = 
v_skip_fencing_if_connectivity_broken,
+      hosts_with_broken_connectivity_threshold = 
v_hosts_with_broken_connectivity_threshold
       WHERE vds_group_id = v_vds_group_id;
 END; $procedure$
 LANGUAGE plpgsql;


-- 
To view, visit http://gerrit.ovirt.org/31615
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7a9c7db43b50421414ce9596137767b00cbfc2ae
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine
Gerrit-Branch: ovirt-engine-3.5
Gerrit-Owner: Eli Mesika <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to