Eli Mesika has uploaded a new change for review. Change subject: core: Skip fencing if host has connectivity issues ......................................................................
core: Skip fencing if host has connectivity issues Change-Id: I7a9c7db43b50421414ce9596137767b00cbfc2ae Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1120829 Signed-off-by: Eli Mesika <[email protected]> --- M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java M backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java M backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/FencingPolicy.java M backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dao/VdsGroupDAODbFacadeImpl.java M backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties M backend/manager/modules/dal/src/test/resources/fixtures.xml A packaging/dbscripts/upgrade/03_05_0900_skip_fencing_when_connectivity_broken.sql M packaging/dbscripts/vds_groups_sp.sql 8 files changed, 114 insertions(+), 8 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/15/31615/1 diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java index 7f797b4..5d79940 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java @@ -1,6 +1,7 @@ package org.ovirt.engine.core.bll; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -17,10 +18,18 @@ import org.ovirt.engine.core.common.action.VdcReturnValueBase; import org.ovirt.engine.core.common.businessentities.FencingPolicy; import org.ovirt.engine.core.common.businessentities.StoragePoolStatus; +import org.ovirt.engine.core.common.businessentities.VDS; +import org.ovirt.engine.core.common.businessentities.VDSGroup; +import org.ovirt.engine.core.common.businessentities.VDSStatus; import org.ovirt.engine.core.common.businessentities.VdsSpmStatus; import org.ovirt.engine.core.common.config.Config; import org.ovirt.engine.core.common.config.ConfigValues; +import org.ovirt.engine.core.dal.dbbroker.DbFacade; +import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogDirector; +import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogableBase; import org.ovirt.engine.core.utils.ThreadUtils; +import org.ovirt.engine.core.utils.linq.LinqUtils; +import org.ovirt.engine.core.utils.linq.Predicate; /** * @see RestartVdsCommand on why this command is requiring a lock @@ -70,7 +79,9 @@ getReturnValue().setSucceeded(false); return; } - + if (isConnectivityBrokenThresholdReached(getVds())) { + return; + } VdsValidator validator = new VdsValidator(getVds()); boolean shouldBeFenced = validator.shouldVdsBeFenced(); if (shouldBeFenced) { @@ -160,4 +171,35 @@ } return jobProperties; } + + private boolean isConnectivityBrokenThresholdReached(VDS vds) { + VDSGroup cluster = DbFacade.getInstance().getVdsGroupDao().get(vds.getVdsGroupId()); + double percents = 0.0; + boolean result = false; + if (cluster.getFencingPolicy().isSkipFencingIfConnectivityBroken()) { + List<VDS> hosts = DbFacade.getInstance().getVdsDao().getAllForVdsGroup(cluster.getId()); + double hostsNumber = hosts.size(); + List<VDS> hostsWithBrokenConnectivity = LinqUtils.filter(hosts, + new Predicate<VDS>() { + @Override + public boolean eval(VDS a) { + return (a.getStatus() == VDSStatus.Connecting || a.getStatus() == VDSStatus.NonResponsive); + } + }); + double hostsWithBrokenConnectivityNumber = hostsWithBrokenConnectivity.size(); + percents = (hostsWithBrokenConnectivityNumber/hostsNumber)*100.0; + result = (percents >= cluster.getFencingPolicy().getHostsWithBrokenConnectivityThreshold()); + } + if (result) { + logAlert(vds, percents); + } + return result; + } + + private void logAlert(VDS host, Double percents) { + AuditLogableBase auditLogable = new AuditLogableBase(); + auditLogable.addCustomValue("Percents", percents.toString()); + auditLogable.setVdsId(host.getId()); + AuditLogDirector.log(auditLogable, AuditLogType.VDS_ALERT_FENCE_OPERATION_SKIPPED_BROKEN_CONNECTIVITY); + } } diff --git a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java index ff56c1c..5387baf 100644 --- a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java +++ b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java @@ -902,6 +902,7 @@ VDS_ALERT_PM_HEALTH_CHECK_FAILED_FOR_SEQ_SECONDARY_AGENT(9010, AuditLogSeverity.ALERT), VDS_ALERT_PM_HEALTH_CHECK_FAILED_FOR_CON_PRIMARY_AGENT(9011, AuditLogSeverity.ALERT), VDS_ALERT_PM_HEALTH_CHECK_FAILED_FOR_CON_SECONDARY_AGENT(9012, AuditLogSeverity.ALERT), + VDS_ALERT_FENCE_OPERATION_SKIPPED_BROKEN_CONNECTIVITY(9013, AuditLogSeverity.ALERT), TASK_STOPPING_ASYNC_TASK(9500, AuditLogTimeInterval.MINUTE.getValue()), TASK_CLEARING_ASYNC_TASK(9501, AuditLogTimeInterval.MINUTE.getValue()), diff --git a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/FencingPolicy.java b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/FencingPolicy.java index 7f90645..edf0055 100644 --- a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/FencingPolicy.java +++ b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/FencingPolicy.java @@ -11,16 +11,23 @@ * Skip fencing of host of it's connected to at least one storage domain. */ private boolean skipFencingIfSDActive; + private boolean skipFencingIfConnectivityBroken; + private int hostsWithBrokenConnectivityThreshold; public FencingPolicy() { skipFencingIfSDActive = false; + skipFencingIfConnectivityBroken = false; + hostsWithBrokenConnectivityThreshold = 50; } public FencingPolicy(FencingPolicy fencingPolicy) { if (fencingPolicy == null) { skipFencingIfSDActive = false; + skipFencingIfConnectivityBroken = false; } else { skipFencingIfSDActive = fencingPolicy.skipFencingIfSDActive; + skipFencingIfConnectivityBroken = fencingPolicy.skipFencingIfConnectivityBroken; + hostsWithBrokenConnectivityThreshold = fencingPolicy.hostsWithBrokenConnectivityThreshold; } } @@ -32,6 +39,21 @@ this.skipFencingIfSDActive = skipFencingIfSDActive; } + public boolean isSkipFencingIfConnectivityBroken() { + return skipFencingIfConnectivityBroken; + } + + public void setSkipFencingIfConnectivityBroken(boolean skipFencingIfConnectivityBroken) { + this.skipFencingIfConnectivityBroken = skipFencingIfConnectivityBroken; + } + + public int getHostsWithBrokenConnectivityThreshold() { + return hostsWithBrokenConnectivityThreshold; + } + + public void setHostsWithBrokenConnectivityThreshold(int hostsWithBrokenConnectivityThreshold) { + this.hostsWithBrokenConnectivityThreshold = hostsWithBrokenConnectivityThreshold; + } @Override public boolean equals(Object obj) { if (this == obj) { @@ -42,7 +64,9 @@ } FencingPolicy other = (FencingPolicy) obj; - return skipFencingIfSDActive == other.skipFencingIfSDActive; + return skipFencingIfSDActive == other.skipFencingIfSDActive && + skipFencingIfConnectivityBroken == other.skipFencingIfConnectivityBroken && + hostsWithBrokenConnectivityThreshold == other.hostsWithBrokenConnectivityThreshold; } @Override @@ -50,6 +74,8 @@ final int prime = 31; int result = 1; result = prime * result + (skipFencingIfSDActive ? 1231 : 1237); + result = prime * result + (skipFencingIfConnectivityBroken ? 1231 : 1237); + result = prime * result + hostsWithBrokenConnectivityThreshold; return result; } @@ -57,6 +83,10 @@ public String toString() { StringBuilder sb = new StringBuilder("{ skipFencingIfSDActive="); sb.append(skipFencingIfSDActive); + sb.append(", skipFencingIfConnectivityBroken="); + sb.append(skipFencingIfConnectivityBroken); + sb.append(", hostsWithBrokenConnectivityThreshold="); + sb.append(hostsWithBrokenConnectivityThreshold); sb.append(" }"); return sb.toString(); } diff --git a/backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dao/VdsGroupDAODbFacadeImpl.java b/backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dao/VdsGroupDAODbFacadeImpl.java index abfc3f2..7c6d778 100644 --- a/backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dao/VdsGroupDAODbFacadeImpl.java +++ b/backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dao/VdsGroupDAODbFacadeImpl.java @@ -229,7 +229,9 @@ .addValue("spice_proxy", group.getSpiceProxy()) .addValue("serial_number_policy", group.getSerialNumberPolicy() == null ? null : group.getSerialNumberPolicy().getValue()) .addValue("custom_serial_number", group.getCustomSerialNumber()) - .addValue("skip_fencing_if_sd_active", group.getFencingPolicy().isSkipFencingIfSDActive()); + .addValue("skip_fencing_if_sd_active", group.getFencingPolicy().isSkipFencingIfSDActive()) + .addValue("skip_fencing_if_connectivity_broken", group.getFencingPolicy().isSkipFencingIfConnectivityBroken()) + .addValue("hosts_with_broken_connectivity_threshold", group.getFencingPolicy().getHostsWithBrokenConnectivityThreshold()); return parameterSource; } @@ -291,6 +293,8 @@ entity.setSerialNumberPolicy(SerialNumberPolicy.forValue((Integer) rs.getObject("serial_number_policy"))); entity.setCustomSerialNumber(rs.getString("custom_serial_number")); entity.getFencingPolicy().setSkipFencingIfSDActive(rs.getBoolean("skip_fencing_if_sd_active")); + entity.getFencingPolicy().setSkipFencingIfConnectivityBroken(rs.getBoolean("skip_fencing_if_connectivity_broken")); + entity.getFencingPolicy().setHostsWithBrokenConnectivityThreshold(rs.getInt("hosts_with_broken_connectivity_threshold")); return entity; } diff --git a/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties b/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties index 0dcf127..c621aa0 100644 --- a/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties +++ b/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties @@ -617,6 +617,7 @@ VDS_ALERT_PM_HEALTH_CHECK_FAILED_FOR_SEQ_PRIMARY_AGENT=Health check failed on Host ${VdsName} primary sequential agent, future fence operations may fail is secondary agent if not defined properly. VDS_ALERT_PM_HEALTH_CHECK_FAILED_FOR_CON_SECONDARY_AGENT=Health check failed on Host ${VdsName} secondary concurrent agent, future fence operations may fail on this Host. VDS_ALERT_PM_HEALTH_CHECK_FAILED_FOR_SEQ_SECONDARY_AGENT=Health check failed on Host ${VdsName} secondary sequential agent, future fence operations may fail is primary agent if not defined properly. +VDS_ALERT_FENCE_OPERATION_SKIPPED_BROKEN_CONNECTIVITY=Host ${VdsName} became non responsive and was not restarted due to Fencing Policy: ${Percents} percents of the Hosts in the Cluster have connectivity issues. VDS_HOST_NOT_RESPONDING_CONNECTING=Host ${VdsName} is not responding. It will stay in Connecting state for a grace period of ${Seconds} seconds and after that an attempt to fence the host will be issued. TASK_STOPPING_ASYNC_TASK=Stopping async task ${CommandName} that started at ${Date} REFRESH_REPOSITORY_IMAGE_LIST_FAILED=Refresh image list failed for domain(s): ${imageDomains}. Please check domain activity. diff --git a/backend/manager/modules/dal/src/test/resources/fixtures.xml b/backend/manager/modules/dal/src/test/resources/fixtures.xml index 2f218e3..f02e928 100644 --- a/backend/manager/modules/dal/src/test/resources/fixtures.xml +++ b/backend/manager/modules/dal/src/test/resources/fixtures.xml @@ -669,6 +669,8 @@ <column>custom_serial_number</column> <column>required_rng_sources</column> <column>skip_fencing_if_sd_active</column> + <column>skip_fencing_if_connectivity_broken</column> + <column>hosts_with_broken_connectivity_threshold</column> <row> <value>b399944a-81ab-4ec5-8266-e19ba7c3c9d1</value> <value>rhel6.iscsi</value> @@ -695,6 +697,8 @@ <null /> <value>RANDOM</value> <value>false</value> + <value>false</value> + <value>50</value> </row> <row> <value>b399944a-81ab-4ec5-8266-e19ba7c3c9d2</value> @@ -722,6 +726,8 @@ <null /> <value></value> <value>true</value> + <value>false</value> + <value>50</value> </row> <row> <value>b399944a-81ab-4ec5-8266-e19ba7c3c9d3</value> @@ -749,6 +755,8 @@ <null /> <value>HWRNG</value> <value>false</value> + <value>false</value> + <value>50</value> </row> <row> <value>0e57070e-2469-4b38-84a2-f111aaabd49d</value> @@ -776,6 +784,8 @@ <value>my custom serial number</value> <value>HWRNG,RANDOM</value> <value>true</value> + <value>false</value> + <value>50</value> </row> <row> <value>eba797fb-8e3b-4777-b63c-92e7a5957d7c</value> @@ -803,6 +813,8 @@ <null /> <value>HWRNG,RANDOM</value> <value>false</value> + <value>false</value> + <value>50</value> </row> <row> <value>eba797fb-8e3b-4777-b63c-92e7a5957d7d</value> @@ -830,6 +842,8 @@ <null /> <value>HWRNG,RANDOM</value> <value>false</value> + <value>false</value> + <value>50</value> </row> <row> <value>eba797fb-8e3b-4777-b63c-92e7a5957d7e</value> @@ -857,6 +871,8 @@ <null /> <value>RANDOM</value> <value>true</value> + <value>false</value> + <value>50</value> </row> <row> <value>eba797fb-8e3b-4777-b63c-92e7a5957d7f</value> @@ -884,6 +900,8 @@ <null /> <value>RANDOM</value> <value>false</value> + <value>false</value> + <value>50</value> </row> <row> <value>ae956031-6be2-43d6-bb8f-5191c9253314</value> @@ -911,6 +929,8 @@ <null /> <value>RANDOM</value> <value>true</value> + <value>true</value> + <value>50</value> </row> </table> diff --git a/packaging/dbscripts/upgrade/03_05_0900_skip_fencing_when_connectivity_broken.sql b/packaging/dbscripts/upgrade/03_05_0900_skip_fencing_when_connectivity_broken.sql new file mode 100644 index 0000000..dc7f07a --- /dev/null +++ b/packaging/dbscripts/upgrade/03_05_0900_skip_fencing_when_connectivity_broken.sql @@ -0,0 +1,2 @@ +SELECT fn_db_add_column('vds_groups', 'skip_fencing_if_connectivity_broken', 'boolean DEFAULT false'); +SELECT fn_db_add_column('vds_groups', 'hosts_with_broken_connectivity_threshold', 'smallint DEFAULT 50'); diff --git a/packaging/dbscripts/vds_groups_sp.sql b/packaging/dbscripts/vds_groups_sp.sql index 8831824..aae26fe 100644 --- a/packaging/dbscripts/vds_groups_sp.sql +++ b/packaging/dbscripts/vds_groups_sp.sql @@ -37,17 +37,19 @@ v_serial_number_policy SMALLINT, v_custom_serial_number VARCHAR(255), v_required_rng_sources varchar(255), - v_skip_fencing_if_sd_active BOOLEAN + v_skip_fencing_if_sd_active BOOLEAN, + v_skip_fencing_if_connectivity_broken BOOLEAN, + v_hosts_with_broken_connectivity_threshold SMALLINT ) RETURNS VOID AS $procedure$ BEGIN INSERT INTO vds_groups(vds_group_id,description, name, free_text_comment, cpu_name, storage_pool_id, max_vds_memory_over_commit, count_threads_as_cores, compatibility_version, transparent_hugepages, migrate_on_error, virt_service, gluster_service, tunnel_migration, emulated_machine, detect_emulated_machine, trusted_service, ha_reservation, optional_reason, cluster_policy_id, - cluster_policy_custom_properties, enable_balloon, architecture, optimization_type, spice_proxy, enable_ksm, serial_number_policy, custom_serial_number, required_rng_sources, skip_fencing_if_sd_active) + cluster_policy_custom_properties, enable_balloon, architecture, optimization_type, spice_proxy, enable_ksm, serial_number_policy, custom_serial_number, required_rng_sources, skip_fencing_if_sd_active, skip_fencing_if_connectivity_broken, hosts_with_broken_connectivity_threshold) VALUES(v_vds_group_id,v_description, v_name, v_free_text_comment, v_cpu_name, v_storage_pool_id, v_max_vds_memory_over_commit, v_count_threads_as_cores, v_compatibility_version, v_transparent_hugepages, v_migrate_on_error, v_virt_service, v_gluster_service, v_tunnel_migration, v_emulated_machine, v_detect_emulated_machine, v_trusted_service, v_ha_reservation, v_optional_reason, v_cluster_policy_id, v_cluster_policy_custom_properties, v_enable_balloon, - v_architecture, v_optimization_type, v_spice_proxy, v_enable_ksm, v_serial_number_policy, v_custom_serial_number, v_required_rng_sources, v_skip_fencing_if_sd_active); + v_architecture, v_optimization_type, v_spice_proxy, v_enable_ksm, v_serial_number_policy, v_custom_serial_number, v_required_rng_sources, v_skip_fencing_if_sd_active, v_skip_fencing_if_connectivity_broken, v_hosts_with_broken_connectivity_threshold); END; $procedure$ LANGUAGE plpgsql; @@ -84,7 +86,9 @@ v_serial_number_policy SMALLINT, v_custom_serial_number VARCHAR(255), v_required_rng_sources varchar(255), - v_skip_fencing_if_sd_active BOOLEAN + v_skip_fencing_if_sd_active BOOLEAN, + v_skip_fencing_if_connectivity_broken BOOLEAN, + v_hosts_with_broken_connectivity_threshold SMALLINT ) RETURNS VOID @@ -104,7 +108,9 @@ optimization_type = v_optimization_type, spice_proxy = v_spice_proxy, enable_ksm = v_enable_ksm, serial_number_policy = v_serial_number_policy, custom_serial_number = v_custom_serial_number, required_rng_sources = v_required_rng_sources, - skip_fencing_if_sd_active = v_skip_fencing_if_sd_active + skip_fencing_if_sd_active = v_skip_fencing_if_sd_active, + skip_fencing_if_connectivity_broken = v_skip_fencing_if_connectivity_broken, + hosts_with_broken_connectivity_threshold = v_hosts_with_broken_connectivity_threshold WHERE vds_group_id = v_vds_group_id; END; $procedure$ LANGUAGE plpgsql; -- To view, visit http://gerrit.ovirt.org/31615 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I7a9c7db43b50421414ce9596137767b00cbfc2ae Gerrit-PatchSet: 1 Gerrit-Project: ovirt-engine Gerrit-Branch: ovirt-engine-3.5 Gerrit-Owner: Eli Mesika <[email protected]> _______________________________________________ Engine-patches mailing list [email protected] http://lists.ovirt.org/mailman/listinfo/engine-patches
