Greg Padgett has uploaded a new change for review.

Change subject: agent: enable global maintenance mode
......................................................................

agent: enable global maintenance mode

Setting the maintenance flag ("maintenance=1") in the cluster-wide
shared metadata block will now cause all aware HA agents to ignore the
HA VM state, allowing the admin to perform arbitrary operations on/with
the VM.

To facilitate setting/unsetting maintenance mode, the HAClient class has
a new method:

  HAClient.set_global_md_flag('maintenance', <true|false>)

The boolean argument can be anything typically parsed as a boolean,
such as True, 'true', 'yes', 1, False, 'false', 'no', 0.

Change-Id: I796d768e197c5fe73646e67297714392bc6e8201
Signed-off-by: Greg Padgett <[email protected]>
---
M doc/agent-fsm.gv
M ovirt_hosted_engine_ha/agent/hosted_engine.py
M ovirt_hosted_engine_ha/broker/constants.py.in
M ovirt_hosted_engine_ha/broker/storage_broker.py
M ovirt_hosted_engine_ha/client/client.py
M ovirt_hosted_engine_ha/env/constants.py.in
M ovirt_hosted_engine_ha/lib/metadata.py
7 files changed, 155 insertions(+), 33 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha refs/changes/11/19711/1

diff --git a/doc/agent-fsm.gv b/doc/agent-fsm.gv
index 6109d8c..8100c0d 100644
--- a/doc/agent-fsm.gv
+++ b/doc/agent-fsm.gv
@@ -20,6 +20,7 @@
     OFF -> OFF [ label = "VM down locally" ];
     OFF -> ON [ label = "VM unexpectedly  \nrunning locally  " ];
     OFF -> START [ label = "VM down  \nglobally, host has  \nhighest score  " ];
+    OFF -> MAINTENANCE [ label = "Maintenance\nmode enabled" ];
 
     START -> OFF [ label = "VM startup  \nfailed  " ];
     START -> ON [ label = "VM powered on" ];
@@ -28,6 +29,7 @@
     ON -> OFF [ label = "VM died  \nunexpectedly  " ];
     ON -> MIGRATE [ label = "VM up locally,  \nother host has  \nmuch better score  " ];
     ON -> STOP [ label = "VM timed out  \nwith bad health  \nstatus  " ];
+    ON -> MAINTENANCE [ label = "Maintenance\nmode enabled" ];
 
     STOP -> STOP [ label = "VM shutdown\nin progress" ];
     STOP -> OFF [ label = "VM shutdown\nsuccessful" ];
@@ -36,5 +38,7 @@
     MIGRATE -> MIGRATE [ label = "VM migration\nin progress" ];
     MIGRATE -> OFF [ label = "VM migration\nsuccessful" ];
     MIGRATE -> STOP [ label = "VM migration\nfailed" ];
-}
 
+    MAINTENANCE -> MAINTENANCE [ label = "Maintenance\nmode enabled" ];
+    MAINTENANCE -> ENTRY [ label = "Maintenance\nmode disabled" ];
+}
diff --git a/ovirt_hosted_engine_ha/agent/hosted_engine.py b/ovirt_hosted_engine_ha/agent/hosted_engine.py
index b54ad8d..0733156 100644
--- a/ovirt_hosted_engine_ha/agent/hosted_engine.py
+++ b/ovirt_hosted_engine_ha/agent/hosted_engine.py
@@ -54,6 +54,9 @@
     LF_HOST_UPDATE = 'LF_HOST_UPDATE'
     LF_HOST_UPDATE_DETAIL = 'LF_HOST_UPDATE_DETAIL'
     LF_ENGINE_HEALTH = 'LF_ENGINE_HEALTH'
+    LF_GLOBAL_MD_ERROR = 'LF_GLOBAL_MD_ERROR'
+    LF_GLOBAL_MD_UPDATE_DETAIL = 'LF_GLOBAL_MD_UPDATE_DETAIL'
+    LF_MAINTENANCE = 'LF_MAINTENANCE'
 
     MIGRATION_THRESHOLD_SCORE = 800
 
@@ -71,6 +74,7 @@
         ON = 'ON'
         STOP = 'STOP'
         MIGRATE = 'MIGRATE'
+        MAINTENANCE = 'MAINTENANCE'
 
     class MigrationStatus(object):
         PENDING = 'PENDING'
@@ -97,6 +101,7 @@
         self._rinfo = {}
         self._init_runtime_info()
         self._all_host_stats = {}
+        self._global_stats = {}
 
         self._sd_path = None
         self._metadata_path = None
@@ -110,6 +115,7 @@
             self.States.ON: self._handle_on,
             self.States.STOP: self._handle_stop,
             self.States.MIGRATE: self._handle_migrate,
+            self.States.MAINTENANCE: self._handle_maintenance,
         }
 
     def _get_required_monitors(self):
@@ -547,6 +553,22 @@
             self._metadata_dir,
             constants.SERVICE_TYPE)
         local_ts = time.time()
+
+        # host_id 0 is a special case, representing global metadata
+        data = all_stats.pop('0', None)
+        md = {}
+        if data is not None:
+            try:
+                md = metadata.parse_global_metadata_to_dict(self._log, data)
+            except ex.MetadataError as e:
+                self._log.error(
+                    str(e),
+                    extra=self._get_lf_args(self.LF_GLOBAL_MD_ERROR))
+        self._log.info(
+            'Global metadata: {0}'.format(md),
+            extra=self._get_lf_args(self.LF_GLOBAL_MD_UPDATE_DETAIL))
+        self._global_stats = md
+
         for host_str, data in all_stats.iteritems():
             try:
                 md = metadata.parse_metadata_to_dict(host_str, data)
@@ -651,8 +673,7 @@
                 rinfo['best-score'] = stats['score']
                 rinfo['best-score-host-id'] = host_id
 
-        # FIXME set maintenance flag
-        rinfo['maintenance'] = False
+        rinfo['maintenance'] = self._global_stats.get('maintenance', False)
 
         self._rinfo.update(rinfo)
 
@@ -684,6 +705,7 @@
         ENTRY state.  Determine current vm state and switch appropriately.
         """
         local_host_id = self._rinfo['host-id']
+        self._log.info("Determining initial state for host")
         if self._all_host_stats[local_host_id]['engine-status'][:5] == 'vm-up':
             return self.States.ON, False
         else:
@@ -713,7 +735,9 @@
 
         # FIXME remote db down, other statuses
 
-        # FIXME cluster-wide engine maintenance bit
+        if self._rinfo['maintenance']:
+            self._log.info("HA maintenance enabled")
+            return self.States.MAINTENANCE, True
 
         if self._rinfo['best-score-host-id'] != local_host_id:
             self._log.info("Engine down, local host does not have best score",
@@ -782,7 +806,9 @@
             self._log.error("Engine vm unexpectedly running on other host")
             return self.States.OFF, True
 
-        # FIXME maintenance bit should cause transition to STOP
+        if self._rinfo['maintenance']:
+            self._log.info("HA maintenance enabled")
+            return self.States.MAINTENANCE, True
 
         best_host_id = self._rinfo['best-score-host-id']
         if (best_host_id != local_host_id
@@ -949,3 +975,15 @@
             del self._rinfo['migration-host-id']
         if 'migration-status' in self._rinfo:
             del self._rinfo['migration-status']
+
+    def _handle_maintenance(self):
+        """
+        MAINTENANCE state.  Allow arbitrary HA VM state while in maintenance
+        mode (i.e. ignore it), and re-init in ENTRY state once complete.
+        """
+        if self._rinfo['maintenance']:
+            self._log.info("HA maintenance enabled",
+                           extra=self._get_lf_args(self.LF_MAINTENANCE))
+            return self.States.MAINTENANCE, True
+        else:
+            return self.States.ENTRY, False
diff --git a/ovirt_hosted_engine_ha/broker/constants.py.in b/ovirt_hosted_engine_ha/broker/constants.py.in
index 1ec3fff..a1efeae 100644
--- a/ovirt_hosted_engine_ha/broker/constants.py.in
+++ b/ovirt_hosted_engine_ha/broker/constants.py.in
@@ -34,7 +34,3 @@
 VDSM_GROUP = '@VDSM_GROUP@'
 
 HOSTED_ENGINE_BINARY = '@ENGINE_SETUP_BINDIR@/hosted-engine'
-
-MD_EXTENSION = 'metadata'
-HOST_SEGMENT_BYTES = 4096
-MAX_HOST_ID_SCAN = 64
diff --git a/ovirt_hosted_engine_ha/broker/storage_broker.py b/ovirt_hosted_engine_ha/broker/storage_broker.py
index 8a1e019..d9ba219 100644
--- a/ovirt_hosted_engine_ha/broker/storage_broker.py
+++ b/ovirt_hosted_engine_ha/broker/storage_broker.py
@@ -23,7 +23,7 @@
 import os
 import threading
 
-from . import constants
+from ..env import constants
 from ..lib.exceptions import RequestError
 from ..lib import util
 
diff --git a/ovirt_hosted_engine_ha/client/client.py b/ovirt_hosted_engine_ha/client/client.py
index e9b703b..7c6b436 100644
--- a/ovirt_hosted_engine_ha/client/client.py
+++ b/ovirt_hosted_engine_ha/client/client.py
@@ -40,11 +40,7 @@
         self._log = logging.getLogger("HAClient")
         self._config = None
 
-    def get_all_host_stats(self):
-        """
-        Connects to HA broker, reads stats for all hosts, and returns
-        them in a dictionary as {host_id: = {key: value, ...}}
-        """
+    def get_all_stats(self, mode='all'):
         if self._config is None:
             self._config = config.Config()
         broker = brokerlink.BrokerLink()
@@ -53,35 +49,84 @@
                 path.get_metadata_path(self._config),
                 constants.SERVICE_TYPE)
 
-        output = {}
-        for host_str, data in stats.iteritems():
-            try:
-                md = metadata.parse_metadata_to_dict(host_str, data)
-            except MetadataError as e:
-                self._log.error(str(e))
-                continue
-            else:
-                output[md['host-id']] = md
-        return output
+        return self._parse_stats(stats, mode)
 
-    def get_all_host_stats_direct(self, dom_path, service_type):
-        """
-        Connects to HA broker, reads stats for all hosts, and returns
-        them in a dictionary as {host_id: = {key: value, ...}}
-        """
+    def get_all_stats_direct(self, dom_path, service_type, mode='all'):
         from ..broker import storage_broker
 
         sb = storage_broker.StorageBroker()
         path = os.path.join(dom_path, constants.SD_METADATA_DIR)
         stats = sb.get_raw_stats_for_service_type(path, service_type)
 
+        return self._parse_stats(stats, mode)
+
+    def _parse_stats(self, stats, mode):
         output = {}
         for host_str, data in stats.iteritems():
             try:
-                md = metadata.parse_metadata_to_dict(host_str, data)
+                if host_str == '0' and mode != 'host':
+                    md = metadata.parse_global_metadata_to_dict(self._log,
+                                                                data)
+                    output[0] = md
+                elif host_str != '0' and mode != 'global':
+                    md = metadata.parse_metadata_to_dict(host_str, data)
+                    output[md['host-id']] = md
+                else:
+                    continue
             except MetadataError as e:
                 self._log.error(str(e))
                 continue
-            else:
-                output[md['host-id']] = md
         return output
+
+    def get_all_host_stats(self):
+        """
+        Connects to HA broker, reads stats for all hosts, and returns
+        them in a dictionary as {host_id: = {key: value, ...}}
+        """
+        return self.get_all_stats('host')
+
+    def get_all_host_stats_direct(self, dom_path, service_type):
+        """
+        Directly accessing storage, reads stats for all hosts, and returns
+        them in a dictionary as {host_id: = {key: value, ...}}
+        """
+        return self.get_all_stats_direct(dom_path, service_type, 'host')
+
+    def set_global_md_flag(self, flag, value):
+        """
+        Connects to HA broker and sets flags in global metadata, leaving
+        any other flags unaltered.
+        """
+        try:
+            transform_fn = metadata.global_flags[flag]
+        except:
+            raise Exception('Unknown metadata flag: {0}'.format(flag))
+        if transform_fn:
+            put_val = transform_fn(value)
+        else:
+            put_val = value
+
+        if self._config is None:
+            self._config = config.Config()
+
+        broker = brokerlink.BrokerLink()
+        with broker.connection():
+            all_stats = broker.get_stats_from_storage(
+                path.get_metadata_path(self._config),
+                constants.SERVICE_TYPE)
+
+            global_stats = all_stats.get('0')
+            if global_stats and len(global_stats):
+                md_dict = metadata.parse_global_metadata_to_dict(self._log,
+                                                                 global_stats)
+            else:
+                md_dict = {}
+
+            md_dict[flag] = put_val
+            block = metadata.create_global_metadata_from_dict(self._log,
+                                                              md_dict)
+            broker.put_stats_on_storage(
+                path.get_metadata_path(self._config),
+                constants.SERVICE_TYPE,
+                0,
+                block)
diff --git a/ovirt_hosted_engine_ha/env/constants.py.in b/ovirt_hosted_engine_ha/env/constants.py.in
index 13e6694..ce965d9 100644
--- a/ovirt_hosted_engine_ha/env/constants.py.in
+++ b/ovirt_hosted_engine_ha/env/constants.py.in
@@ -22,6 +22,9 @@
 METADATA_FEATURE_VERSION = 1
 METADATA_PARSE_VERSION = 1
 
+MD_EXTENSION = 'metadata'
+MAX_HOST_ID_SCAN = 64
+HOST_SEGMENT_BYTES = 4096
 METADATA_BLOCK_BYTES = 512
 SERVICE_TYPE = 'hosted-engine'
 
diff --git a/ovirt_hosted_engine_ha/lib/metadata.py b/ovirt_hosted_engine_ha/lib/metadata.py
index f0746f7..8e6d357 100644
--- a/ovirt_hosted_engine_ha/lib/metadata.py
+++ b/ovirt_hosted_engine_ha/lib/metadata.py
@@ -18,7 +18,17 @@
 #
 
 from ..env import constants
+from ..lib import util
 from exceptions import MetadataError
+
+
+def to_bool_rep(value):
+    return 1 if util.to_bool(value) else 0
+
+
+global_flags = {
+    'maintenance': to_bool_rep,
+}
 
 
 def parse_metadata_to_dict(host_str, data):
@@ -67,3 +77,29 @@
         ret['extra'] = extra
 
     return ret
+
+
+def parse_global_metadata_to_dict(log, data):
+    if len(data) < constants.HOST_SEGMENT_BYTES:
+        raise MetadataError("Malformed global metadata:"
+                            " received {0} of {1} expected bytes"
+                            .format(len(data), constants.HOST_SEGMENT_BYTES))
+
+    ret = {}
+    tokens = data[:constants.HOST_SEGMENT_BYTES].rstrip('\0').split('\n')
+    for token in tokens:
+        k, v = token.split('=')
+        if k == 'maintenance':
+            ret['maintenance'] = util.to_bool(v)
+        else:
+            log.error("Invalid global metadata key: {0}".format(token))
+
+    return ret
+
+
+def create_global_metadata_from_dict(log, md_dict):
+    block = ''
+    for k, v in md_dict.iteritems():
+        block += '{k}={v}'.format(k=k, v=v)
+    block = block.ljust(constants.HOST_SEGMENT_BYTES, '\0')
+    return block


-- 
To view, visit http://gerrit.ovirt.org/19711
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I796d768e197c5fe73646e67297714392bc6e8201
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-hosted-engine-ha
Gerrit-Branch: master
Gerrit-Owner: Greg Padgett <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to