This is an automated email from the ASF dual-hosted git repository.

msingh pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 13aa939  HDDS-997. Add blockade Tests for scm isolation and mixed node 
isolation. Contributed by Nilotpal Nandi.
13aa939 is described below

commit 13aa93922458af2ba9e6db1454d5034305b682b9
Author: Mukul Kumar Singh <msi...@apache.org>
AuthorDate: Fri Feb 1 13:10:08 2019 +0530

    HDDS-997. Add blockade Tests for scm isolation and mixed node isolation. 
Contributed by Nilotpal Nandi.
---
 hadoop-ozone/dist/src/main/blockade/README.md      |  20 +++-
 .../src/main/blockade/blockadeUtils/blockade.py    |  16 ++-
 .../main/blockade/clusterUtils/cluster_utils.py    |  86 +++++++++------
 hadoop-ozone/dist/src/main/blockade/conftest.py    |  32 ++++--
 .../blockade/test_blockade_datanode_isolation.py   |  51 ++++++---
 .../dist/src/main/blockade/test_blockade_flaky.py  |  10 +-
 .../main/blockade/test_blockade_mixed_failure.py   | 116 +++++++++++++++++++++
 ...isolation.py => test_blockade_scm_isolation.py} |  67 +++++++-----
 .../main/compose/ozoneblockade/docker-compose.yaml |  49 +++++++++
 .../src/main/compose/ozoneblockade/docker-config   |  77 ++++++++++++++
 10 files changed, 437 insertions(+), 87 deletions(-)

diff --git a/hadoop-ozone/dist/src/main/blockade/README.md 
b/hadoop-ozone/dist/src/main/blockade/README.md
index 20bdc83..9ece997 100644
--- a/hadoop-ozone/dist/src/main/blockade/README.md
+++ b/hadoop-ozone/dist/src/main/blockade/README.md
@@ -18,9 +18,27 @@ Following python packages need to be installed before 
running the tests :
 1. blockade
 2. pytest==2.8.7
 
-You can execute the tests with following command-lines:
+You can execute all blockade tests with following command-lines:
 
 ```
 cd $DIRECTORY_OF_OZONE
 python -m pytest -s  blockade/
 ```
+
+You can also execute fewer blockade tests with following command-lines:
+
+```
+cd $DIRECTORY_OF_OZONE
+python -m pytest -s  blockade/<PATH_TO_PYTHON_FILE>
+e.g: python -m pytest -s blockade/test_blockade_datanode_isolation.py
+```
+
+You can change the default 'sleep' interval in the tests with following
+command-lines:
+
+```
+cd $DIRECTORY_OF_OZONE
+python -m pytest -s  blockade/ --containerStatusSleep=<SECONDS>
+
+e.g: python -m pytest -s  blockade/ --containerStatusSleep=720
+```
\ No newline at end of file
diff --git a/hadoop-ozone/dist/src/main/blockade/blockadeUtils/blockade.py 
b/hadoop-ozone/dist/src/main/blockade/blockadeUtils/blockade.py
index 432562e..c3d1bbb 100644
--- a/hadoop-ozone/dist/src/main/blockade/blockadeUtils/blockade.py
+++ b/hadoop-ozone/dist/src/main/blockade/blockadeUtils/blockade.py
@@ -30,6 +30,7 @@ class Blockade(object):
 
     @classmethod
     def blockade_destroy(cls):
+        logger.info("Running blockade destroy")
         call(["blockade", "destroy"])
 
     @classmethod
@@ -68,13 +69,16 @@ class Blockade(object):
         nodes = ""
         for node_list in args:
             nodes = nodes + ','.join(node_list) + " "
-        exit_code, output = ClusterUtils.run_cmd("blockade partition %s" % 
nodes)
-        assert exit_code == 0, "blockade partition command failed with exit 
code=[%s]" % output
+        exit_code, output = \
+            ClusterUtils.run_cmd("blockade partition %s" % nodes)
+        assert exit_code == 0, \
+            "blockade partition command failed with exit code=[%s]" % output
 
     @classmethod
     def blockade_join(cls):
         output = call(["blockade", "join"])
-        assert output == 0, "blockade join command failed with exit code=[%s]" 
% output
+        assert output == 0, "blockade join command failed with exit code=[%s]" 
\
+                            % output
 
     @classmethod
     def blockade_stop(cls, node, all_nodes=False):
@@ -82,7 +86,8 @@ class Blockade(object):
             output = call(["blockade", "stop", "--all"])
         else:
             output = call(["blockade", "stop", node])
-        assert output == 0, "blockade stop command failed with exit code=[%s]" 
% output
+        assert output == 0, "blockade stop command failed with exit code=[%s]" 
\
+                            % output
 
     @classmethod
     def blockade_start(cls, node, all_nodes=False):
@@ -90,4 +95,5 @@ class Blockade(object):
             output = call(["blockade", "start", "--all"])
         else:
             output = call(["blockade", "start", node])
-        assert output == 0, "blockade start command failed with exit 
code=[%s]" % output
\ No newline at end of file
+        assert output == 0, "blockade start command failed with " \
+                            "exit code=[%s]" % output
\ No newline at end of file
diff --git a/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py 
b/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py
index 26342c7..f590f77 100644
--- a/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py
+++ b/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py
@@ -15,7 +15,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""This module has apis to create and remove a blockade cluster"""
 
 from subprocess import call
 import subprocess
@@ -36,11 +35,13 @@ class ClusterUtils(object):
         logger.info("compose file :%s", docker_compose_file)
         logger.info("number of DNs :%d", datanode_count)
         call(["docker-compose", "-f", docker_compose_file, "down"])
-        call(["docker-compose", "-f", docker_compose_file, "up", "-d", 
"--scale", "datanode=" + str(datanode_count)])
+        call(["docker-compose", "-f", docker_compose_file, "up", "-d",
+              "--scale", "datanode=" + str(datanode_count)])
 
         logger.info("Waiting 30s for cluster start up...")
         time.sleep(30)
-        output = subprocess.check_output(["docker-compose", "-f", 
docker_compose_file, "ps"])
+        output = subprocess.check_output(["docker-compose", "-f",
+                                          docker_compose_file, "ps"])
         output_array = output.split("\n")[2:-1]
 
         container_list = []
@@ -51,23 +52,31 @@ class ClusterUtils(object):
             time.sleep(2)
 
         assert container_list, "no container found!"
-        logger.info("blockade created with containers %s", ' 
'.join(container_list))
+        logger.info("blockade created with containers %s",
+                    ' '.join(container_list))
 
         return container_list
 
     @classmethod
     def cluster_destroy(cls, docker_compose_file):
+        logger.info("Running docker-compose -f %s down", docker_compose_file)
         call(["docker-compose", "-f", docker_compose_file, "down"])
 
     @classmethod
-    def run_freon(cls, docker_compose_file, num_volumes, num_buckets, 
num_keys, key_size,
-                  replication_type, replication_factor):
+    def run_freon(cls, docker_compose_file, num_volumes, num_buckets,
+                  num_keys, key_size, replication_type, replication_factor):
         # run freon
-        cmd = "docker-compose -f %s exec ozoneManager /opt/hadoop/bin/ozone 
freon rk " \
-              "--numOfVolumes %s --numOfBuckets %s --numOfKeys %s --keySize %s 
" \
-              "--replicationType %s --factor %s" % (docker_compose_file, 
num_volumes,
-                                                    num_buckets, num_keys, 
key_size, replication_type,
-                                                    replication_factor)
+        cmd = "docker-compose -f %s " \
+              "exec ozoneManager /opt/hadoop/bin/ozone " \
+              "freon rk " \
+              "--numOfVolumes %s " \
+              "--numOfBuckets %s " \
+              "--numOfKeys %s " \
+              "--keySize %s " \
+              "--replicationType %s " \
+              "--factor %s" % (docker_compose_file, num_volumes,
+                               num_buckets, num_keys, key_size,
+                               replication_type, replication_factor)
         exit_code, output = cls.run_cmd(cmd)
         return exit_code, output
 
@@ -78,7 +87,8 @@ class ClusterUtils(object):
             command = ' '.join(cmd)
         logger.info(" RUNNING: " + command)
         all_output = ""
-        myprocess = subprocess.Popen(cmd, stdout=subprocess.PIPE, 
stderr=subprocess.STDOUT, shell=True)
+        myprocess = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                                     stderr=subprocess.STDOUT, shell=True)
         while myprocess.poll() is None:
             op = myprocess.stdout.readline()
             if op:
@@ -97,38 +107,49 @@ class ClusterUtils(object):
 
     @classmethod
     def get_ozone_confkey_value(cls, docker_compose_file, key_name):
-        cmd = "docker-compose -f %s exec ozoneManager /opt/hadoop/bin/ozone 
getconf -confKey %s" \
-              %(docker_compose_file, key_name)
+        cmd = "docker-compose -f %s " \
+              "exec ozoneManager /opt/hadoop/bin/ozone " \
+              "getconf -confKey %s" \
+              % (docker_compose_file, key_name)
         exit_code, output = cls.run_cmd(cmd)
-        assert exit_code == 0, "getconf of key=[%s] failed with output=[%s]" 
%(key_name, output)
-        return output
+        assert exit_code == 0, "getconf of key=[%s] failed with output=[%s]" \
+                               %(key_name, output)
+        return str(output).strip()
 
     @classmethod
     def find_scm_uuid(cls, docker_compose_file):
         """
         This function returns scm uuid.
         """
-        ozone_metadata_dir = cls.get_ozone_confkey_value(docker_compose_file, 
"ozone.metadata.dirs")
-        cmd = "docker-compose -f %s exec scm cat %s/scm/current/VERSION" % 
(docker_compose_file, ozone_metadata_dir)
+        ozone_metadata_dir = cls.get_ozone_confkey_value(docker_compose_file,
+                                                         "ozone.metadata.dirs")
+        cmd = "docker-compose -f %s exec scm cat %s/scm/current/VERSION" % \
+              (docker_compose_file, ozone_metadata_dir)
         exit_code, output = cls.run_cmd(cmd)
         assert exit_code == 0, "get scm UUID failed with output=[%s]" % output
         output_list = output.split("\n")
-        output_list = list(filter(lambda x: re.search("\w+=\w+", x), 
output_list))
+        output_list = list(filter(lambda x: re.search("\w+=\w+", x),
+                                  output_list))
         output_dict = dict(map(lambda x: x.split("="), output_list))
         return str(output_dict['scmUuid']).strip()
 
     @classmethod
-    def find_datanode_container_status(cls, docker_compose_file, 
datanode_index):
+    def find_datanode_container_status(cls, docker_compose_file,
+                                       datanode_index):
         """
         This function returns the datanode's container replica state.
         """
-        datanode_dir = cls.get_ozone_confkey_value(docker_compose_file, 
"hdds.datanode.dir")
+        datanode_dir = cls.get_ozone_confkey_value(docker_compose_file,
+                                                   "hdds.datanode.dir")
         scm_uuid = cls.find_scm_uuid(docker_compose_file)
-        container_parent_path = "%s/hdds/%s/current/containerDir0" 
%(datanode_dir, scm_uuid)
-        cmd = "docker-compose -f %s exec --index=%s datanode find %s -type f 
-name '*.container'" \
+        container_parent_path = "%s/hdds/%s/current/containerDir0" % \
+                                (datanode_dir, scm_uuid)
+        cmd = "docker-compose -f %s exec --index=%s datanode find %s -type f " 
\
+              "-name '*.container'" \
               % (docker_compose_file, datanode_index, container_parent_path)
         exit_code, output = cls.run_cmd(cmd)
-        assert exit_code == 0, "command=[%s] failed with output=[%s]" % (cmd, 
output)
+        assert exit_code == 0, "command=[%s] failed with output=[%s]" % \
+                               (cmd, output)
         assert output, "No container info present"
         container_list = map(str.strip, output.split("\n"))
         container_state = None
@@ -136,9 +157,12 @@ class ClusterUtils(object):
             cmd = "docker-compose -f %s exec --index=%s datanode cat %s" \
                   % (docker_compose_file, datanode_index, container_path)
             exit_code, output = cls.run_cmd(cmd)
-            assert exit_code == 0, "command=[%s] failed with output=[%s]" % 
(cmd, output)
+            assert exit_code == 0, "command=[%s] failed with output=[%s]" % \
+                                   (cmd, output)
             container_db_list = output.split("\n")
-            container_db_list = list(filter(lambda x: re.search("\w+:\s\w+", 
x), container_db_list))
+            container_db_list = \
+                list(filter(lambda x: re.search("\w+:\s\w+", x),
+                            container_db_list))
             container_db_info = "\n".join(container_db_list)
             container_db_dict = yaml.load(container_db_info)
             for key, value in container_db_dict.items():
@@ -146,7 +170,8 @@ class ClusterUtils(object):
             if not container_state:
                 container_state = container_db_dict['state']
             else:
-                assert container_db_dict['state'] == container_state, "all 
containers are not in same state"
+                assert container_db_dict['state'] == container_state, \
+                    "all containers are not in same state"
 
         return container_state
 
@@ -157,7 +182,10 @@ class ClusterUtils(object):
         """
         all_datanode_container_status = []
         for index in range(scale):
-            
all_datanode_container_status.append(cls.find_datanode_container_status(docker_compose_file,
 index+1))
-        logger.info("All datanodes container status: %s", ' 
'.join(all_datanode_container_status))
+            all_datanode_container_status.append(
+                cls.find_datanode_container_status(docker_compose_file,
+                                                   index+1))
+        logger.info("All datanodes container status: %s",
+                    ' '.join(all_datanode_container_status))
 
         return all_datanode_container_status
\ No newline at end of file
diff --git a/hadoop-ozone/dist/src/main/blockade/conftest.py 
b/hadoop-ozone/dist/src/main/blockade/conftest.py
index d538e6c..31e2ccd 100644
--- a/hadoop-ozone/dist/src/main/blockade/conftest.py
+++ b/hadoop-ozone/dist/src/main/blockade/conftest.py
@@ -18,18 +18,29 @@ import os
 
 
 def pytest_addoption(parser):
-    parser.addoption("--output-dir", action="store",
+    parser.addoption("--output-dir",
+                     action="store",
                      default="/tmp/BlockadeTests",
-                     help="location of output directory where output log and 
plot files will be created")
+                     help="location of output directory where output log "
+                          "and plot files will be created")
     parser.addoption("--log-format",
                      action="store",
-                     
default="%(asctime)s|%(levelname)s|%(threadName)s|%(filename)s:%(lineno)s -"
+                     default="%(asctime)s|%(levelname)s|%(threadName)s|"
+                             "%(filename)s:%(lineno)s -"
                              " %(funcName)s()|%(message)s",
                      help="specify log format")
-    parser.addoption("--log-level", action="store", default="info", 
help="specify log level")
+    parser.addoption("--log-level",
+                     action="store",
+                     default="info",
+                     help="specify log level")
+    parser.addoption("--containerStatusSleep",
+                     action="store",
+                     default="900",
+                     help="sleep time before checking container status")
 
 
 def pytest_configure(config):
+    os.environ["CONTAINER_STATUS_SLEEP"] = config.option.containerStatusSleep
     outputdir = config.option.output_dir
     try:
         os.makedirs(outputdir)
@@ -42,7 +53,10 @@ def pytest_configure(config):
     else:
         loglevel = eval("logging." + config.option.log_level.upper())
     logformatter = logging.Formatter(config.option.log_format)
-    logging.basicConfig(filename=log_file, filemode='w', level=loglevel, 
format=config.option.log_format)
+    logging.basicConfig(filename=log_file,
+                        filemode='w',
+                        level=loglevel,
+                        format=config.option.log_format)
     console = logging.StreamHandler()
     console.setLevel(loglevel)
     console.setFormatter(logformatter)
@@ -55,11 +69,13 @@ def pytest_report_teststatus(report):
     if report.outcome == 'skipped':
         pass
     elif report.when == 'setup':
-        logger.info("RUNNING TEST \"%s\" at location \"%s\" at line number 
\"%s\"" % (name, loc, str(line)))
+        logger.info("RUNNING TEST \"%s\" at location \"%s\" at line number"
+                    " \"%s\"" % (name, loc, str(line)))
     elif report.when == 'call':
-        logger.info("TEST \"%s\" %s in %3.2f seconds" % (name, 
report.outcome.upper(), report.duration))
+        logger.info("TEST \"%s\" %s in %3.2f seconds" %
+                    (name, report.outcome.upper(), report.duration))
 
 
 def pytest_sessionfinish(session):
     logger = logging.getLogger('main')
-    logger.info("ALL TESTS FINISHED")
+    logger.info("ALL TESTS FINISHED")
\ No newline at end of file
diff --git 
a/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py 
b/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py
index cc2bddc..eecc7ea 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py
@@ -15,7 +15,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""This module has apis to create and remove a blockade cluster"""
 import os
 import time
 import logging
@@ -25,7 +24,8 @@ from clusterUtils.cluster_utils import ClusterUtils
 
 logger = logging.getLogger(__name__)
 parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
-FILE = os.path.join(parent_dir, "compose", "ozone", "docker-compose.yaml")
+FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
+                    "docker-compose.yaml")
 SCALE = 3
 CONTAINER_LIST = []
 OM = []
@@ -38,12 +38,14 @@ def setup():
     Blockade.blockade_destroy()
     CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
     exit_code, output = Blockade.blockade_status()
-    assert exit_code == 0, "blockade status command failed with output=[%s]" % 
output
+    assert exit_code == 0, "blockade status command failed with output=[%s]" % 
\
+                           output
     OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
     SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
     DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
 
-    exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", 
"THREE")
+    exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
+                                               "THREE")
     assert exit_code == 0, "freon run failed with output=[%s]" % output
 
 
@@ -58,25 +60,38 @@ def teardown_module():
 
 def test_datanode_isolation_one_node():
     """
-    In this test, one of the datanodes cannot communicate with other two 
datanodes.
+    In this test, one of the datanodes (first datanode) cannot communicate
+    with other two datanodes.
     All datanodes can communicate with SCM.
+    Expectation :
+    The container replica state in first datanode should be quasi-closed.
+    The container replica state in other datanodes should be closed.
     """
     first_set = [OM[0], SCM[0], DATANODES[0]]
     second_set = [OM[0], SCM[0], DATANODES[1], DATANODES[2]]
     Blockade.blockade_create_partition(first_set, second_set)
     Blockade.blockade_status()
     ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
-    logger.info("Waiting for 480 seconds before checking container status")
-    time.sleep(480)
-    all_datanodes_container_status = 
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
-    count_closed_container_datanodes = filter(lambda x: x == 'CLOSED', 
all_datanodes_container_status)
-    assert len(count_closed_container_datanodes) == 3, "The container should 
have three closed replicas."
+    logger.info("Waiting for %s seconds before checking container status",
+                os.environ["CONTAINER_STATUS_SLEEP"])
+    time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+    all_datanodes_container_status = \
+        ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
+    first_datanode_status = all_datanodes_container_status[0]
+    count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
+                                              all_datanodes_container_status)
+    assert first_datanode_status == 'QUASI_CLOSED'
+    assert len(count_closed_container_datanodes) == 2, \
+        "The container should have three closed replicas."
 
 
 def test_datanode_isolation_all():
     """
-    In this test, none of the datanodes can communicate with other two 
datanodes.
+    In this test, none of the datanodes can communicate with other two
+    datanodes.
     All datanodes can communicate with SCM.
+    Expectation : The container should eventually have at least two closed
+    replicas.
     """
     first_set = [OM[0], SCM[0], DATANODES[0]]
     second_set = [OM[0], SCM[0], DATANODES[1]]
@@ -84,8 +99,12 @@ def test_datanode_isolation_all():
     Blockade.blockade_create_partition(first_set, second_set, third_set)
     Blockade.blockade_status()
     ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
-    logger.info("Waiting for 480 seconds before checking container status")
-    time.sleep(480)
-    all_datanodes_container_status = 
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
-    count_closed_container_datanodes = filter(lambda x: x == 'CLOSED', 
all_datanodes_container_status)
-    assert len(count_closed_container_datanodes) >= 2, "The container should 
have at least two closed replicas."
\ No newline at end of file
+    logger.info("Waiting for %s seconds before checking container status",
+                os.environ["CONTAINER_STATUS_SLEEP"])
+    time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+    all_datanodes_container_status = \
+        ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
+    count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
+                                              all_datanodes_container_status)
+    assert len(count_closed_container_datanodes) >= 2, \
+        "The container should have at least two closed replicas."
\ No newline at end of file
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_flaky.py 
b/hadoop-ozone/dist/src/main/blockade/test_blockade_flaky.py
index 01aa449..3da7164 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_flaky.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_flaky.py
@@ -15,7 +15,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""This module has apis to create and remove a blockade cluster"""
 import os
 import time
 import logging
@@ -26,7 +25,8 @@ from clusterUtils.cluster_utils import ClusterUtils
 
 logger = logging.getLogger(__name__)
 parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
-FILE = os.path.join(parent_dir, "compose", "ozone", "docker-compose.yaml")
+FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
+                    "docker-compose.yaml")
 SCALE = 6
 CONTAINER_LIST = []
 
@@ -36,7 +36,8 @@ def setup_module():
     Blockade.blockade_destroy()
     CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
     exit_code, output = Blockade.blockade_status()
-    assert exit_code == 0, "blockade status command failed with output=[%s]" % 
output
+    assert exit_code == 0, "blockade status command failed with output=[%s]" % 
\
+                           output
 
 
 def teardown_module():
@@ -54,5 +55,6 @@ def teardown():
 def test_flaky(flaky_nodes):
     Blockade.make_flaky(flaky_nodes, CONTAINER_LIST)
     Blockade.blockade_status()
-    exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", 
"THREE")
+    exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
+                                               "THREE")
     assert exit_code == 0, "freon run failed with output=[%s]" % output
\ No newline at end of file
diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py 
b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py
new file mode 100644
index 0000000..69c865c
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py
@@ -0,0 +1,116 @@
+#!/usr/bin/python
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import time
+import logging
+from blockadeUtils.blockade import Blockade
+from clusterUtils.cluster_utils import ClusterUtils
+
+
+logger = logging.getLogger(__name__)
+parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
+                    "docker-compose.yaml")
+SCALE = 3
+CONTAINER_LIST = []
+OM = []
+SCM = []
+DATANODES = []
+
+
+def setup():
+    global CONTAINER_LIST, OM, SCM, DATANODES
+    Blockade.blockade_destroy()
+    CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
+    exit_code, output = Blockade.blockade_status()
+    assert exit_code == 0, "blockade status command failed with output=[%s]" % 
\
+                           output
+    OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
+    SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
+    DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
+
+    exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
+                                               "THREE")
+    assert exit_code == 0, "freon run failed with output=[%s]" % output
+
+
+def teardown():
+    logger.info("Inside teardown")
+    Blockade.blockade_destroy()
+
+
+def teardown_module():
+    ClusterUtils.cluster_destroy(FILE)
+
+
+def test_one_dn_isolate_scm_other_dn():
+    """
+    In this test, one of the datanodes cannot communicate with SCM and other
+    datanodes.
+    Other datanodes can communicate with each other and SCM .
+    Expectation : The container should eventually have two closed replicas.
+    """
+    first_set = [OM[0], SCM[0], DATANODES[1], DATANODES[2]]
+    second_set = [OM[0], DATANODES[0]]
+    Blockade.blockade_create_partition(first_set, second_set)
+    Blockade.blockade_status()
+    ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+    logger.info("Waiting for %s seconds before checking container status",
+                os.environ["CONTAINER_STATUS_SLEEP"])
+    time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+    all_datanodes_container_status = \
+        ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
+    count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
+                                              all_datanodes_container_status)
+    assert len(count_closed_container_datanodes) == 2, \
+        "The container should have two closed replicas."
+
+
+def test_one_dn_isolate_other_dn():
+    """
+    In this test, one of the datanodes (first datanode) cannot communicate
+    other datanodes but can communicate with SCM.
+    One of the other two datanodes (second datanode) cannot communicate with
+    SCM.
+    Expectation :
+    The container replica state in first datanode can be either closed or
+    quasi-closed.
+    The container replica state in second datanode can be either closed or 
open.
+    The container should eventually have at lease one closed replica.
+    """
+    first_set = [OM[0], SCM[0], DATANODES[0]]
+    second_set = [OM[0], DATANODES[1], DATANODES[2]]
+    third_set = [SCM[0], DATANODES[2]]
+    Blockade.blockade_create_partition(first_set, second_set, third_set)
+    Blockade.blockade_status()
+    ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
+    logger.info("Waiting for %s seconds before checking container status",
+                os.environ["CONTAINER_STATUS_SLEEP"])
+    time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+    all_datanodes_container_status = \
+        ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
+    count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
+                                              all_datanodes_container_status)
+    first_datanode_status = all_datanodes_container_status[0]
+    second_datanode_status = all_datanodes_container_status[1]
+    assert first_datanode_status == 'CLOSED' or \
+           first_datanode_status == "QUASI_CLOSED"
+    assert second_datanode_status == 'CLOSED' or \
+           second_datanode_status == "OPEN"
+    assert len(count_closed_container_datanodes) >= 1, \
+        "The container should have at least one closed replica"
\ No newline at end of file
diff --git 
a/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py 
b/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py
similarity index 50%
copy from 
hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py
copy to hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py
index cc2bddc..0af9745 100644
--- a/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py
+++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py
@@ -15,7 +15,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""This module has apis to create and remove a blockade cluster"""
 import os
 import time
 import logging
@@ -25,7 +24,8 @@ from clusterUtils.cluster_utils import ClusterUtils
 
 logger = logging.getLogger(__name__)
 parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
-FILE = os.path.join(parent_dir, "compose", "ozone", "docker-compose.yaml")
+FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
+                    "docker-compose.yaml")
 SCALE = 3
 CONTAINER_LIST = []
 OM = []
@@ -38,12 +38,14 @@ def setup():
     Blockade.blockade_destroy()
     CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
     exit_code, output = Blockade.blockade_status()
-    assert exit_code == 0, "blockade status command failed with output=[%s]" % 
output
+    assert exit_code == 0, "blockade status command failed with output=[%s]" % 
\
+                           output
     OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
     SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
     DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
 
-    exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", 
"THREE")
+    exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
+                                               "THREE")
     assert exit_code == 0, "freon run failed with output=[%s]" % output
 
 
@@ -56,36 +58,53 @@ def teardown_module():
     ClusterUtils.cluster_destroy(FILE)
 
 
-def test_datanode_isolation_one_node():
+def test_scm_isolation_one_node():
     """
-    In this test, one of the datanodes cannot communicate with other two 
datanodes.
-    All datanodes can communicate with SCM.
+    In this test, one of the datanodes cannot communicate with SCM.
+    Other datanodes can communicate with SCM.
+    Expectation : The container should eventually have at least two closed
+    replicas.
     """
-    first_set = [OM[0], SCM[0], DATANODES[0]]
+    first_set = [OM[0], DATANODES[0], DATANODES[1], DATANODES[2]]
     second_set = [OM[0], SCM[0], DATANODES[1], DATANODES[2]]
     Blockade.blockade_create_partition(first_set, second_set)
     Blockade.blockade_status()
     ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
-    logger.info("Waiting for 480 seconds before checking container status")
-    time.sleep(480)
-    all_datanodes_container_status = 
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
-    count_closed_container_datanodes = filter(lambda x: x == 'CLOSED', 
all_datanodes_container_status)
-    assert len(count_closed_container_datanodes) == 3, "The container should 
have three closed replicas."
+    logger.info("Waiting for %s seconds before checking container status",
+                os.environ["CONTAINER_STATUS_SLEEP"])
+    time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+    all_datanodes_container_status = \
+        ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
+    count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
+                                              all_datanodes_container_status)
+    assert len(count_closed_container_datanodes) >= 2, \
+        "The container should have at least two closed replicas."
 
 
-def test_datanode_isolation_all():
+def test_scm_isolation_two_node():
     """
-    In this test, none of the datanodes can communicate with other two 
datanodes.
-    All datanodes can communicate with SCM.
+    In this test, two datanodes cannot communicate with SCM.
+    Expectation : The container should eventually have at three closed replicas
+     or, two open replicas and one quasi-closed replica.
     """
-    first_set = [OM[0], SCM[0], DATANODES[0]]
+    first_set = [OM[0], DATANODES[0], DATANODES[1], DATANODES[2]]
     second_set = [OM[0], SCM[0], DATANODES[1]]
-    third_set = [OM[0], SCM[0], DATANODES[2]]
-    Blockade.blockade_create_partition(first_set, second_set, third_set)
+    Blockade.blockade_create_partition(first_set, second_set)
     Blockade.blockade_status()
     ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
-    logger.info("Waiting for 480 seconds before checking container status")
-    time.sleep(480)
-    all_datanodes_container_status = 
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
-    count_closed_container_datanodes = filter(lambda x: x == 'CLOSED', 
all_datanodes_container_status)
-    assert len(count_closed_container_datanodes) >= 2, "The container should 
have at least two closed replicas."
\ No newline at end of file
+    logger.info("Waiting for %s seconds before checking container status",
+                os.environ["CONTAINER_STATUS_SLEEP"])
+    time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
+    all_datanodes_container_status = \
+        ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
+    count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
+                                              all_datanodes_container_status)
+    count_qausi_closed_container_datanodes = \
+        filter(lambda x: x == 'QUASI_CLOSED', all_datanodes_container_status)
+    count_open_container_datanodes = filter(lambda x: x == 'OPEN',
+                                            all_datanodes_container_status)
+    assert len(count_closed_container_datanodes) == 3 or \
+        (len(count_open_container_datanodes) == 2 and
+            len(count_qausi_closed_container_datanodes) == 1), \
+        "The container should have three closed replicas or two open " \
+        "replicas and one quasi_closed replica."
\ No newline at end of file
diff --git 
a/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-compose.yaml 
b/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-compose.yaml
new file mode 100644
index 0000000..0a6a9d8
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-compose.yaml
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+version: "3"
+services:
+   datanode:
+      image: apache/hadoop-runner
+      volumes:
+        - ../..:/opt/hadoop
+      ports:
+        - 9864
+      command: ["/opt/hadoop/bin/ozone","datanode"]
+      env_file:
+        - ./docker-config
+   ozoneManager:
+      image: apache/hadoop-runner
+      volumes:
+         - ../..:/opt/hadoop
+      ports:
+         - 9874:9874
+      environment:
+         ENSURE_OM_INITIALIZED: /data/metadata/ozoneManager/current/VERSION
+      env_file:
+          - ./docker-config
+      command: ["/opt/hadoop/bin/ozone","om"]
+   scm:
+      image: apache/hadoop-runner
+      volumes:
+         - ../..:/opt/hadoop
+      ports:
+         - 9876:9876
+      env_file:
+          - ./docker-config
+      environment:
+          ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION
+      command: ["/opt/hadoop/bin/ozone","scm"]
diff --git a/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-config 
b/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-config
new file mode 100644
index 0000000..506340c
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-config
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+OZONE-SITE.XML_ozone.om.address=ozoneManager
+OZONE-SITE.XML_ozone.om.http-address=ozoneManager:9874
+OZONE-SITE.XML_ozone.scm.names=scm
+OZONE-SITE.XML_ozone.enabled=True
+OZONE-SITE.XML_ozone.scm.datanode.id=/data/datanode.id
+OZONE-SITE.XML_ozone.scm.block.client.address=scm
+OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata
+OZONE-SITE.XML_ozone.handler.type=distributed
+OZONE-SITE.XML_ozone.scm.client.address=scm
+OZONE-SITE.XML_ozone.scm.dead.node.interval=5m
+OZONE-SITE.XML_ozone.replication=1
+OZONE-SITE.XML_hdds.datanode.dir=/data/hdds
+HDFS-SITE.XML_rpc.metrics.quantile.enable=true
+HDFS-SITE.XML_rpc.metrics.percentiles.intervals=60,300
+LOG4J.PROPERTIES_log4j.rootLogger=INFO, stdout
+LOG4J.PROPERTIES_log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+LOG4J.PROPERTIES_log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+LOG4J.PROPERTIES_log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd 
HH:mm:ss} %-5p %c{1}:%L - %m%n
+LOG4J.PROPERTIES_log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
+LOG4J.PROPERTIES_log4j.logger.org.apache.ratis.conf.ConfUtils=WARN
+LOG4J.PROPERTIES_log4j.logger.org.apache.hadoop.security.ShellBasedUnixGroupsMapping=ERROR
+
+#Enable this variable to print out all hadoop rpc traffic to the stdout. See 
http://byteman.jboss.org/ to define your own instrumentation.
+#BYTEMAN_SCRIPT_URL=https://raw.githubusercontent.com/apache/hadoop/trunk/dev-support/byteman/hadooprpc.btm
+
+#LOG4J2.PROPERTIES_* are for Ozone Audit Logging
+LOG4J2.PROPERTIES_monitorInterval=30
+LOG4J2.PROPERTIES_filter=read,write
+LOG4J2.PROPERTIES_filter.read.type=MarkerFilter
+LOG4J2.PROPERTIES_filter.read.marker=READ
+LOG4J2.PROPERTIES_filter.read.onMatch=DENY
+LOG4J2.PROPERTIES_filter.read.onMismatch=NEUTRAL
+LOG4J2.PROPERTIES_filter.write.type=MarkerFilter
+LOG4J2.PROPERTIES_filter.write.marker=WRITE
+LOG4J2.PROPERTIES_filter.write.onMatch=NEUTRAL
+LOG4J2.PROPERTIES_filter.write.onMismatch=NEUTRAL
+LOG4J2.PROPERTIES_appenders=console, rolling
+LOG4J2.PROPERTIES_appender.console.type=Console
+LOG4J2.PROPERTIES_appender.console.name=STDOUT
+LOG4J2.PROPERTIES_appender.console.layout.type=PatternLayout
+LOG4J2.PROPERTIES_appender.console.layout.pattern=%d{DEFAULT} | %-5level | 
%c{1} | %msg | %throwable{3} %n
+LOG4J2.PROPERTIES_appender.rolling.type=RollingFile
+LOG4J2.PROPERTIES_appender.rolling.name=RollingFile
+LOG4J2.PROPERTIES_appender.rolling.fileName 
=${sys:hadoop.log.dir}/om-audit-${hostName}.log
+LOG4J2.PROPERTIES_appender.rolling.filePattern=${sys:hadoop.log.dir}/om-audit-${hostName}-%d{yyyy-MM-dd-HH-mm-ss}-%i.log.gz
+LOG4J2.PROPERTIES_appender.rolling.layout.type=PatternLayout
+LOG4J2.PROPERTIES_appender.rolling.layout.pattern=%d{DEFAULT} | %-5level | 
%c{1} | %msg | %throwable{3} %n
+LOG4J2.PROPERTIES_appender.rolling.policies.type=Policies
+LOG4J2.PROPERTIES_appender.rolling.policies.time.type=TimeBasedTriggeringPolicy
+LOG4J2.PROPERTIES_appender.rolling.policies.time.interval=86400
+LOG4J2.PROPERTIES_appender.rolling.policies.size.type=SizeBasedTriggeringPolicy
+LOG4J2.PROPERTIES_appender.rolling.policies.size.size=64MB
+LOG4J2.PROPERTIES_loggers=audit
+LOG4J2.PROPERTIES_logger.audit.type=AsyncLogger
+LOG4J2.PROPERTIES_logger.audit.name=OMAudit
+LOG4J2.PROPERTIES_logger.audit.level=INFO
+LOG4J2.PROPERTIES_logger.audit.appenderRefs=rolling
+LOG4J2.PROPERTIES_logger.audit.appenderRef.file.ref=RollingFile
+LOG4J2.PROPERTIES_rootLogger.level=INFO
+LOG4J2.PROPERTIES_rootLogger.appenderRefs=stdout
+LOG4J2.PROPERTIES_rootLogger.appenderRef.stdout.ref=STDOUT


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to