Repository: ambari Updated Branches: refs/heads/trunk 766cb64ef -> c3e0771e4
AMBARI-10602 HDFS namenode_opt_newsize and namenode_opt_maxnewsize not using number of datanodes in calculation (dsen) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/c3e0771e Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/c3e0771e Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/c3e0771e Branch: refs/heads/trunk Commit: c3e0771e40fb255ff69bb6cbc70495435486512e Parents: 766cb64 Author: Dmytro Sen <d...@apache.org> Authored: Tue Apr 21 11:55:56 2015 +0300 Committer: Dmytro Sen <d...@apache.org> Committed: Tue Apr 21 11:55:56 2015 +0300 ---------------------------------------------------------------------- .../stacks/HDP/2.0.6/services/stack_advisor.py | 5 +- .../stacks/HDP/2.2/services/stack_advisor.py | 59 ++++++-- .../stacks/2.0.6/common/test_stack_advisor.py | 149 +++++++++++++++++++ .../stacks/2.2/common/test_stack_advisor.py | 58 ++++---- 4 files changed, 233 insertions(+), 38 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/c3e0771e/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py index 7af01a4..0b54b38 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py @@ -213,9 +213,8 @@ class HDP206StackAdvisor(DefaultStackAdvisor): service = [serviceEntry for serviceEntry in services["services"] if serviceEntry["StackServices"]["service_name"] == serviceName][0] components = [componentEntry for componentEntry in service["components"] if componentEntry["StackServiceComponents"]["component_name"] == componentName] if 
(len(components) > 0 and len(components[0]["StackServiceComponents"]["hostnames"]) > 0): - # component available - determine hosts and memory - componentHostname = components[0]["StackServiceComponents"]["hostnames"][0] - componentHosts = [host for host in hosts["items"] if host["Hosts"]["host_name"] == componentHostname] + componentHostnames = components[0]["StackServiceComponents"]["hostnames"] + componentHosts = [host for host in hosts["items"] if host["Hosts"]["host_name"] in componentHostnames] return componentHosts return [] http://git-wip-us.apache.org/repos/asf/ambari/blob/c3e0771e/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py b/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py index 7f163dd..6289e6a 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py @@ -98,26 +98,67 @@ class HDP22StackAdvisor(HDP21StackAdvisor): putHdfsSiteProperty("dfs.namenode.safemode.threshold-pct", "0.99f" if len(namenodeHosts) > 1 else "1.0f") putHdfsEnvProperty = self.putProperty(configurations, "hadoop-env", services) - putHdfsEnvProperty('namenode_heapsize', max(int(clusterData['totalAvailableRam'] / 2), 1024)) - putHdfsEnvProperty('namenode_opt_newsize', max(int(clusterData['totalAvailableRam'] / 8), 128)) - putHdfsEnvProperty('namenode_opt_maxnewsize', max(int(clusterData['totalAvailableRam'] / 8), 256)) - - # Property Attributes putHdfsEnvPropertyAttribute = self.putPropertyAttribute(configurations, "hadoop-env") + + nn_max_heapsize=None if (namenodeHosts is not None and len(namenodeHosts) > 0): if len(namenodeHosts) > 1: - namenode_heapsize = min(int(namenodeHosts[0]["Hosts"]["total_mem"]), int(namenodeHosts[1]["Hosts"]["total_mem"])) / 1024 + 
nn_max_heapsize = min(int(namenodeHosts[0]["Hosts"]["total_mem"]), int(namenodeHosts[1]["Hosts"]["total_mem"])) / 1024 else: - namenode_heapsize = int(namenodeHosts[0]["Hosts"]["total_mem"] / 1024) # total_mem in kb + nn_max_heapsize = int(namenodeHosts[0]["Hosts"]["total_mem"] / 1024) # total_mem in kb - putHdfsEnvPropertyAttribute('namenode_heapsize', 'maximum', namenode_heapsize) + putHdfsEnvPropertyAttribute('namenode_heapsize', 'maximum', nn_max_heapsize) + + #Old fallback values + putHdfsEnvProperty('namenode_heapsize', max(int(clusterData['totalAvailableRam'] / 2), 1024)) + putHdfsEnvProperty('namenode_opt_newsize', max(int(clusterData['totalAvailableRam'] / 8), 128)) + putHdfsEnvProperty('namenode_opt_maxnewsize', max(int(clusterData['totalAvailableRam'] / 8), 256)) datanodeHosts = self.getHostsWithComponent("HDFS", "DATANODE", services, hosts) - if (datanodeHosts is not None and len(datanodeHosts)>0): + if datanodeHosts is not None and len(datanodeHosts) > 0: min_datanode_ram_kb = 1073741824 # 1 TB for datanode in datanodeHosts: ram_kb = datanode['Hosts']['total_mem'] min_datanode_ram_kb = min(min_datanode_ram_kb, ram_kb) + + datanodeFilesM = len(datanodeHosts)*dataDirsCount/10 # in millions, # of files = # of disks * 100'000 + nn_memory_configs = [ + {'nn_heap':1024, 'nn_opt':128}, + {'nn_heap':3072, 'nn_opt':512}, + {'nn_heap':5376, 'nn_opt':768}, + {'nn_heap':9984, 'nn_opt':1280}, + {'nn_heap':14848, 'nn_opt':2048}, + {'nn_heap':19456, 'nn_opt':2560}, + {'nn_heap':24320, 'nn_opt':3072}, + {'nn_heap':33536, 'nn_opt':4352}, + {'nn_heap':47872, 'nn_opt':6144}, + {'nn_heap':59648, 'nn_opt':7680}, + {'nn_heap':71424, 'nn_opt':8960}, + {'nn_heap':94976, 'nn_opt':8960} + ] + index = { + datanodeFilesM < 1 : 0, + 1 <= datanodeFilesM < 5 : 1, + 5 <= datanodeFilesM < 10 : 2, + 10 <= datanodeFilesM < 20 : 3, + 20 <= datanodeFilesM < 30 : 4, + 30 <= datanodeFilesM < 40 : 5, + 40 <= datanodeFilesM < 50 : 6, + 50 <= datanodeFilesM < 70 : 7, + 70 <= datanodeFilesM < 
100 : 8, + 100 <= datanodeFilesM < 125 : 9, + 125 <= datanodeFilesM < 150 : 10, + 150 <= datanodeFilesM : 11 + }[1] + + nn_memory_config = nn_memory_configs[index] + + #override with new values if applicable + if nn_max_heapsize is not None and nn_max_heapsize <= nn_memory_config['nn_heap']: + putHdfsEnvProperty('namenode_heapsize', nn_memory_config['nn_heap']) + putHdfsEnvProperty('namenode_opt_newsize', nn_memory_config['nn_opt']) + putHdfsEnvProperty('namenode_opt_maxnewsize', nn_memory_config['nn_opt']) + putHdfsEnvPropertyAttribute('dtnode_heapsize', 'maximum', int(min_datanode_ram_kb/1024)) putHdfsSitePropertyAttribute = self.putPropertyAttribute(configurations, "hdfs-site") http://git-wip-us.apache.org/repos/asf/ambari/blob/c3e0771e/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py b/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py index 1f5549b..ef6def1 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py +++ b/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py @@ -585,3 +585,152 @@ class TestHDP206StackAdvisor(TestCase): res = self.stackAdvisor.validateHDFSConfigurationsEnv(properties, recommendedDefaults, configurations, '', '') self.assertEquals(res, res_expected) + + def test_getHostsWithComponent(self): + services = {"services": + [{"StackServices": + {"service_name" : "HDFS", + "service_version" : "2.6.0.2.2" + }, + "components":[ + { + "href":"/api/v1/stacks/HDP/versions/2.2/services/HDFS/components/DATANODE", + "StackServiceComponents":{ + "advertise_version":"true", + "cardinality":"1+", + "component_category":"SLAVE", + "component_name":"DATANODE", + "custom_commands":[ + + ], + "display_name":"DataNode", + "is_client":"false", + "is_master":"false", + "service_name":"HDFS", + "stack_name":"HDP", + 
"stack_version":"2.2", + "hostnames":[ + "host1", + "host2" + ] + }, + "dependencies":[ + + ] + }, + { + "href":"/api/v1/stacks/HDP/versions/2.2/services/HDFS/components/JOURNALNODE", + "StackServiceComponents":{ + "advertise_version":"true", + "cardinality":"0+", + "component_category":"SLAVE", + "component_name":"JOURNALNODE", + "custom_commands":[ + + ], + "display_name":"JournalNode", + "is_client":"false", + "is_master":"false", + "service_name":"HDFS", + "stack_name":"HDP", + "stack_version":"2.2", + "hostnames":[ + "host1" + ] + }, + "dependencies":[ + { + "href":"/api/v1/stacks/HDP/versions/2.2/services/HDFS/components/JOURNALNODE/dependencies/HDFS_CLIENT", + "Dependencies":{ + "component_name":"HDFS_CLIENT", + "dependent_component_name":"JOURNALNODE", + "dependent_service_name":"HDFS", + "stack_name":"HDP", + "stack_version":"2.2" + } + } + ] + }, + { + "href":"/api/v1/stacks/HDP/versions/2.2/services/HDFS/components/NAMENODE", + "StackServiceComponents":{ + "advertise_version":"true", + "cardinality":"1-2", + "component_category":"MASTER", + "component_name":"NAMENODE", + "custom_commands":[ + "DECOMMISSION", + "REBALANCEHDFS" + ], + "display_name":"NameNode", + "is_client":"false", + "is_master":"true", + "service_name":"HDFS", + "stack_name":"HDP", + "stack_version":"2.2", + "hostnames":[ + "host2" + ] + }, + "dependencies":[ + + ] + }, + ], + }], + "configurations": {} + } + hosts = { + "items" : [ + { + "href" : "/api/v1/hosts/host1", + "Hosts" : { + "cpu_count" : 1, + "host_name" : "host1", + "os_arch" : "x86_64", + "os_type" : "centos6", + "ph_cpu_count" : 1, + "public_host_name" : "host1", + "rack_info" : "/default-rack", + "total_mem" : 2097152 + } + }, + { + "href" : "/api/v1/hosts/host2", + "Hosts" : { + "cpu_count" : 1, + "host_name" : "host2", + "os_arch" : "x86_64", + "os_type" : "centos6", + "ph_cpu_count" : 1, + "public_host_name" : "host2", + "rack_info" : "/default-rack", + "total_mem" : 1048576 + } + }, + ] + } + + datanodes = 
self.stackAdvisor.getHostsWithComponent("HDFS", "DATANODE", services, hosts) + self.assertEquals(len(datanodes), 2) + self.assertEquals(datanodes, hosts["items"]) + datanode = self.stackAdvisor.getHostWithComponent("HDFS", "DATANODE", services, hosts) + self.assertEquals(datanode, hosts["items"][0]) + namenodes = self.stackAdvisor.getHostsWithComponent("HDFS", "NAMENODE", services, hosts) + self.assertEquals(len(namenodes), 1) + # [host2] + self.assertEquals(namenodes, [hosts["items"][1]]) + namenode = self.stackAdvisor.getHostWithComponent("HDFS", "NAMENODE", services, hosts) + # host2 + self.assertEquals(namenode, hosts["items"][1]) + + # not installed + nodemanager = self.stackAdvisor.getHostWithComponent("YARN", "NODEMANAGER", services, hosts) + self.assertEquals(nodemanager, None) + + # unknown component + unknown_component = self.stackAdvisor.getHostWithComponent("YARN", "UNKNOWN", services, hosts) + self.assertEquals(nodemanager, None) + # unknown service + unknown_component = self.stackAdvisor.getHostWithComponent("UNKNOWN", "NODEMANAGER", services, hosts) + self.assertEquals(nodemanager, None) http://git-wip-us.apache.org/repos/asf/ambari/blob/c3e0771e/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py b/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py index 60fb33a..0453b7a 100644 --- a/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py +++ b/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py @@ -1444,8 +1444,8 @@ class TestHDP22StackAdvisor(TestCase): 'hadoop-env': { 'properties': { 'namenode_heapsize': '1024', - 'namenode_opt_newsize' : '256', - 'namenode_opt_maxnewsize' : '256' + 'namenode_opt_newsize' : '128', + 'namenode_opt_maxnewsize' : '128' }, 'property_attributes': { 'dtnode_heapsize': {'maximum': '2048'}, @@ -1558,30 
+1558,6 @@ class TestHDP22StackAdvisor(TestCase): ] }, - { - "href":"/api/v1/stacks/HDP/versions/2.2/services/HDFS/components/SECONDARY_NAMENODE", - "StackServiceComponents":{ - "advertise_version":"true", - "cardinality":"1", - "component_category":"MASTER", - "component_name":"SECONDARY_NAMENODE", - "custom_commands":[ - - ], - "display_name":"SNameNode", - "is_client":"false", - "is_master":"true", - "service_name":"HDFS", - "stack_name":"HDP", - "stack_version":"2.2", - "hostnames":[ - "host1" - ] - }, - "dependencies":[ - - ] - }, ], }], "configurations": configurations @@ -1619,6 +1595,36 @@ class TestHDP22StackAdvisor(TestCase): self.stackAdvisor.recommendHDFSConfigurations(configurations, clusterData, services, hosts) self.assertEquals(configurations, expected) + # namenode heapsize depends on # of datanodes + datanode_hostnames = services["services"][0]["components"][0]["StackServiceComponents"]["hostnames"] # datanode hostnames + for i in xrange(200): + hostname = "datanode" + `i` + datanode_hostnames.append(hostname) + hosts['items'].append( + { + "href" : "/api/v1/hosts/" + hostname, + "Hosts" : { + "cpu_count" : 1, + "host_name" : hostname, + "os_arch" : "x86_64", + "os_type" : "centos6", + "ph_cpu_count" : 1, + "public_host_name" : hostname, + "rack_info" : "/default-rack", + "total_mem" : 2097152 + } + } + ) + self.stackAdvisor.recommendHDFSConfigurations(configurations, clusterData, services, hosts) + self.assertEquals(configurations["hadoop-env"]["properties"]["namenode_heapsize"], "47872") + self.assertEquals(configurations["hadoop-env"]["properties"]["namenode_opt_maxnewsize"], "6144") + self.assertEquals(configurations["hadoop-env"]["properties"]["namenode_opt_maxnewsize"], "6144") + # namenode_heapsize depends on number of disks used by datanode + configurations["hdfs-site"]["properties"]["dfs.datanode.data.dir"] = "/path1" + self.stackAdvisor.recommendHDFSConfigurations(configurations, clusterData, services, hosts) + 
self.assertEquals(configurations["hadoop-env"]["properties"]["namenode_heapsize"], "14848") + self.assertEquals(configurations["hadoop-env"]["properties"]["namenode_opt_maxnewsize"], "2048") + self.assertEquals(configurations["hadoop-env"]["properties"]["namenode_opt_maxnewsize"], "2048") def test_validateHDFSConfigurationsEnv(self): configurations = {}