jenkins-bot has submitted this change and it was merged. Change subject: Adding role::hadoop and role::hive ......................................................................
Adding role::hadoop and role::hive Note: $::fqdn must be defined properly. This might be a problem for some vagrant setups, not sure. Change-Id: I5e7860c16c419b977c66bcc566b62fe79a153f54 --- A .gitmodules M puppet/manifests/packages.pp M puppet/manifests/roles.pp A puppet/modules/cdh4 A puppet/templates/hadoop/fair-scheduler-allocation.xml.erb A puppet/templates/hadoop/fair-scheduler.xml.erb 6 files changed, 163 insertions(+), 0 deletions(-) Approvals: Ori.livneh: Looks good to me, approved jenkins-bot: Verified diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..b4a3cb4 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "puppet/modules/cdh4"] + path = puppet/modules/cdh4 + url = https://gerrit.wikimedia.org/r/operations/puppet/cdh4 + diff --git a/puppet/manifests/packages.pp b/puppet/manifests/packages.pp index f41658e..6561392 100644 --- a/puppet/manifests/packages.pp +++ b/puppet/manifests/packages.pp @@ -56,3 +56,7 @@ class packages::ffmpeg2theora { package { 'ffmpeg2theora': } } + +class packages::java { + package { 'openjdk-7-jdk': } +} diff --git a/puppet/manifests/roles.pp b/puppet/manifests/roles.pp index ee4d047..d012c4e 100644 --- a/puppet/manifests/roles.pp +++ b/puppet/manifests/roles.pp @@ -763,3 +763,120 @@ class role::hhvm { include ::hhvm } + + +# == Class role::analytics +# Includes all analytics related roles: +# - hadoop +# - hive +# +# NOTE! To use this and other analytics classes, you must have the +# puppet-cdh4 git submodule available. Run this command on your +# local machine to make sure modules/cdh4 is cloned and up to date. +# +# git submodule update --init +# +# You'll also need more RAM! Edit Vagrantfile and increase --memory. +# 2048 M should be enough, but you can probably get away with less. +class role::analytics { + include role::hadoop + include role::hive +} + + +# == Class role::hadoop +# Installs and runs all hadoop services. 
+class role::hadoop { + # need java before hadoop is installed + require packages::java + + $namenode_hosts = [$::fqdn] + + $hadoop_directory = '/var/lib/hadoop' + $hadoop_name_directory = "${hadoop_directory}/name" + $hadoop_data_directory = "${hadoop_directory}/data" + + file { $hadoop_directory: + ensure => 'directory', + } + file { $hadoop_data_directory: + ensure => 'directory', + } + + $datanode_mounts = [ + "${hadoop_data_directory}/a", + "${hadoop_data_directory}/b", + ] + + # Install Hadoop client and configs + class { '::cdh4::hadoop': + namenode_hosts => $namenode_hosts, + datanode_mounts => $datanode_mounts, + dfs_name_dir => [$hadoop_name_directory], + # Turn on Snappy compression by default for maps and final outputs + mapreduce_intermediate_compression => true, + mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec', + mapreduce_output_compression => true, + mapreduce_output_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec', + mapreduce_output_compression_type => BLOCK, + mapreduce_map_tasks_maximum => 2, + mapreduce_reduce_tasks_maximum => 2, + # mapreduce.shuffle.port defaults to 8080 apparently. + # Override this so as not to conflict with apache + mapreduce_shuffle_port => 13562, + yarn_resourcemanager_scheduler_class => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler', + } + + file { "${::cdh4::hadoop::config_directory}/fair-scheduler.xml": + content => template('hadoop/fair-scheduler.xml.erb'), + require => Class['cdh4::hadoop'], + } + file { "${::cdh4::hadoop::config_directory}/fair-scheduler-allocation.xml": + content => template('hadoop/fair-scheduler-allocation.xml.erb'), + require => Class['cdh4::hadoop'], + } + + # Install and run master and worker classes all on this node. 
+ # - NameNode + # - ResourceManager + # - DataNode + # - NodeManager + class { '::cdh4::hadoop::master': } + class { '::cdh4::hadoop::worker': + require => Class['::cdh4::hadoop::master'], + } +} + +# == Class role::hive +# Installs and runs hive client, hive metastore and hive server. +class role::hive { + # Mediawiki includes the mysql module. + # We need the root db password defined there + # in order to create the Hive metastore database. + require role::mediawiki + # Need hadoop up and running and configs defined first. + Class['role::hadoop'] -> Class['role::hive'] + + class { '::cdh4::hive': + metastore_host => $role::hadoop::namenode_hosts[0], + db_root_password => $::mysql::root_password, + } + + # Setup Hive server and Metastore + class { '::cdh4::hive::master': } + + # Include hcatalog class so that Hive clients can use + # the JsonSerDe from it. If we expand the usage of HCatalog + # in the future, this will probably move to its own role. + class { '::cdh4::hcatalog': + require => Class['::cdh4::hive'], + } + + # Add vagrant user to hive group so that + # hive-site.xml can be read. 
+ exec { 'add_vagrant_user_to_hive_group': + command => '/usr/sbin/usermod --append --groups hive vagrant', + unless => '/usr/bin/groups vagrant | grep -q hive', + require => Class['::cdh4::hive'], + } +} diff --git a/puppet/modules/cdh4 b/puppet/modules/cdh4 new file mode 160000 index 0000000..30fc8b2 --- /dev/null +++ b/puppet/modules/cdh4 +Subproject commit 30fc8b2072ac422faadb66d0913ce7540396d686 diff --git a/puppet/templates/hadoop/fair-scheduler-allocation.xml.erb b/puppet/templates/hadoop/fair-scheduler-allocation.xml.erb new file mode 100644 index 0000000..c6ab343 --- /dev/null +++ b/puppet/templates/hadoop/fair-scheduler-allocation.xml.erb @@ -0,0 +1,22 @@ +<?xml version="1.0"?> +<allocations> + <fairSharePreemptionTimeout>30</fairSharePreemptionTimeout> + + <queue name="default"> + <maxRunningApps>50</maxRunningApps> + <schedulingMode>fair</schedulingMode> + </queue> + + <queue name="standard"> + <!-- <weight>2.0</weight> --> + <aclSubmitApps>hdfs,stats</aclSubmitApps> + <maxRunningApps>50</maxRunningApps> + <schedulingMode>fair</schedulingMode> + </queue> + + <queue name="adhoc"> + <maxRunningApps>10</maxRunningApps> + <schedulingMode>fair</schedulingMode> + </queue> + +</allocations> diff --git a/puppet/templates/hadoop/fair-scheduler.xml.erb b/puppet/templates/hadoop/fair-scheduler.xml.erb new file mode 100644 index 0000000..689716c --- /dev/null +++ b/puppet/templates/hadoop/fair-scheduler.xml.erb @@ -0,0 +1,16 @@ +<?xml version="1.0"?> +<configuration> + + <property> + <name>yarn.scheduler.fair.allocation.file</name> + <value>/etc/hadoop/conf/fair-scheduler-allocation.xml</value> + <description>Path to allocation file. An allocation file is an XML manifest describing queues and their properties, in addition to certain policy defaults. 
This file must be in XML format as described in the next section.</description> + </property> + + <property> + <name>yarn.scheduler.fair.user-as-default-queue</name> + <value>false</value> + <description>Whether to use the username associated with the allocation as the default queue name, in the event that a queue name is not specified. If this is set to "false" or unset, all jobs have a shared default queue, called "default". Defaults to true.</description> + </property> + +</configuration> -- To view, visit https://gerrit.wikimedia.org/r/107317 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I5e7860c16c419b977c66bcc566b62fe79a153f54 Gerrit-PatchSet: 8 Gerrit-Project: mediawiki/vagrant Gerrit-Branch: master Gerrit-Owner: Ottomata <o...@wikimedia.org> Gerrit-Reviewer: Ori.livneh <o...@wikimedia.org> Gerrit-Reviewer: Ottomata <o...@wikimedia.org> Gerrit-Reviewer: jenkins-bot _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits