jenkins-bot has submitted this change and it was merged.

Change subject: Adding role::hadoop and role::hive
......................................................................


Adding role::hadoop and role::hive

Note: $::fqdn must be defined properly.  This might be
a problem for some vagrant setups, not sure.

Change-Id: I5e7860c16c419b977c66bcc566b62fe79a153f54
---
A .gitmodules
M puppet/manifests/packages.pp
M puppet/manifests/roles.pp
A puppet/modules/cdh4
A puppet/templates/hadoop/fair-scheduler-allocation.xml.erb
A puppet/templates/hadoop/fair-scheduler.xml.erb
6 files changed, 163 insertions(+), 0 deletions(-)

Approvals:
  Ori.livneh: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..b4a3cb4
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,4 @@
+[submodule "puppet/modules/cdh4"]
+       path = puppet/modules/cdh4
+       url = https://gerrit.wikimedia.org/r/operations/puppet/cdh4
+
diff --git a/puppet/manifests/packages.pp b/puppet/manifests/packages.pp
index f41658e..6561392 100644
--- a/puppet/manifests/packages.pp
+++ b/puppet/manifests/packages.pp
@@ -56,3 +56,7 @@
 class packages::ffmpeg2theora {
     package { 'ffmpeg2theora': }
 }
+
+class packages::java {
+    package { 'openjdk-7-jdk': }
+}
diff --git a/puppet/manifests/roles.pp b/puppet/manifests/roles.pp
index ee4d047..d012c4e 100644
--- a/puppet/manifests/roles.pp
+++ b/puppet/manifests/roles.pp
@@ -763,3 +763,120 @@
 class role::hhvm {
     include ::hhvm
 }
+
+
+# == Class role::analytics
+# Includes all analytics related roles:
+# - hadoop
+# - hive
+#
+# NOTE!  To use this and other analytics classes, you must have the
+# puppet-cdh4 git submodule available.  Run this command on your
+# local machine to make sure puppet/modules/cdh4 is cloned and up to date.
+#
+#    git submodule update --init
+#
+# You'll also need more RAM!  Edit Vagrantfile and increase --memory.
+# 2048 M should be enough, but you can probably get away with less.
+class role::analytics {
+    include role::hadoop
+    include role::hive
+}
+
+
+# == Class role::hadoop
+# Installs and runs all hadoop services.
+class role::hadoop {
+    # need java before hadoop is installed
+    require packages::java
+
+    $namenode_hosts           = [$::fqdn]
+
+    $hadoop_directory         = '/var/lib/hadoop'
+    $hadoop_name_directory    = "${hadoop_directory}/name"
+    $hadoop_data_directory    = "${hadoop_directory}/data"
+
+    file { $hadoop_directory:
+        ensure => 'directory',
+    }
+    file { $hadoop_data_directory:
+        ensure => 'directory',
+    }
+
+    $datanode_mounts = [
+      "${hadoop_data_directory}/a",
+      "${hadoop_data_directory}/b",
+    ]
+
+    # Install Hadoop client and configs
+    class { '::cdh4::hadoop':
+        namenode_hosts                           => $namenode_hosts,
+        datanode_mounts                          => $datanode_mounts,
+        dfs_name_dir                             => [$hadoop_name_directory],
+        # Turn on Snappy compression by default for maps and final outputs
+        mapreduce_intermediate_compression       => true,
+        mapreduce_intermediate_compression_codec => 
'org.apache.hadoop.io.compress.SnappyCodec',
+        mapreduce_output_compression             => true,
+        mapreduce_output_compression_codec       => 
'org.apache.hadoop.io.compress.SnappyCodec',
+        mapreduce_output_compression_type        => BLOCK,
+        mapreduce_map_tasks_maximum              => 2,
+        mapreduce_reduce_tasks_maximum           => 2,
+        # mapreduce.shuffle.port defaults to 8080 apparently.
+        # Override this so as not to conflict with apache
+        mapreduce_shuffle_port                   => 13562,
+        yarn_resourcemanager_scheduler_class     => 
'org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler',
+    }
+
+    file { "${::cdh4::hadoop::config_directory}/fair-scheduler.xml":
+        content => template('hadoop/fair-scheduler.xml.erb'),
+        require => Class['cdh4::hadoop'],
+    }
+    file { "${::cdh4::hadoop::config_directory}/fair-scheduler-allocation.xml":
+        content => template('hadoop/fair-scheduler-allocation.xml.erb'),
+        require => Class['cdh4::hadoop'],
+    }
+
+    # Install and run master and worker classes all on this node.
+    # - NameNode
+    # - ResourceManager
+    # - DataNode
+    # - NodeManager
+    class { '::cdh4::hadoop::master': }
+    class { '::cdh4::hadoop::worker':
+        require => Class['::cdh4::hadoop::master'],
+    }
+}
+
+# == Class role::hive
+# Installs and runs hive client, hive metastore and hive server.
+class role::hive {
+    # Mediawiki includes the mysql module.
+    # We need the root db password defined there
+    # in order to create the Hive metastore database.
+    require role::mediawiki
+    # Need hadoop up and running and configs defined first.
+    Class['role::hadoop'] -> Class['role::hive']
+
+    class { '::cdh4::hive':
+        metastore_host   => $role::hadoop::namenode_hosts[0],
+        db_root_password => $::mysql::root_password,
+    }
+
+    # Setup Hive server and Metastore
+    class { '::cdh4::hive::master': }
+
+    # Include hcatalog class so that Hive clients can use
+    # the JsonSerDe from it.  If we expand the usage of HCatalog
+    # in the future, this will probably move to its own role.
+    class { '::cdh4::hcatalog':
+        require => Class['::cdh4::hive'],
+    }
+
+    # Add vagrant user to hive group so that
+    # hive-site.xml can be read.
+    exec { 'add_vagrant_user_to_hive_group':
+        command => '/usr/sbin/usermod --append --groups hive vagrant',
+        unless  => '/usr/bin/groups vagrant | grep -q hive',
+        require => Class['::cdh4::hive'],
+    }
+}
diff --git a/puppet/modules/cdh4 b/puppet/modules/cdh4
new file mode 160000
index 0000000..30fc8b2
--- /dev/null
+++ b/puppet/modules/cdh4
+Subproject commit 30fc8b2072ac422faadb66d0913ce7540396d686
diff --git a/puppet/templates/hadoop/fair-scheduler-allocation.xml.erb 
b/puppet/templates/hadoop/fair-scheduler-allocation.xml.erb
new file mode 100644
index 0000000..c6ab343
--- /dev/null
+++ b/puppet/templates/hadoop/fair-scheduler-allocation.xml.erb
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<allocations>
+  <fairSharePreemptionTimeout>30</fairSharePreemptionTimeout>
+
+  <queue name="default">
+    <maxRunningApps>50</maxRunningApps>
+    <schedulingMode>fair</schedulingMode>
+  </queue>
+
+  <queue name="standard">
+    <!-- <weight>2.0</weight> -->
+    <aclSubmitApps>hdfs,stats</aclSubmitApps>
+    <maxRunningApps>50</maxRunningApps>
+    <schedulingMode>fair</schedulingMode>
+  </queue>
+
+  <queue name="adhoc">
+    <maxRunningApps>10</maxRunningApps>
+    <schedulingMode>fair</schedulingMode>
+  </queue>
+  
+</allocations>
diff --git a/puppet/templates/hadoop/fair-scheduler.xml.erb 
b/puppet/templates/hadoop/fair-scheduler.xml.erb
new file mode 100644
index 0000000..689716c
--- /dev/null
+++ b/puppet/templates/hadoop/fair-scheduler.xml.erb
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<configuration>
+
+  <property>
+    <name>yarn.scheduler.fair.allocation.file</name>
+    <value>/etc/hadoop/conf/fair-scheduler-allocation.xml</value>
+    <description>Path to allocation file. An allocation file is an XML 
manifest describing queues and their properties, in addition to certain policy 
defaults. This file must be in XML format as described in the next 
section.</description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.fair.user-as-default-queue</name>
+    <value>false</value>
+    <description>Whether to use the username associated with the allocation as 
the default queue name, in the event that a queue name is not specified. If 
this is set to "false" or unset, all jobs have a shared default queue, called 
"default". Defaults to true.</description>
+  </property>
+
+</configuration>

-- 
To view, visit https://gerrit.wikimedia.org/r/107317
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5e7860c16c419b977c66bcc566b62fe79a153f54
Gerrit-PatchSet: 8
Gerrit-Project: mediawiki/vagrant
Gerrit-Branch: master
Gerrit-Owner: Ottomata <o...@wikimedia.org>
Gerrit-Reviewer: Ori.livneh <o...@wikimedia.org>
Gerrit-Reviewer: Ottomata <o...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to