Author: abayer
Date: Mon Aug 13 19:49:06 2012
New Revision: 1372568
URL: http://svn.apache.org/viewvc?rev=1372568&view=rev
Log:
WHIRR-189. Hadoop on EC2 should use all available storage.
Added:
whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java
(with props)
whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh
(with props)
whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh
(with props)
Modified:
whirr/trunk/CHANGES.txt
whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java
whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java
whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java
whirr/trunk/pom.xml
whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh
whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh
whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh
whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java
whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java
whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh
whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties
whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java
whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh
Modified: whirr/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/whirr/trunk/CHANGES.txt?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/CHANGES.txt (original)
+++ whirr/trunk/CHANGES.txt Mon Aug 13 19:49:06 2012
@@ -15,6 +15,8 @@ Trunk (unreleased changes)
IMPROVEMENTS
+ WHIRR-189. Hadoop on EC2 should use all available storage. (abayer)
+
WHIRR-63. Support EC2 Cluster Compute groups for Hadoop etc. (abayer)
WHIRR-573. Allow configuring REPO_HOST for CDH repositories. (abayer)
Modified:
whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java
URL:
http://svn.apache.org/viewvc/whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
---
whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java
(original)
+++
whirr/trunk/core/src/main/java/org/apache/whirr/compute/BootstrapTemplate.java
Mon Aug 13 19:49:06 2012
@@ -19,6 +19,7 @@
package org.apache.whirr.compute;
import static org.jclouds.compute.options.TemplateOptions.Builder.runScript;
+import static org.jclouds.ec2.domain.RootDeviceType.EBS;
import static org.jclouds.scriptbuilder.domain.Statements.appendFile;
import static
org.jclouds.scriptbuilder.domain.Statements.createOrOverwriteFile;
import static org.jclouds.scriptbuilder.domain.Statements.interpret;
@@ -31,6 +32,9 @@ import org.apache.whirr.service.jclouds.
import org.apache.whirr.service.jclouds.TemplateBuilderStrategy;
import org.jclouds.aws.ec2.AWSEC2ApiMetadata;
import org.jclouds.aws.ec2.compute.AWSEC2TemplateOptions;
+import org.jclouds.ec2.EC2ApiMetadata;
+import org.jclouds.ec2.compute.options.EC2TemplateOptions;
+import org.jclouds.ec2.compute.predicates.EC2ImagePredicates;
import org.jclouds.compute.ComputeService;
import org.jclouds.compute.ComputeServiceContext;
import org.jclouds.compute.domain.Template;
@@ -108,10 +112,25 @@ public class BootstrapTemplate {
}
}
- return setPlacementGroup(context, spec, template, instanceTemplate);
+ return mapEphemeralIfImageIsEBSBacked(context, spec, template,
instanceTemplate);
}
/**
+ * If the image is EBS-backed, map the ephemeral device.
+ */
+ private static Template
mapEphemeralIfImageIsEBSBacked(ComputeServiceContext context,
+ ClusterSpec spec,
+ Template template,
+ InstanceTemplate
instanceTemplate) {
+ if
(EC2ApiMetadata.CONTEXT_TOKEN.isAssignableFrom(context.getBackendType())) {
+ if
(EC2ImagePredicates.rootDeviceType(EBS).apply(template.getImage())) {
+
template.getOptions().as(EC2TemplateOptions.class).mapEphemeralDeviceToDeviceName("/dev/sdc",
"ephemeral1");
+ }
+ }
+ return setPlacementGroup(context, spec, template, instanceTemplate);
+ }
+
+ /**
* Set the placement group, if desired - if it doesn't already exist,
create it.
*/
private static Template setPlacementGroup(ComputeServiceContext context,
ClusterSpec spec,
Modified:
whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java
URL:
http://svn.apache.org/viewvc/whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
---
whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java
(original)
+++
whirr/trunk/core/src/test/java/org/apache/whirr/compute/BootstrapTemplateTest.java
Mon Aug 13 19:49:06 2012
@@ -38,6 +38,7 @@ import org.jclouds.aws.ec2.compute.AWSEC
import org.jclouds.aws.ec2.compute.AWSEC2TemplateOptions;
import org.jclouds.compute.ComputeService;
import org.jclouds.compute.ComputeServiceContext;
+import org.jclouds.compute.domain.Image;
import org.jclouds.compute.domain.Template;
import org.jclouds.compute.domain.TemplateBuilder;
import org.jclouds.compute.options.TemplateOptions;
@@ -127,8 +128,10 @@ private void assertSpotPriceIs(
Template template = mock(Template.class);
TemplateOptions options = mock(TemplateOptions.class);
+ Image image = mock(Image.class);
when(templateBuilder.build()).thenReturn(template);
when(template.getOptions()).thenReturn(options);
+ when(template.getImage()).thenReturn(image);
AWSEC2TemplateOptions awsEec2TemplateOptions =
mock(AWSEC2TemplateOptions.class);
when(options.as((Class<TemplateOptions>)
any())).thenReturn(awsEec2TemplateOptions);
Modified:
whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java
URL:
http://svn.apache.org/viewvc/whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
---
whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java
(original)
+++
whirr/trunk/core/src/test/java/org/apache/whirr/service/TemplateBuilderStrategyTest.java
Mon Aug 13 19:49:06 2012
@@ -44,6 +44,7 @@ public class TemplateBuilderStrategyTest
@Before
public void setUp() throws ConfigurationException, JSchException,
IOException {
spec = ClusterSpec.withTemporaryKeys();
+ spec.setProvider("my-provider");
instanceTemplate = mock(InstanceTemplate.class);
}
Modified: whirr/trunk/pom.xml
URL:
http://svn.apache.org/viewvc/whirr/trunk/pom.xml?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/pom.xml (original)
+++ whirr/trunk/pom.xml Mon Aug 13 19:49:06 2012
@@ -348,23 +348,7 @@
</archive>
</configuration>
</plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-javadoc-plugin</artifactId>
- <version>2.8.1</version>
- <configuration>
- <excludePackageNames>org.jclouds.*</excludePackageNames>
- </configuration>
- <executions>
- <execution>
- <id>aggregate</id>
- <goals>
- <goal>aggregate</goal>
- </goals>
- <phase>site</phase>
- </execution>
- </executions>
- </plugin>
+
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-remote-resources-plugin</artifactId>
@@ -411,6 +395,7 @@
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<version>0.8</version>
+ <inherited>false</inherited>
<executions>
<execution>
<phase>package</phase>
@@ -604,12 +589,32 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.0</version>
+ <inherited>false</inherited>
<configuration>
- <!-- Reports are generated at the site level -->
- <generateReports>false</generateReports>
<templateDirectory>${basedir}/src/site/resources</templateDirectory>
<template>site.vm</template>
<relativizeDecorationLinks>false</relativizeDecorationLinks>
+ <reportPlugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <version>2.8.1</version>
+ <configuration>
+ <encoding>${project.build.sourceEncoding}</encoding>
+ <quiet>true</quiet>
+ <maxmemory>256m</maxmemory>
+ </configuration>
+ <reportSets>
+ <reportSet>
+ <id>default</id>
+ <reports>
+ <report>javadoc</report>
+ <report>aggregate</report>
+ </reports>
+ </reportSet>
+ </reportSets>
+ </plugin>
+ </reportPlugins>
</configuration>
</plugin>
</plugins>
@@ -701,13 +706,6 @@
</configuration>
<executions>
<execution>
- <id>aggregate</id>
- <goals>
- <goal>aggregate</goal>
- </goals>
- <phase>site</phase>
- </execution>
- <execution>
<id>attach-javadocs</id>
<goals>
<goal>jar</goal>
Modified:
whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh
URL:
http://svn.apache.org/viewvc/whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
---
whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh
(original)
+++
whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hadoop.sh
Mon Aug 13 19:49:06 2012
@@ -26,15 +26,6 @@ function configure_cdh_hadoop() {
ROLES=$1
shift
- case $CLOUD_PROVIDER in
- ec2 | aws-ec2 )
- # Alias /mnt as /data
- if [ ! -e /data ]; then ln -s /mnt /data; fi
- ;;
- *)
- ;;
- esac
-
REPO=${REPO:-cdh4}
CDH_MAJOR_VERSION=$(echo $REPO | sed -e 's/cdh\([0-9]\).*/\1/')
if [ $CDH_MAJOR_VERSION = "4" ]; then
@@ -49,11 +40,7 @@ function configure_cdh_hadoop() {
MAPREDUCE_PACKAGE_PREFIX=hadoop-${HADOOP_VERSION:-0.20}
fi
- mkdir -p /data/hadoop
- chgrp hadoop /data/hadoop
- chmod g+w /data/hadoop
- mkdir /data/tmp
- chmod a+rwxt /data/tmp
+ make_hadoop_dirs /data*
# Copy generated configuration files in place
cp /tmp/{core,hdfs,mapred}-site.xml $HADOOP_CONF_DIR
@@ -105,6 +92,19 @@ function configure_cdh_hadoop() {
}
+function make_hadoop_dirs {
+ for mount in "$@"; do
+ if [ ! -e $mount/hadoop ]; then
+ mkdir -p $mount/hadoop
+ chown hadoop:hadoop $mount/hadoop
+ fi
+ if [ ! -e $mount/tmp ]; then
+ mkdir $mount/tmp
+ chmod a+rwxt $mount/tmp
+ fi
+ done
+}
+
function start_namenode() {
if which dpkg &> /dev/null; then
retry_apt_get -y install $HDFS_PACKAGE_PREFIX-namenode
Modified:
whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh
URL:
http://svn.apache.org/viewvc/whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
---
whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh
(original)
+++
whirr/trunk/services/cdh/src/main/resources/functions/configure_cdh_hbase.sh
Mon Aug 13 19:49:06 2012
@@ -54,21 +54,7 @@ function configure_cdh_hbase() {
HBASE_PREFIX=hadoop-
fi
- case $CLOUD_PROVIDER in
- ec2 | aws-ec2 )
- # Alias /mnt as /data
- if [ ! -e /data ]; then ln -s /mnt /data; fi
- ;;
- *)
- ;;
- esac
-
- mkdir -p /data/hbase
- chown hbase:hbase /data/hbase
- if [ ! -e /data/tmp ]; then
- mkdir /data/tmp
- chmod a+rwxt /data/tmp
- fi
+ make_hbase_dirs /data*
# Copy generated configuration files in place
cp /tmp/hbase-site.xml $HBASE_CONF_DIR
@@ -147,3 +133,17 @@ function install_hbase_daemon() {
retry_yum install -y $daemon
fi
}
+
+
+function make_hbase_dirs {
+ for mount in "$@"; do
+ if [ ! -e $mount/hbase ]; then
+ mkdir -p $mount/hbase
+ chown hbase:hbase $mount/hbase
+ fi
+ if [ ! -e $mount/tmp ]; then
+ mkdir $mount/tmp
+ chmod a+rwxt $mount/tmp
+ fi
+ done
+}
Modified:
whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh
URL:
http://svn.apache.org/viewvc/whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
--- whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh
(original)
+++ whirr/trunk/services/cdh/src/main/resources/functions/install_cdh_hbase.sh
Mon Aug 13 19:49:06 2012
@@ -69,15 +69,6 @@ function install_cdh_hbase() {
esac
done
- case $CLOUD_PROVIDER in
- ec2 | aws-ec2 )
- # Alias /mnt as /data
- if [ ! -e /data ]; then ln -s /mnt /data; fi
- ;;
- *)
- ;;
- esac
-
REPO=${REPO:-cdh4}
REPO_HOST=${REPO_HOST:-archive.cloudera.com}
HBASE_HOME=/usr/lib/hbase
Modified:
whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java
URL:
http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
---
whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java
(original)
+++
whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopClusterActionHandler.java
Mon Aug 13 19:49:06 2012
@@ -25,7 +25,9 @@ import static org.apache.whirr.service.h
import static org.jclouds.scriptbuilder.domain.Statements.call;
import com.google.common.base.Joiner;
+import com.google.common.collect.Iterables;
import java.io.IOException;
+import java.util.Map;
import java.util.Set;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
@@ -80,6 +82,16 @@ public abstract class HadoopClusterActio
"-u", tarball));
}
+ protected Map<String, String> getDeviceMappings(ClusterActionEvent event) {
+ Set<Instance> instances =
event.getCluster().getInstancesMatching(RolePredicates.role(getRole()));
+ Instance prototype = Iterables.getFirst(instances, null);
+ if (prototype == null) {
+ throw new IllegalStateException("No instances found in role " +
getRole());
+ }
+ VolumeManager volumeManager = new VolumeManager();
+ return volumeManager.getDeviceMappings(event.getClusterSpec(),
prototype);
+ }
+
@Override
protected void beforeConfigure(ClusterActionEvent event)
throws IOException, InterruptedException {
@@ -102,11 +114,12 @@ public abstract class HadoopClusterActio
private void createHadoopConfigFiles(ClusterActionEvent event,
ClusterSpec clusterSpec, Cluster cluster) throws IOException {
+ Map<String, String> deviceMappings = getDeviceMappings(event);
try {
event.getStatementBuilder().addStatements(
buildCommon("/tmp/core-site.xml", clusterSpec, cluster),
- buildHdfs("/tmp/hdfs-site.xml", clusterSpec, cluster),
- buildMapReduce("/tmp/mapred-site.xml", clusterSpec, cluster),
+ buildHdfs("/tmp/hdfs-site.xml", clusterSpec, cluster,
deviceMappings.keySet()),
+ buildMapReduce("/tmp/mapred-site.xml", clusterSpec, cluster,
deviceMappings.keySet()),
buildHadoopEnv("/tmp/hadoop-env.sh", clusterSpec, cluster),
TemplateUtils.createFileFromTemplate("/tmp/hadoop-metrics.properties",
event.getTemplateEngine(), getMetricsTemplate(event, clusterSpec, cluster),
clusterSpec, cluster)
);
@@ -114,6 +127,8 @@ public abstract class HadoopClusterActio
} catch (ConfigurationException e) {
throw new IOException(e);
}
+ String devMappings = VolumeManager.asString(deviceMappings);
+ addStatement(event, call("prepare_all_disks", "'" + devMappings + "'"));
}
private String getMetricsTemplate(ClusterActionEvent event, ClusterSpec
clusterSpec, Cluster cluster) {
Modified:
whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java
URL:
http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
---
whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java
(original)
+++
whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilder.java
Mon Aug 13 19:49:06 2012
@@ -21,7 +21,10 @@ package org.apache.whirr.service.hadoop;
import static org.apache.whirr.RolePredicates.role;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.Set;
@@ -50,6 +53,7 @@ public class HadoopConfigurationBuilder
Configuration defaults, String prefix)
throws ConfigurationException {
CompositeConfiguration config = new CompositeConfiguration();
+ config.setDelimiterParsingDisabled(true);
Configuration sub = clusterSpec.getConfigurationForKeysWithPrefix(prefix);
config.addConfiguration(sub.subset(prefix)); // remove prefix
config.addConfiguration(defaults.subset(prefix));
@@ -64,16 +68,16 @@ public class HadoopConfigurationBuilder
}
public static Statement buildHdfs(String path, ClusterSpec clusterSpec,
- Cluster cluster) throws ConfigurationException, IOException {
+ Cluster cluster, Set<String> dataDirectories) throws
ConfigurationException, IOException {
Configuration config = buildHdfsConfiguration(clusterSpec, cluster,
- new PropertiesConfiguration(WHIRR_HADOOP_DEFAULT_PROPERTIES));
+ new PropertiesConfiguration(WHIRR_HADOOP_DEFAULT_PROPERTIES),
dataDirectories);
return
HadoopConfigurationConverter.asCreateXmlConfigurationFileStatement(path,
config);
}
public static Statement buildMapReduce(String path, ClusterSpec clusterSpec,
- Cluster cluster) throws ConfigurationException, IOException {
+ Cluster cluster, Set<String> dataDirectories) throws
ConfigurationException, IOException {
Configuration config = buildMapReduceConfiguration(clusterSpec, cluster,
- new PropertiesConfiguration(WHIRR_HADOOP_DEFAULT_PROPERTIES));
+ new PropertiesConfiguration(WHIRR_HADOOP_DEFAULT_PROPERTIES),
dataDirectories);
return
HadoopConfigurationConverter.asCreateXmlConfigurationFileStatement(path,
config);
}
@@ -100,16 +104,27 @@ public class HadoopConfigurationBuilder
@VisibleForTesting
static Configuration buildHdfsConfiguration(ClusterSpec clusterSpec,
- Cluster cluster, Configuration defaults) throws ConfigurationException {
- return build(clusterSpec, cluster, defaults, "hadoop-hdfs");
+ Cluster cluster, Configuration defaults, Set<String> dataDirectories)
throws ConfigurationException {
+ Configuration config = build(clusterSpec, cluster, defaults,
"hadoop-hdfs");
+
+ setIfAbsent(config, "dfs.data.dir",
+ appendToDataDirectories(dataDirectories, "/hadoop/hdfs/data"));
+ setIfAbsent(config, "dfs.name.dir",
+ appendToDataDirectories(dataDirectories, "/hadoop/hdfs/name"));
+ setIfAbsent(config, "fs.checkpoint.dir",
+ appendToDataDirectories(dataDirectories, "/hadoop/hdfs/secondary"));
+ return config;
}
@VisibleForTesting
static Configuration buildMapReduceConfiguration(ClusterSpec clusterSpec,
- Cluster cluster, Configuration defaults) throws ConfigurationException,
IOException {
+ Cluster cluster, Configuration defaults, Set<String> dataDirectories)
throws ConfigurationException, IOException {
Configuration config = build(clusterSpec, cluster, defaults,
"hadoop-mapreduce");
+ setIfAbsent(config, "mapred.local.dir",
+ appendToDataDirectories(dataDirectories, "/hadoop/mapred/local"));
+
Set<Instance> taskTrackers = cluster
.getInstancesMatching(role(HadoopTaskTrackerClusterActionHandler.ROLE));
@@ -158,5 +173,15 @@ public class HadoopConfigurationBuilder
config.setProperty(property, value);
}
}
+
+ private static String appendToDataDirectories(Set<String> dataDirectories,
final String suffix) {
+ return
Joiner.on(',').join(Lists.transform(Lists.newArrayList(dataDirectories),
+ new Function<String, String>() {
+ @Override public String apply(String input) {
+ return input + suffix;
+ }
+ }
+ ));
+ }
}
Added:
whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java
URL:
http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java?rev=1372568&view=auto
==============================================================================
---
whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java
(added)
+++
whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java
Mon Aug 13 19:49:06 2012
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.whirr.service.hadoop;
+
+import com.google.common.collect.Maps;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.whirr.Cluster.Instance;
+import org.apache.whirr.ClusterSpec;
+import org.jclouds.compute.domain.Hardware;
+import org.jclouds.compute.domain.Volume;
+
+public class VolumeManager {
+
+ public static final String MOUNT_PREFIX = "/data";
+
+ public Map<String, String> getDeviceMappings(ClusterSpec clusterSpec,
Instance instance) {
+ Map<String, String> mappings = Maps.newLinkedHashMap();
+ int number = 0;
+ Hardware hardware = instance.getNodeMetadata().getHardware();
+
+ /* null when using the BYON jclouds compute provider */
+ if (hardware != null) {
+ List<? extends Volume> volumes =
+ instance.getNodeMetadata().getHardware().getVolumes();
+ for (Volume volume : volumes) {
+ if (volume.isBootDevice()) {
+ continue;
+ }
+
+ mappings.put(MOUNT_PREFIX + number++, volume.getDevice());
+ }
+ }
+ return mappings;
+ }
+
+ public static String asString(Map<String, String> deviceMappings) {
+ StringBuilder sb = new StringBuilder();
+ for (Map.Entry<String, String> mapping : deviceMappings.entrySet()) {
+ if (sb.length() > 0) {
+ sb.append(";");
+ }
+ sb.append(mapping.getKey()).append(",").append(mapping.getValue());
+ }
+ return sb.toString();
+ }
+}
Propchange:
whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/VolumeManager.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified:
whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh
URL:
http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
---
whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh
(original)
+++
whirr/trunk/services/hadoop/src/main/resources/functions/configure_hadoop.sh
Mon Aug 13 19:49:06 2012
@@ -26,24 +26,10 @@ function configure_hadoop() {
ROLES=$1
shift
- case $CLOUD_PROVIDER in
- ec2 | aws-ec2 )
- # Alias /mnt as /data
- ln -s /mnt /data
- ;;
- *)
- ;;
- esac
-
HADOOP_HOME=/usr/local/hadoop
HADOOP_CONF_DIR=$HADOOP_HOME/conf
- mkdir -p /data/hadoop
- chown hadoop:hadoop /data/hadoop
- if [ ! -e /data/tmp ]; then
- mkdir /data/tmp
- chmod a+rwxt /data/tmp
- fi
+ make_hadoop_dirs /data*
mkdir /etc/hadoop
ln -s $HADOOP_CONF_DIR /etc/hadoop/conf
@@ -94,6 +80,19 @@ function configure_hadoop() {
}
+function make_hadoop_dirs {
+ for mount in "$@"; do
+ if [ ! -e $mount/hadoop ]; then
+ mkdir -p $mount/hadoop
+ chown hadoop:hadoop $mount/hadoop
+ fi
+ if [ ! -e $mount/tmp ]; then
+ mkdir $mount/tmp
+ chmod a+rwxt $mount/tmp
+ fi
+ done
+}
+
function start_namenode() {
if which dpkg &> /dev/null; then
AS_HADOOP="su -s /bin/bash - hadoop -c"
Added:
whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh
URL:
http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh?rev=1372568&view=auto
==============================================================================
---
whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh
(added)
+++
whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh
Mon Aug 13 19:49:06 2012
@@ -0,0 +1,84 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -x
+
+# This function ensures that all the mount directories in the mapping string
+# are available to be used. This is achieved by formatting, mounting, and
+# symlinking (if the volume is already mounted as another directory).
+#
+# E.g. suppose the mapping string were /data0,/dev/sdb;/data1,/dev/sdc
+# and /dev/sdb were mounted on /mnt, and /dev/sdc was not mounted or formatted.
+# In this case a symlink would be created from /data0 to /mnt. /dev/sdc would
+# be formatted, then mounted on /data1.
+function prepare_all_disks() {
+ for mapping in $(echo "$1" | tr ";" "\n"); do
+ # Split on the comma (see "Parameter Expansion" in the bash man page)
+ mount=${mapping%,*}
+ device=${mapping#*,}
+ prep_disk $mount $device
+ done
+ # Make sure there's at least a /data0 and /data (on the root filesystem)
+ if [ ! -e /data0 ]; then
+ if [ -e /data ]; then
+ ln -s /data /data0
+ else
+ mkdir /data0
+ ln -s /data0 /data
+ fi
+ else
+ if [ ! -e /data ]; then
+ ln -s /data0 /data
+ fi
+ fi
+}
+
+function prep_disk() {
+ mount=$1
+ device=$2
+ automount=${3:-false}
+
+ # is device formatted?
+ if [ $(mountpoint -q -x $device) ]; then
+ echo "$device is formatted"
+ else
+ if which dpkg &> /dev/null; then
+ apt-get install -y xfsprogs
+ elif which rpm &> /dev/null; then
+ yum install -y xfsprogs
+ fi
+ echo "warning: ERASING CONTENTS OF $device"
+ mkfs.xfs -f $device
+ fi
+ # is device mounted?
+ mount | grep -q $device
+ if [ $? == 0 ]; then
+ echo "$device is mounted"
+ if [ ! -d $mount ]; then
+ echo "Symlinking to $mount"
+ ln -s $(grep $device /proc/mounts | awk '{print $2}') $mount
+ fi
+ else
+ echo "Mounting $device on $mount"
+ if [ ! -e $mount ]; then
+ mkdir $mount
+ fi
+ mount -o defaults,noatime $device $mount
+ if $automount ; then
+ echo "$device $mount xfs defaults,noatime 0 0" >> /etc/fstab
+ fi
+ fi
+}
Propchange:
whirr/trunk/services/hadoop/src/main/resources/functions/prepare_all_disks.sh
------------------------------------------------------------------------------
svn:eol-style = native
Added: whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh
URL:
http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh?rev=1372568&view=auto
==============================================================================
--- whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh
(added)
+++ whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh
Mon Aug 13 19:49:06 2012
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -x
+
+# This function ensures that all the mount directories in the mapping string
+# whose devices are already mounted are available to be used. Symlinks are
+# created as necessary.
+#
+# E.g. suppose the mapping string were /data0,/dev/sdb;/data1,/dev/sdc
+# and /dev/sdb were mounted on /mnt, and /dev/sdc was not mounted (possibly
+# not even formatted).
+# In this case a symlink would be created from /data0 to /mnt. /data1 would
+# be created.
+function prepare_disks() {
+ for mapping in $(echo "$1" | tr ";" "\n"); do
+ # Split on the comma (see "Parameter Expansion" in the bash man page)
+ mount=${mapping%,*}
+ device=${mapping#*,}
+ prep_disk $mount $device
+ done
+}
+
+function prep_disk() {
+ mount=$1
+ device=$2
+ # is device mounted?
+ mount | grep -q $device
+ if [ $? == 0 ]; then
+ echo "$device is mounted"
+ if [ ! -d $mount ]; then
+ echo "Symlinking to $mount"
+ ln -s $(grep $device /proc/mounts | awk '{print $2}') $mount
+ fi
+ fi
+}
Propchange:
whirr/trunk/services/hadoop/src/main/resources/functions/prepare_disks.sh
------------------------------------------------------------------------------
svn:eol-style = native
Modified:
whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties
URL:
http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
---
whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties
(original)
+++
whirr/trunk/services/hadoop/src/main/resources/whirr-hadoop-default.properties
Mon Aug 13 19:49:06 2012
@@ -32,13 +32,9 @@ hadoop-common.fs.trash.interval=1440
# HDFS
hadoop-hdfs.dfs.block.size=134217728
-hadoop-hdfs.dfs.data.dir=/data/hadoop/hdfs/data
hadoop-hdfs.dfs.datanode.du.reserved=1073741824
-hadoop-hdfs.dfs.name.dir=/data/hadoop/hdfs/name
-hadoop-hdfs.fs.checkpoint.dir=/data/hadoop/hdfs/secondary
# MR
-hadoop-mapreduce.mapred.local.dir=/data/hadoop/mapred/local
hadoop-mapreduce.mapred.map.tasks.speculative.execution=true
hadoop-mapreduce.mapred.reduce.tasks.speculative.execution=false
hadoop-mapreduce.mapred.system.dir=/hadoop/system/mapred
Modified:
whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java
URL:
http://svn.apache.org/viewvc/whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
---
whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java
(original)
+++
whirr/trunk/services/hadoop/src/test/java/org/apache/whirr/service/hadoop/HadoopConfigurationBuilderTest.java
Mon Aug 13 19:49:06 2012
@@ -23,6 +23,8 @@ import com.google.common.collect.Immutab
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSet.Builder;
import com.google.common.collect.Iterators;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.whirr.Cluster;
@@ -142,21 +144,27 @@ public class HadoopConfigurationBuilderT
@Test
public void testHdfs() throws Exception {
Configuration conf = HadoopConfigurationBuilder.buildHdfsConfiguration(
- clusterSpec, cluster, defaults);
- assertThat(Iterators.size(conf.getKeys()), is(1));
+ clusterSpec, cluster, defaults,
+ Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1")));
+ assertThat(Iterators.size(conf.getKeys()), is(4));
assertThat(conf.getString("p1"), is("hdfs1"));
+ assertThat(conf.getString("dfs.data.dir"),
+ is("/data0/hadoop/hdfs/data,/data1/hadoop/hdfs/data"));
}
@Test
public void testMapReduce() throws Exception {
Cluster cluster = newCluster(5);
Configuration conf = HadoopConfigurationBuilder
- .buildMapReduceConfiguration(clusterSpec, cluster, defaults);
+ .buildMapReduceConfiguration(clusterSpec, cluster, defaults,
+ Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1")));
assertThat(conf.getString("p1"), is("mapred1"));
assertThat(conf.getString("mapred.job.tracker"), matches(".+:8021"));
assertThat(conf.getString("mapred.tasktracker.map.tasks.maximum"),
is("4"));
assertThat(conf.getString("mapred.tasktracker.reduce.tasks.maximum"),
is("3"));
assertThat(conf.getString("mapred.reduce.tasks"), is("15"));
+ assertThat(conf.getString("mapred.local.dir"),
+ is("/data0/hadoop/mapred/local,/data1/hadoop/mapred/local"));
}
@Test
@@ -165,7 +173,8 @@ public class HadoopConfigurationBuilderT
overrides.addProperty("hadoop-mapreduce.mapred.tasktracker.map.tasks.maximum",
"70");
clusterSpec = ClusterSpec.withNoDefaults(overrides);
Configuration conf =
HadoopConfigurationBuilder.buildMapReduceConfiguration(
- clusterSpec, cluster, defaults);
+ clusterSpec, cluster, defaults,
+ Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1")));
assertThat(conf.getString("mapred.tasktracker.map.tasks.maximum"),
is("70"));
}
@@ -175,7 +184,8 @@ public class HadoopConfigurationBuilderT
overrides.addProperty("hadoop-mapreduce.mapred.reduce.tasks", "7");
clusterSpec = ClusterSpec.withNoDefaults(overrides);
Configuration conf =
HadoopConfigurationBuilder.buildMapReduceConfiguration(
- clusterSpec, cluster, defaults);
+ clusterSpec, cluster, defaults,
+ Sets.newLinkedHashSet(Lists.newArrayList("/data0", "/data1")));
assertThat(conf.getString("mapred.reduce.tasks"), is("7"));
}
Modified:
whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh
URL:
http://svn.apache.org/viewvc/whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh?rev=1372568&r1=1372567&r2=1372568&view=diff
==============================================================================
---
whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh
(original)
+++
whirr/trunk/services/yarn/src/main/resources/functions/configure_hadoop_mr2.sh
Mon Aug 13 19:49:06 2012
@@ -28,26 +28,12 @@ function configure_hadoop_mr2() {
ROLES=$1
shift
- case $CLOUD_PROVIDER in
- ec2 | aws-ec2 )
- # Alias /mnt as /data
- ln -s /mnt /data
- ;;
- *)
- ;;
- esac
-
HADOOP_COMMON_HOME=$HADOOP_HOME
HADOOP_HDFS_HOME=$HADOOP_HOME
HADOOP_HOME=/usr/local/hadoop
HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
- mkdir -p /data/hadoop
- chown hadoop:hadoop /data/hadoop
- if [ ! -e /data/tmp ]; then
- mkdir /data/tmp
- chmod a+rwxt /data/tmp
- fi
+ make_hadoop_dirs /data*
mkdir /etc/hadoop
ln -s $HADOOP_CONF_DIR /etc/hadoop/conf
@@ -101,6 +87,19 @@ function configure_hadoop_mr2() {
}
+function make_hadoop_dirs {
+ for mount in "$@"; do
+ if [ ! -e $mount/hadoop ]; then
+ mkdir -p $mount/hadoop
+ chown hadoop:hadoop $mount/hadoop
+ fi
+ if [ ! -e $mount/tmp ]; then
+ mkdir $mount/tmp
+ chmod a+rwxt $mount/tmp
+ fi
+ done
+}
+
function start_namenode() {
if which dpkg &> /dev/null; then
AS_HADOOP="su -s /bin/bash - hadoop -c"