Updated Branches: refs/heads/trunk 303386f75 -> d3f4a4e0d
GIRAPH-312: Giraph needs an admin script (ereisman) Project: http://git-wip-us.apache.org/repos/asf/giraph/repo Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/d3f4a4e0 Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/d3f4a4e0 Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/d3f4a4e0 Branch: refs/heads/trunk Commit: d3f4a4e0d4fdd78f5ba2f1bc907c67a68a6c518b Parents: 303386f Author: Eli Reisman <[email protected]> Authored: Mon Jan 28 14:22:07 2013 -0800 Committer: Eli Reisman <[email protected]> Committed: Mon Jan 28 14:22:07 2013 -0800 ---------------------------------------------------------------------- CHANGELOG | 2 + bin/giraph | 94 +-------- bin/giraph-admin | 39 ++++ bin/giraph-env | 106 +++++++++ .../org/apache/giraph/zk/GiraphZooKeeperAdmin.java | 172 +++++++++++++++ 5 files changed, 322 insertions(+), 91 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/giraph/blob/d3f4a4e0/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index 99b55b4..a82ee15 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ Giraph Change Log Release 0.2.0 - unreleased + GIRAPH-312: Giraph needs an admin script (ereisman) + GIRAPH-469: Refactor GraphMapper (ereisman) GIRAPH-491: Observer for job lifecycle (nitay) http://git-wip-us.apache.org/repos/asf/giraph/blob/d3f4a4e0/bin/giraph ---------------------------------------------------------------------- diff --git a/bin/giraph b/bin/giraph index 46263fc..bc4f5dc 100755 --- a/bin/giraph +++ b/bin/giraph @@ -22,96 +22,8 @@ if [ $# = 0 ]; then exit 1 fi - -# resolve links - $0 may be a softlink -THIS="$0" -while [ -h "$THIS" ]; do - ls=`ls -ld "$THIS"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '.*/.*' > /dev/null; then - THIS="$link" - else - THIS=`dirname "$THIS"`/"$link" - fi -done - -# some directories -THIS_DIR=`dirname "$THIS"` -GIRAPH_HOME=`cd 
"$THIS_DIR/.." ; pwd` - -# extra properties to send straight to Hadoop -HADOOP_PROPERTIES= -while [ $1 ] && [ ${1:0:2} == "-D" ] ; do - HADOOP_PROPERTIES="$1 $HADOOP_PROPERTIES" - shift -done - -USER_JAR=$1 -shift - -if [ ! -e "$USER_JAR" ]; then - echo "Can't find user jar (${USER_JAR}) to execute." - exit 1 -fi - -# add user jar to classpath -CLASSPATH=${USER_JAR} - -# add Giraph conf dir to classpath -CLASSPATH=$CLASSPATH:$GIRAPH_HOME/conf - -CLASS=org.apache.giraph.GiraphRunner - -# so that filenames w/ spaces are handled correctly in loops below -IFS= - -# add release dependencies to CLASSPATH -if [ -d "$GIRAPH_HOME/lib" ]; then - for f in $GIRAPH_HOME/lib/*.jar; do - CLASSPATH=${CLASSPATH}:$f - done - - for f in $GIRAPH_HOME/giraph*.jar ; do - if [ -e "$f" ]; then - JAR=$f - CLASSPATH=${CLASSPATH}:$f - break - fi - done -else - echo "No lib directory, assuming dev environment" - if [ ! -d "$GIRAPH_HOME/target" ]; then - echo "No target directory. Build Giraph jar before proceeding." - exit 1 - fi - - CLASSPATH2=`mvn dependency:build-classpath | grep -v "[INFO]"` - CLASSPATH=$CLASSPATH:$CLASSPATH2 - - for f in $GIRAPH_HOME/giraph/target/giraph*.jar; do - if [ -e "$f" ]; then - JAR=$f - break - fi - done -fi -# restore ordinary behaviour -unset IFS - -if [ "$JAR" = "" ] ; then - echo "Can't find Giraph jar." - exit 1 -fi - -if [ "$HADOOP_CONF_DIR" = "" ] ; then - HADOOP_CONF_DIR=$HADOOP_HOME/conf - echo "No HADOOP_CONF_DIR set, using $HADOOP_HOME/conf " -else - echo "HADOOP_CONF_DIR=$HADOOP_CONF_DIR" -fi - -# Giraph's jars to add to distributed cache via -libjar, which are csv rather than :sv -GIRAPH_JARS=`echo ${JAR}:${CLASSPATH}|sed s/:/,/g` -export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$CLASSPATH +# locate and run the giraph-env script to populate env vars, classpath, etc. 
+GIRAPH_ENV_DIR="$( cd -P "$( dirname "$0" )" && pwd )" +source $GIRAPH_ENV_DIR/giraph-env exec "$HADOOP_HOME/bin/hadoop" --config $HADOOP_CONF_DIR jar $JAR $CLASS $HADOOP_PROPERTIES -libjars $GIRAPH_JARS "$@" http://git-wip-us.apache.org/repos/asf/giraph/blob/d3f4a4e0/bin/giraph-admin ---------------------------------------------------------------------- diff --git a/bin/giraph-admin b/bin/giraph-admin new file mode 100644 index 0000000..827c289 --- /dev/null +++ b/bin/giraph-admin @@ -0,0 +1,39 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# friendly message for those who forget to tell us what to do +if [ $# = 0 ]; then + echo "Usage: giraph-admin [-Dgiraph.property1 -Dgiraph.property2...] -cleanZk" + echo "if no appropriate -D overrides are included, the conf file is checked" + echo "or default values are supplied. The ZK cleaner accepts the standard" + echo "giraph -D config options to configure the ZK server list, ports, base path." + exit 1 +fi + +GIRAPH_ENV_DIR="$( cd -P "$( dirname "$0" )" && pwd )" +source $GIRAPH_ENV_DIR/giraph-env + +# run the ZK cleaner? 
+if [ "$1" == "-cleanZk" ]; then + CLASS=org.apache.giraph.zk.GiraphZooKeeperAdmin + exec "$HADOOP_HOME/bin/hadoop" --config $HADOOP_CONF_DIR jar $JAR $CLASS $HADOOP_PROPERTIES -libjars $GIRAPH_JARS "$@" + exit 0 +fi + +echo "[ERROR] User must supply command-line arguments to start giraph-admin task." +echo "[ERROR] Current commands accepted: -cleanZk" +exit 1 http://git-wip-us.apache.org/repos/asf/giraph/blob/d3f4a4e0/bin/giraph-env ---------------------------------------------------------------------- diff --git a/bin/giraph-env b/bin/giraph-env new file mode 100644 index 0000000..9aa78bb --- /dev/null +++ b/bin/giraph-env @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# resolve links - $0 may be a softlink +THIS="$0" +while [ -h "$THIS" ]; do + ls=`ls -ld "$THIS"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '.*/.*' > /dev/null; then + THIS="$link" + else + THIS=`dirname "$THIS"`/"$link" + fi +done + +# some directories +THIS_DIR=`dirname "$THIS"` +GIRAPH_HOME=`cd "$THIS_DIR/.." ; pwd` + +# extra properties to send straight to Hadoop +HADOOP_PROPERTIES= +while [ $1 ] && [ ${1:0:2} == "-D" ] ; do + HADOOP_PROPERTIES="$1 $HADOOP_PROPERTIES" + shift +done + +USER_JAR=$1 +shift + +if [ ! 
-e "$USER_JAR" ]; then + echo "Can't find user jar (${USER_JAR}) to execute." + exit 1 +fi + +# add user jar to classpath +CLASSPATH=${USER_JAR} + +# add Giraph conf dir to classpath +CLASSPATH=$CLASSPATH:$GIRAPH_HOME/conf + +CLASS=org.apache.giraph.GiraphRunner + +# so that filenames w/ spaces are handled correctly in loops below +IFS= + +# add release dependencies to CLASSPATH +if [ -d "$GIRAPH_HOME/lib" ]; then + for f in $GIRAPH_HOME/lib/*.jar; do + CLASSPATH=${CLASSPATH}:$f + done + + for f in $GIRAPH_HOME/giraph*.jar ; do + if [ -e "$f" ]; then + JAR=$f + CLASSPATH=${CLASSPATH}:$f + break + fi + done +else + echo "No lib directory, assuming dev environment" + if [ ! -d "$GIRAPH_HOME/target" ]; then + echo "No target directory. Build Giraph jar before proceeding." + exit 1 + fi + + CLASSPATH2=`mvn dependency:build-classpath | grep -v "[INFO]"` + CLASSPATH=$CLASSPATH:$CLASSPATH2 + + for f in $GIRAPH_HOME/giraph/target/giraph*.jar; do + if [ -e "$f" ]; then + JAR=$f + break + fi + done +fi +# restore ordinary behaviour +unset IFS + +if [ "$JAR" = "" ] ; then + echo "Can't find Giraph jar." 
+ exit 1 +fi + +if [ "$HADOOP_CONF_DIR" = "" ] ; then + HADOOP_CONF_DIR=$HADOOP_HOME/conf + echo "No HADOOP_CONF_DIR set, using $HADOOP_HOME/conf " +else + echo "HADOOP_CONF_DIR=$HADOOP_CONF_DIR" +fi + +# Giraph's jars to add to distributed cache via -libjar, which are csv rather than :sv +GIRAPH_JARS=`echo ${JAR}:${CLASSPATH}|sed s/:/,/g` +export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$CLASSPATH http://git-wip-us.apache.org/repos/asf/giraph/blob/d3f4a4e0/giraph-core/src/main/java/org/apache/giraph/zk/GiraphZooKeeperAdmin.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/zk/GiraphZooKeeperAdmin.java b/giraph-core/src/main/java/org/apache/giraph/zk/GiraphZooKeeperAdmin.java new file mode 100644 index 0000000..7589a09 --- /dev/null +++ b/giraph-core/src/main/java/org/apache/giraph/zk/GiraphZooKeeperAdmin.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.zk; + + +import static java.lang.System.out; + +import java.net.UnknownHostException; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import org.apache.giraph.bsp.BspService; +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.WatchedEvent; + +/** + * A Utility class to be used by Giraph admins to occasionally clean up the + * ZK remnants of jobs that have failed or were killed before finishing. + * Usage (note that defaults are used if giraph.XYZ args are missing): + * <code> + * bin/giraph-admin -Dgiraph.zkBaseNode=... -Dgiraph.zkList=... + * -Dgiraph.zkServerPort=... -cleanZk + * </code> + * + * Alternately, the <code>Configuration</code> file will populate these fields + * as it would in a <code>bin/giraph</code> run. + * + * <strong>WARNING:</strong> Obviously, running this while actual Giraph jobs + * using your cluster are in progress is <strong>not recommended.</strong> + */ +public class GiraphZooKeeperAdmin implements Watcher, Tool { + static { + Configuration.addDefaultResource("giraph-site.xml"); + } + + /** The configuration for this admin run */ + private Configuration conf; + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + /** + * Clean the ZooKeeper of all failed and cancelled in-memory + * job remnants that pile up on the ZK quorum over time. + * @param args the input command line arguments, if any. + * @return the System.exit value to return to the console. 
+ */ + @Override + public int run(String[] args) { + final int zkPort = getConf().getInt( + GiraphConfiguration.ZOOKEEPER_SERVER_PORT, + GiraphConfiguration.ZOOKEEPER_SERVER_PORT_DEFAULT); + final String zkBasePath = getConf().get( + GiraphConfiguration.BASE_ZNODE_KEY, "") + BspService.BASE_DIR; + final String[] zkServerList; + try { + zkServerList = getConf() + .get(GiraphConfiguration.ZOOKEEPER_LIST).split(","); + } catch (NullPointerException npe) { + throw new IllegalStateException("GiraphZooKeeperAdmin requires a list " + + "of ZooKeeper servers to clean."); + } + out.println("[GIRAPH-ZKADMIN] Attempting to clean Zookeeper " + + "hosts at: " + Arrays.deepToString(zkServerList)); + out.println("[GIRAPH-ZKADMIN] Connecting on port: " + zkPort); + out.println("[GIRAPH-ZKADMIN] to ZNode root path: " + zkBasePath); + try { + ZooKeeperExt zooKeeper = new ZooKeeperExt( + formatZkServerList(zkServerList, zkPort), + GiraphConfiguration.ZOOKEEPER_SESSION_TIMEOUT_DEFAULT, + GiraphConfiguration.ZOOKEEPER_OPS_MAX_ATTEMPTS_DEFAULT, + GiraphConfiguration.ZOOKEEPER_SERVERLIST_POLL_MSECS_DEFAULT, + this); + doZooKeeperCleanup(zooKeeper, zkBasePath); + return 0; + } catch (KeeperException e) { + System.err.println("[ERROR] Failed to do cleanup of " + + zkBasePath + " due to KeeperException: " + e.getMessage()); + } catch (InterruptedException e) { + System.err.println("[ERROR] Failed to do cleanup of " + + zkBasePath + " due to InterruptedException: " + e.getMessage()); + } catch (UnknownHostException e) { + System.err.println("[ERROR] Failed to do cleanup of " + + zkBasePath + " due to UnknownHostException: " + e.getMessage()); + } catch (IOException e) { + System.err.println("[ERROR] Failed to do cleanup of " + + zkBasePath + " due to IOException: " + e.getMessage()); + } + return -1; + } + + /** Implement watcher to receive event at the end of the cleaner run + * @param event the WatchedEvent returned by ZK after the cleaning job. 
+ */ + @Override + public final void process(WatchedEvent event) { + out.println("[GIRAPH-ZKADMIN] ZK event received: " + event); + } + + /** + * Cleans the ZooKeeper quorum of in-memory failed/killed job fragments. + * @param zooKeeper the connected ZK instance (session) to delete from. + * @param zkBasePath the base node to begin erasing from. + */ + public void doZooKeeperCleanup(ZooKeeperExt zooKeeper, String zkBasePath) + throws KeeperException, InterruptedException { + try { + zooKeeper.deleteExt(zkBasePath, -1, false); + out.println("[GIRAPH-ZKADMIN] Deleted: " + zkBasePath); + } catch (KeeperException.NotEmptyException e) { + List<String> childList = + zooKeeper.getChildrenExt(zkBasePath, false, false, false); + for (String child : childList) { + String childPath = zkBasePath + "/" + child; + doZooKeeperCleanup(zooKeeper, childPath); + } + zooKeeper.deleteExt(zkBasePath, -1, false); + out.println("[GIRAPH-ZKADMIN] Deleted: " + zkBasePath); + } + } + + /** Forms ZK server list in a format the ZooKeeperExt object + * requires to connect to the quorum. + * @param zkServerList the CSV-style list of hostnames of Zk quorum members. + * @param zkPort the port the quorum is listening on. + * @return the formatted zkConnectList for use in the ZkExt constructor. + */ + private String formatZkServerList(String[] zkServerList, int zkPort) + throws UnknownHostException { + StringBuffer zkConnectList = new StringBuffer(); + for (String zkServer : zkServerList) { + if (!zkServer.equals("")) { + zkConnectList.append(zkServer + ":" + zkPort + ","); + } + } + return zkConnectList.substring(0, zkConnectList.length() - 1); + } + + /** Entry point from shell script + * @param args the command line arguments + */ + public static void main(String[] args) throws Exception { + System.exit(ToolRunner.run(new GiraphZooKeeperAdmin(), args)); + } +}
