Author: ekoifman
Date: Tue Jan 13 19:15:27 2015
New Revision: 1651445

URL: http://svn.apache.org/r1651445
Log:
HIVE-9351 Running Hive Jobs with Tez cause templeton to never report percent 
complete

Modified:
    hive/trunk/hcatalog/src/test/e2e/templeton/README.txt
    hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh
    hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh
    hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm
    hive/trunk/hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf
    hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java

Modified: hive/trunk/hcatalog/src/test/e2e/templeton/README.txt
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/README.txt?rev=1651445&r1=1651444&r2=1651445&view=diff
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/README.txt (original)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/README.txt Tue Jan 13 19:15:27 2015
@@ -223,3 +223,14 @@ enough map slots (10?) (mapred.tasktrack
 Adding Tests
 ------------
 ToDo: add some guidelines
+
+Running on Tez
+1. set up Tez as in http://tez.apache.org/install.html
+2. set hive.execution.engine=tez in hive-site.xml (actually is this needed?)
+3. add hive.execution.engine=tez to templeton.hive.properties in webhcat-site.xml
+4. add to mapred-env.sh/yarn-env.sh (as you defined these in step 1)
+export TEZ_VERSION=0.5.3
+export TEZ_JARS=/Users/ekoifman/dev/apache-tez-client-${TEZ_VERSION}
+export TEZ_CONF_DIR=${TEZ_JARS}/conf
+export HADOOP_CLASSPATH=${TEZ_CONF_DIR}:${TEZ_JARS}/*:${TEZ_JARS}/lib/*:${HADOOP_CLASSPATH}
+(w/o this you'll see something like "java.lang.NoClassDefFoundError: org/apache/tez/dag/api/SessionNotRunning")

Modified: hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh?rev=1651445&r1=1651444&r2=1651445&view=diff
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh (original)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh Tue Jan 13 19:15:27 2015
@@ -48,5 +48,17 @@ ${HADOOP_HOME}/bin/hadoop fs -put ${PIG_
 
 ${HADOOP_HOME}/bin/hadoop fs -put /Users/ekoifman/dev/sqoop-1.4.5.bin__hadoop-2.0.4-alpha.tar.gz /apps/templeton/sqoop-1.4.5.bin__hadoop-2.0.4-alpha.tar.gz
 ${HADOOP_HOME}/bin/hadoop fs -put /Users/ekoifman/dev/mysql-connector-java-5.1.30/mysql-connector-java-5.1.30-bin.jar /apps/templeton/jdbc/mysql-connector-java.jar
+
+#Tez set up (http://tez.apache.org/install.html)
+#if not using Tez - ignore this
+${HADOOP_HOME}/bin/hdfs dfs -put /Users/ekoifman/dev/apache-tez-${TEZ_VERSION}-src/tez-dist/target/tez-${TEZ_VERSION}.tar.gz /apps/tez-${TEZ_VERSION}.tar.gz
+${HADOOP_HOME}/bin/hdfs dfs -mkdir /tmp/tezin
+${HADOOP_HOME}/bin/hdfs dfs -mkdir /tmp/tezout
+${HADOOP_HOME}/bin/hdfs dfs -put /Users/ekoifman/dev/hive/build.sh /tmp/tezin
+#Above line is for Sanity Check: this is to run #6 in http://tez.apache.org/install.html
+#$HADOOP_HOME/bin/hadoop jar tez-examples-0.5.3.jar orderedwordcount /tmp/tezin /tmp/tezout
+
+
+
 #check what got deployed
-${HADOOP_HOME}/bin/hdfs dfs -ls -R /apps/templeton webhcate2e /user/templeton /user/hive/warehouse
+${HADOOP_HOME}/bin/hdfs dfs -ls -R /apps webhcate2e /user/templeton /user/hive/warehouse

Modified: hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh?rev=1651445&r1=1651444&r2=1651445&view=diff
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh (original)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh Tue Jan 13 19:15:27 2015
@@ -22,14 +22,29 @@
 
 # define necessary env vars here and source it in other files
 
-export HADOOP_VERSION=2.4.1-SNAPSHOT
-#export HIVE_VERSION=0.14.0-SNAPSHOT
-export PIG_VERSION=0.12.2-SNAPSHOT
+echo ${HADOOP_VERSION};
+
+if [ -z ${HADOOP_VERSION} ]; then
+  export HADOOP_VERSION=2.4.1-SNAPSHOT
+fi
+
+if [ -z ${HIVE_VERSION} ]; then
+  export HIVE_VERSION=0.14.0-SNAPSHOT
+fi
+
+if [ -z ${PIG_VERSION} ]; then
+  export PIG_VERSION=0.12.2-SNAPSHOT
+fi
 
 #Root of project source tree
-export PROJ_HOME=/Users/${USER}/dev/hive
+if [ -z ${PROJ_HOME} ]; then
+  export PROJ_HOME=/Users/${USER}/dev/hive
+fi
 export HIVE_HOME=${PROJ_HOME}/packaging/target/apache-hive-${HIVE_VERSION}-bin/apache-hive-${HIVE_VERSION}-bin
-export HADOOP_HOME=/Users/${USER}/dev/hwxhadoop/hadoop-dist/target/hadoop-${HADOOP_VERSION}
+
+if [ -z ${HADOOP_HOME} ]; then
+  export HADOOP_HOME=/Users/${USER}/dev/hwxhadoop/hadoop-dist/target/hadoop-${HADOOP_VERSION}
+fi
 
 #Make sure Pig is built for the Hadoop version you are running
 export PIG_TAR_PATH=/Users/${USER}/dev/pig-${PIG_VERSION}-src/build

Modified: hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm?rev=1651445&r1=1651444&r2=1651445&view=diff
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm (original)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm Tue Jan 13 19:15:27 2015
@@ -878,7 +878,7 @@ sub compare
             if (defined($testCmd->{'check_job_percent_complete'})) {
               my $pcValue = $res_hash->{'percentComplete'};
               my $expectedPercentComplete = $testCmd->{'check_job_percent_complete'};
-              if ( (!defined $pcValue) || $pcValue ne $expectedPercentComplete ) {
+              if ( (!defined $pcValue) || $pcValue !~ m/$expectedPercentComplete/ ) {
                 print $log "check_job_percent_complete failed. got percentComplete $pcValue,  expected  $expectedPercentComplete";
                 $result = 0;
               }

Modified: hive/trunk/hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf?rev=1651445&r1=1651444&r2=1651445&view=diff
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf (original)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf Tue Jan 13 19:15:27 2015
@@ -405,7 +405,7 @@ $cfg =
 
     {
                                 #test select a,b
-     'num' => 7,
+     'num' => 7,#seems to be the same as test 6 except for percent_complete check
      'method' => 'POST',
      'url' => ':TEMPLETON_URL:/templeton/v1/hive',
      'post_options' => ['user.name=:UNAME:','execute=select count(*) from mynums', ],
@@ -414,7 +414,7 @@ $cfg =
      'status_code' => 200,
      'check_job_created' => 1,
      'check_job_complete' => 'SUCCESS', 
-     'check_job_percent_complete' => 'map 100% reduce 100%',
+     'check_job_percent_complete' => 'map 100% reduce 100%|100% complete',
      'check_job_exit_value' => 0,
      'check_call_back' => 1,
 

Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java?rev=1651445&r1=1651444&r2=1651445&view=diff
==============================================================================
--- hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java (original)
+++ hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java Tue Jan 13 19:15:27 2015
@@ -42,6 +42,8 @@ import java.util.regex.Pattern;
 
 import javax.ws.rs.core.UriBuilder;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -57,6 +59,8 @@ import org.apache.hive.hcatalog.templeto
  * General utility methods.
  */
 public class TempletonUtils {
+  private static final Log LOG = LogFactory.getLog(TempletonUtils.class);
+
   /**
    * Is the object non-empty?
    */
@@ -98,6 +102,24 @@ public class TempletonUtils {
   public static final Pattern PIG_COMPLETE = Pattern.compile(" \\d+% complete$");
   //looking for map = 100%,  reduce = 100%
   public static final Pattern HIVE_COMPLETE = Pattern.compile(" map = (\\d+%),\\s+reduce = (\\d+%).*$");
+  /**
+   * Hive on Tez produces progress report that looks like this
+   * Map 1: -/-        Reducer 2: 0/1  
+   * Map 1: -/-        Reducer 2: 0(+1)/1      
+   * Map 1: -/-        Reducer 2: 1/1
+   * 
+   * -/- means there are no tasks (yet)
+   * 0/1 means 1 total tasks, 0 completed
+   * 1(+2)/3 means 3 total, 1 completed and 2 running
+   * 
+   * HIVE-8495, in particular https://issues.apache.org/jira/secure/attachment/12675504/Screen%20Shot%202014-10-16%20at%209.35.26%20PM.png
+   * has more examples.
+   * To report progress, we'll assume all tasks are equal size and compute "completed" as percent of "total"
+   * "(Map|Reducer) (\\d+:) ((-/-)|(\\d+(\\(\\+\\d+\\))?/\\d+))" is the complete pattern but we'll drop "-/-" to exclude
+   * groups that don't add information such as "Map 1: -/-"
+   */
+  public static final Pattern TEZ_COMPLETE = Pattern.compile("(Map|Reducer) (\\d+:) (\\d+(\\(\\+\\d+\\))?/\\d+)");
+  public static final Pattern TEZ_COUNTERS = Pattern.compile("\\d+");
 
   /**
    * Extract the percent complete line from Pig or Jar jobs.
@@ -115,6 +137,31 @@ public class TempletonUtils {
     if(hive.find()) {
       return "map " + hive.group(1) + " reduce " + hive.group(2);
     }
+    Matcher tez = TEZ_COMPLETE.matcher(line);
+    if(tez.find()) {
+      int totalTasks = 0;
+      int completedTasks = 0;
+      do {
+        //here each group looks something like "Map 2: 2/4" "Reducer 3: 1(+2)/4"
+        //just parse the numbers and ignore one from "Map 2" and from "(+2)" if it's there
+        Matcher counts = TEZ_COUNTERS.matcher(tez.group());
+        List<String> items = new ArrayList<String>(4);
+        while(counts.find()) {
+          items.add(counts.group());
+        }
+        completedTasks += Integer.parseInt(items.get(1));
+        if(items.size() == 3) {
+          totalTasks += Integer.parseInt(items.get(2));
+        }
+        else {
+          totalTasks += Integer.parseInt(items.get(3));
+        }
+      } while(tez.find());
+      if(totalTasks == 0) {
+        return "0% complete (0 total tasks)";
+      }
+      return completedTasks * 100 / totalTasks + "% complete";
+    }
     return null;
   }
 


Reply via email to