Author: ekoifman Date: Tue Jan 13 19:15:27 2015 New Revision: 1651445 URL: http://svn.apache.org/r1651445 Log: HIVE-9351 Running Hive Jobs with Tez causes templeton to never report percent complete
Modified: hive/trunk/hcatalog/src/test/e2e/templeton/README.txt hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm hive/trunk/hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java Modified: hive/trunk/hcatalog/src/test/e2e/templeton/README.txt URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/README.txt?rev=1651445&r1=1651444&r2=1651445&view=diff ============================================================================== --- hive/trunk/hcatalog/src/test/e2e/templeton/README.txt (original) +++ hive/trunk/hcatalog/src/test/e2e/templeton/README.txt Tue Jan 13 19:15:27 2015 @@ -223,3 +223,14 @@ enough map slots (10?) (mapred.tasktrack Adding Tests ------------ ToDo: add some guidelines + +Running on Tez +1. set up Tez as in http://tez.apache.org/install.html +2. set hive.execution.engine=tez in hive-site.xml (actually is this needed?) +3. add hive.execution.engine=tez to templeton.hive.properties in webhcat-site.xml +4. 
add to mapred-env.sh/yarn-env.sh (as you defined these in step 1) +export TEZ_VERSION=0.5.3 +export TEZ_JARS=/Users/ekoifman/dev/apache-tez-client-${TEZ_VERSION} +export TEZ_CONF_DIR=${TEZ_JARS}/conf +export HADOOP_CLASSPATH=${TEZ_CONF_DIR}:${TEZ_JARS}/*:${TEZ_JARS}/lib/*:${HADOOP_CLASSPATH} +(w/o this you'll see something like "java.lang.NoClassDefFoundError: org/apache/tez/dag/api/SessionNotRunning") Modified: hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh?rev=1651445&r1=1651444&r2=1651445&view=diff ============================================================================== --- hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh (original) +++ hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh Tue Jan 13 19:15:27 2015 @@ -48,5 +48,17 @@ ${HADOOP_HOME}/bin/hadoop fs -put ${PIG_ ${HADOOP_HOME}/bin/hadoop fs -put /Users/ekoifman/dev/sqoop-1.4.5.bin__hadoop-2.0.4-alpha.tar.gz /apps/templeton/sqoop-1.4.5.bin__hadoop-2.0.4-alpha.tar.gz ${HADOOP_HOME}/bin/hadoop fs -put /Users/ekoifman/dev/mysql-connector-java-5.1.30/mysql-connector-java-5.1.30-bin.jar /apps/templeton/jdbc/mysql-connector-java.jar + +#Tez set up (http://tez.apache.org/install.html) +#if not using Tez - ignore this +${HADOOP_HOME}/bin/hdfs dfs -put /Users/ekoifman/dev/apache-tez-${TEZ_VERSION}-src/tez-dist/target/tez-${TEZ_VERSION}.tar.gz /apps/tez-${TEZ_VERSION}.tar.gz +${HADOOP_HOME}/bin/hdfs dfs -mkdir /tmp/tezin +${HADOOP_HOME}/bin/hdfs dfs -mkdir /tmp/tezout +${HADOOP_HOME}/bin/hdfs dfs -put /Users/ekoifman/dev/hive/build.sh /tmp/tezin +#Above line is for Sanity Check: this is to run #6 in http://tez.apache.org/install.html +#$HADOOP_HOME/bin/hadoop jar tez-examples-0.5.3.jar orderedwordcount /tmp/tezin /tmp/tezout + + + #check what got deployed -${HADOOP_HOME}/bin/hdfs dfs -ls -R /apps/templeton webhcate2e 
/user/templeton /user/hive/warehouse +${HADOOP_HOME}/bin/hdfs dfs -ls -R /apps webhcate2e /user/templeton /user/hive/warehouse Modified: hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh?rev=1651445&r1=1651444&r2=1651445&view=diff ============================================================================== --- hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh (original) +++ hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh Tue Jan 13 19:15:27 2015 @@ -22,14 +22,29 @@ # define necessary env vars here and source it in other files -export HADOOP_VERSION=2.4.1-SNAPSHOT -#export HIVE_VERSION=0.14.0-SNAPSHOT -export PIG_VERSION=0.12.2-SNAPSHOT +echo ${HADOOP_VERSION}; + +if [ -z ${HADOOP_VERSION} ]; then + export HADOOP_VERSION=2.4.1-SNAPSHOT +fi + +if [ -z ${HIVE_VERSION} ]; then + export HIVE_VERSION=0.14.0-SNAPSHOT +fi + +if [ -z ${PIG_VERSION} ]; then + export PIG_VERSION=0.12.2-SNAPSHOT +fi #Root of project source tree -export PROJ_HOME=/Users/${USER}/dev/hive +if [ -z ${PROJ_HOME} ]; then + export PROJ_HOME=/Users/${USER}/dev/hive +fi export HIVE_HOME=${PROJ_HOME}/packaging/target/apache-hive-${HIVE_VERSION}-bin/apache-hive-${HIVE_VERSION}-bin -export HADOOP_HOME=/Users/${USER}/dev/hwxhadoop/hadoop-dist/target/hadoop-${HADOOP_VERSION} + +if [ -z ${HADOOP_HOME} ]; then + export HADOOP_HOME=/Users/${USER}/dev/hwxhadoop/hadoop-dist/target/hadoop-${HADOOP_VERSION} +fi #Make sure Pig is built for the Hadoop version you are running export PIG_TAR_PATH=/Users/${USER}/dev/pig-${PIG_VERSION}-src/build Modified: hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm?rev=1651445&r1=1651444&r2=1651445&view=diff ============================================================================== --- 
hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm (original) +++ hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm Tue Jan 13 19:15:27 2015 @@ -878,7 +878,7 @@ sub compare if (defined($testCmd->{'check_job_percent_complete'})) { my $pcValue = $res_hash->{'percentComplete'}; my $expectedPercentComplete = $testCmd->{'check_job_percent_complete'}; - if ( (!defined $pcValue) || $pcValue ne $expectedPercentComplete ) { + if ( (!defined $pcValue) || $pcValue !~ m/$expectedPercentComplete/ ) { print $log "check_job_percent_complete failed. got percentComplete $pcValue, expected $expectedPercentComplete"; $result = 0; } Modified: hive/trunk/hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf?rev=1651445&r1=1651444&r2=1651445&view=diff ============================================================================== --- hive/trunk/hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf (original) +++ hive/trunk/hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf Tue Jan 13 19:15:27 2015 @@ -405,7 +405,7 @@ $cfg = { #test select a,b - 'num' => 7, + 'num' => 7,#seems to be the same as test 6 except for percent_complete check 'method' => 'POST', 'url' => ':TEMPLETON_URL:/templeton/v1/hive', 'post_options' => ['user.name=:UNAME:','execute=select count(*) from mynums', ], @@ -414,7 +414,7 @@ $cfg = 'status_code' => 200, 'check_job_created' => 1, 'check_job_complete' => 'SUCCESS', - 'check_job_percent_complete' => 'map 100% reduce 100%', + 'check_job_percent_complete' => 'map 100% reduce 100%|100% complete', 'check_job_exit_value' => 0, 'check_call_back' => 1, Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java URL: 
http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java?rev=1651445&r1=1651444&r2=1651445&view=diff ============================================================================== --- hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java (original) +++ hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java Tue Jan 13 19:15:27 2015 @@ -42,6 +42,8 @@ import java.util.regex.Pattern; import javax.ws.rs.core.UriBuilder; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -57,6 +59,8 @@ import org.apache.hive.hcatalog.templeto * General utility methods. */ public class TempletonUtils { + private static final Log LOG = LogFactory.getLog(TempletonUtils.class); + /** * Is the object non-empty? */ @@ -98,6 +102,24 @@ public class TempletonUtils { public static final Pattern PIG_COMPLETE = Pattern.compile(" \\d+% complete$"); //looking for map = 100%, reduce = 100% public static final Pattern HIVE_COMPLETE = Pattern.compile(" map = (\\d+%),\\s+reduce = (\\d+%).*$"); + /** + * Hive on Tez produces progress report that looks like this + * Map 1: -/- Reducer 2: 0/1 + * Map 1: -/- Reducer 2: 0(+1)/1 + * Map 1: -/- Reducer 2: 1/1 + * + * -/- means there are no tasks (yet) + * 0/1 means 1 total tasks, 0 completed + * 1(+2)/3 means 3 total, 1 completed and 2 running + * + * HIVE-8495, in particular https://issues.apache.org/jira/secure/attachment/12675504/Screen%20Shot%202014-10-16%20at%209.35.26%20PM.png + * has more examples. 
+ * To report progress, we'll assume all tasks are equal size and compute "completed" as percent of "total" + * "(Map|Reducer) (\\d+:) ((-/-)|(\\d+(\\(\\+\\d+\\))?/\\d+))" is the complete pattern but we'll drop "-/-" to exclude + * groups that don't add information such as "Map 1: -/-" + */ + public static final Pattern TEZ_COMPLETE = Pattern.compile("(Map|Reducer) (\\d+:) (\\d+(\\(\\+\\d+\\))?/\\d+)"); + public static final Pattern TEZ_COUNTERS = Pattern.compile("\\d+"); /** * Extract the percent complete line from Pig or Jar jobs. @@ -115,6 +137,31 @@ public class TempletonUtils { if(hive.find()) { return "map " + hive.group(1) + " reduce " + hive.group(2); } + Matcher tez = TEZ_COMPLETE.matcher(line); + if(tez.find()) { + int totalTasks = 0; + int completedTasks = 0; + do { + //here each group looks something like "Map 2: 2/4" "Reducer 3: 1(+2)/4" + //just parse the numbers and ignore one from "Map 2" and from "(+2)" if it's there + Matcher counts = TEZ_COUNTERS.matcher(tez.group()); + List<String> items = new ArrayList<String>(4); + while(counts.find()) { + items.add(counts.group()); + } + completedTasks += Integer.parseInt(items.get(1)); + if(items.size() == 3) { + totalTasks += Integer.parseInt(items.get(2)); + } + else { + totalTasks += Integer.parseInt(items.get(3)); + } + } while(tez.find()); + if(totalTasks == 0) { + return "0% complete (0 total tasks)"; + } + return completedTasks * 100 / totalTasks + "% complete"; + } return null; }