[ 
https://issues.apache.org/jira/browse/TEZ-3074?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15121261#comment-15121261
 ] 

Oleksiy Sayankin edited comment on TEZ-3074 at 1/28/16 11:45 AM:
-----------------------------------------------------------------

Yes, turning off Tez and using just MapReduce fixes the issue. But our customer 
wants to use Tez to speed up Hive queries. 

Actually, these steps only simulate the production cluster's behavior; they are 
not exactly the same. They were found by our support team. To figure out what is 
going on with the block locations and why blkLocations.length = 0, we have added 
logging statements to the Tez sources. Here they are:

{code:title=org.apache.tez.dag.api.DAG.java|borderStyle=solid}
  public synchronized DAG addTaskLocalFiles(Map<String, LocalResource> 
localFiles) {
    Preconditions.checkNotNull(localFiles);
    logLocalFiles(localFiles);
    logCommonTaskLocalFiles(commonTaskLocalFiles);
    TezCommonUtils.addAdditionalLocalResources(localFiles, 
commonTaskLocalFiles, "DAG " + getName());
    return this;
  }

  private static void logLocalFiles(Map<String, LocalResource> localFiles){
    LOG.info("###@@@ localFiles:");
     for(Map.Entry<String, LocalResource> entry : localFiles.entrySet()){
       String key = entry.getKey();
       LocalResource localRecourse = entry.getValue();
       LOG.info("###@@@001 key = " + key + ", localRecourse.getSize() = " + 
localRecourse.getSize() + ", localRecourse.getType() = " + 
localRecourse.getType() + ", localRecourse.getVisibility() = " + 
localRecourse.getVisibility());
    }
  }

  private static void logCommonTaskLocalFiles(Map<String, LocalResource> 
commonTaskLocalFiles){
    LOG.info("###@@@ commonTaskLocalFiles:");
    for(Map.Entry<String, LocalResource> entry : 
commonTaskLocalFiles.entrySet()){
      String key = entry.getKey();
      LocalResource localRecourse = entry.getValue();
      LOG.info("###@@@002 key = " + key + ", localRecourse.getSize() = " + 
localRecourse.getSize() + ", localRecourse.getType() = " + 
localRecourse.getType() + ", localRecourse.getVisibility() = " + 
localRecourse.getVisibility());
    }
  }
{code}

and 

{code:title=org.apache.tez.mapreduce.hadoop.MRInputHelpers.java|borderStyle=solid}
  private static void updateLocalResourcesForInputSplits(
      FileSystem fs,
      InputSplitInfo inputSplitInfo,
      Map<String, LocalResource> localResources) throws IOException {
    if (localResources.containsKey(JOB_SPLIT_RESOURCE_NAME)) {
      throw new RuntimeException("LocalResources already contains a"
          + " resource named " + JOB_SPLIT_RESOURCE_NAME);
    }
    if (localResources.containsKey(JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
      throw new RuntimeException("LocalResources already contains a"
          + " resource named " + JOB_SPLIT_METAINFO_RESOURCE_NAME);
    }

    LOG.info("###@@@003 inputSplitInfo.getSplitsFile() = " + 
inputSplitInfo.getSplitsFile());
{code}

But this gave us nothing: the exception happened before any of the 
{noformat}###@@@{noformat} tags was printed out.


was (Author: osayankin):
Yes, turning off Tez and using just MapReduce fixes the issue. But our customer 
wants to use Tez to speed up Hive queries. 

Actually, these steps only simulate production cluster behavior, but not the 
exactly the same. They were found by our support team. To figure out what is 
going on with block location and why blkLocations.length = 0, we have added 
logging statements into Tes sources. Here they are:

{code:title=org.apache.tez.dag.api.DAG.java|borderStyle=solid}
  public synchronized DAG addTaskLocalFiles(Map<String, LocalResource> 
localFiles) {
    Preconditions.checkNotNull(localFiles);
    logLocalFiles(localFiles);
    logCommonTaskLocalFiles(commonTaskLocalFiles);
    TezCommonUtils.addAdditionalLocalResources(localFiles, 
commonTaskLocalFiles, "DAG " + getName());
    return this;
  }

  private static void logLocalFiles(Map<String, LocalResource> localFiles){
    LOG.info("###@@@ localFiles:");
     for(Map.Entry<String, LocalResource> entry : localFiles.entrySet()){
       String key = entry.getKey();
       LocalResource localRecourse = entry.getValue();
       LOG.info("###@@@001 key = " + key + ", localRecourse.getSize() = " + 
localRecourse.getSize() + ", localRecourse.getType() = " + 
localRecourse.getType() + ", localRecourse.getVisibility() = " + 
localRecourse.getVisibility());
    }
  }

  private static void logCommonTaskLocalFiles(Map<String, LocalResource> 
commonTaskLocalFiles){
    LOG.info("###@@@ commonTaskLocalFiles:");
    for(Map.Entry<String, LocalResource> entry : 
commonTaskLocalFiles.entrySet()){
      String key = entry.getKey();
      LocalResource localRecourse = entry.getValue();
      LOG.info("###@@@002 key = " + key + ", localRecourse.getSize() = " + 
localRecourse.getSize() + ", localRecourse.getType() = " + 
localRecourse.getType() + ", localRecourse.getVisibility() = " + 
localRecourse.getVisibility());
    }
  }
{code}

and 

{code:title=org.apache.tez.mapreduce.hadoop.MRInputHelpers.java|borderStyle=solid}
  private static void updateLocalResourcesForInputSplits(
      FileSystem fs,
      InputSplitInfo inputSplitInfo,
      Map<String, LocalResource> localResources) throws IOException {
    if (localResources.containsKey(JOB_SPLIT_RESOURCE_NAME)) {
      throw new RuntimeException("LocalResources already contains a"
          + " resource named " + JOB_SPLIT_RESOURCE_NAME);
    }
    if (localResources.containsKey(JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
      throw new RuntimeException("LocalResources already contains a"
          + " resource named " + JOB_SPLIT_METAINFO_RESOURCE_NAME);
    }

    LOG.info("###@@@003 inputSplitInfo.getSplitsFile() = " + 
inputSplitInfo.getSplitsFile());
{code}

But it gave nothing. Exception happened before any tag 
{noformat}###@@@{noformat} was printed out.

> Multithreading issue java.lang.ArrayIndexOutOfBoundsException: -1 while 
> working with Tez
> ----------------------------------------------------------------------------------------
>
>                 Key: TEZ-3074
>                 URL: https://issues.apache.org/jira/browse/TEZ-3074
>             Project: Apache Tez
>          Issue Type: Bug
>    Affects Versions: 0.5.3
>            Reporter: Oleksiy Sayankin
>             Fix For: 0.5.3
>
>         Attachments: tempsource.data
>
>
> *STEP 1. Install and configure Tez on yarn*
> *STEP 2. Configure hive for tez*
> *STEP 3. Create test tables in Hive and fill them with data*
> Enable dynamic partitioning in Hive. Add to {{hive-site.xml}} and restart 
> Hive.
> {code:xml}
> <!-- DYNAMIC PARTITION -->
> <property>
>   <name>hive.exec.dynamic.partition</name>
>   <value>true</value>
> </property>
> <property>
>   <name>hive.exec.dynamic.partition.mode</name>
>   <value>nonstrict</value>
> </property>
> <property>
>   <name>hive.exec.max.dynamic.partitions.pernode</name>
>   <value>2000</value>
> </property>
> <property>
>   <name>hive.exec.max.dynamic.partitions</name>
>   <value>2000</value>
> </property>
> {code}
> Execute in command line
> {code}
> hadoop fs -put tempsource.data /
> {code}
> Execute in command line. Use attached file {{tempsource.data}}
> {code}
> hive> CREATE TABLE test3 (x INT, y STRING) ROW FORMAT DELIMITED FIELDS 
> TERMINATED BY ',';
> hive> CREATE TABLE ptest1 (x INT, y STRING) PARTITIONED BY (z STRING) ROW 
> FORMAT DELIMITED FIELDS TERMINATED BY ',';
> hive> CREATE TABLE tempsource (x INT, y STRING, z STRING) ROW FORMAT 
> DELIMITED FIELDS TERMINATED BY ',';
> hive> LOAD DATA INPATH '/tempsource.data' OVERWRITE INTO TABLE tempsource;
> hive> INSERT OVERWRITE TABLE ptest1 PARTITION (z) SELECT x,y,z FROM 
> tempsource;
> {code}
> *STEP 4. Mount NFS on cluster*
> *STEP 5. Run teragen test application*
> Use separate console
> {code}
> /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples-2.6.0-cdh5.5.1.jar 
> teragen -Dmapred.map.tasks=7 -Dmapreduce.map.disk=0 
> -Dmapreduce.map.cpu.vcores=0 1000000000 /user/hdfs/input
> {code}
> *STEP 6. Create many test files*
> Use separate console
> {code}
> cd /hdfs/cluster/user/hive/warehouse/ptest1/z=66
> for i in `seq 1 10000`; do dd if=/dev/urandom of=tempfile$i bs=1M count=1;
> done
> {code}
> *STEP 7. Run the following query repeatedly in another console*
> Use separate console
> {code}
> hive> insert overwrite table test3 select x,y from ( select x,y,z from 
> (select x,y,z from ptest1 where x > 5 and x < 1000 union all select x,y,z 
> from ptest1 where x > 5 and x < 1000) a)b;
> {code}
> After running for some time, the query fails with an exception.
> {noformat}
> Status: Failed
> Vertex failed, vertexName=Map 3, vertexId=vertex_1443452487059_0426_1_01,
> diagnostics=[Vertex vertex_1443452487059_0426_1_01 [Map 3] killed/failed due
> to:ROOT_INPUT_INIT_FAILURE, Vertex Input: ptest1 initializer failed,
> vertex=vertex_1443452487059_0426_1_01 [Map 3],
> java.lang.ArrayIndexOutOfBoundsException: -1
>     at
> org.apache.hadoop.mapred.FileInputFormat.getBlockIndex(FileInputFormat.java:395)
>     at
> org.apache.hadoop.mapred.FileInputFormat.getSplitHostsAndCachedHosts(FileInputFormat.java:579)
>     at
> org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:359)
>     at
> org.apache.hadoop.hive.ql.io.HiveInputFormat.addSplitsForGroup(HiveInputFormat.java:300)
>     at
> org.apache.hadoop.hive.ql.io.HiveInputFormat.getSplits(HiveInputFormat.java:402)
>     at
> org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator.initialize(HiveSplitGenerator.java:132)
>     at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:245)
>     at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:239)
>     at java.security.AccessController.doPrivileged(Native Method)
>     at javax.security.auth.Subject.doAs(Subject.java:422)
>     at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1566)
>     at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:239)
>     at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:226)
>     at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>     at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>     at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>     at java.lang.Thread.run(Thread.java:745)
> ]
> Vertex killed, vertexName=Map 1, vertexId=vertex_1443452487059_0426_1_00,
> diagnostics=[Vertex received Kill in INITED state., Vertex
> vertex_1443452487059_0426_1_00 [Map 1] killed/failed due to:null]
> DAG failed due to vertex failure. failedVertices:1 killedVertices:1
> FAILED: Execution Error, return code 2 from
> org.apache.hadoop.hive.ql.exec.tez.TezTask
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to