[ https://issues.apache.org/jira/browse/HDFS-16732?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17584585#comment-17584585 ]
ASF GitHub Bot commented on HDFS-16732: --------------------------------------- zhengchenyu commented on code in PR #4756: URL: https://github.com/apache/hadoop/pull/4756#discussion_r954478351 ########## hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNodeWhenReportDelay.java: ########## @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_STATE_CONTEXT_ENABLED_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY; +import static org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter.getServiceState; +import static org.apache.hadoop.hdfs.server.namenode.ha.ObserverReadProxyProvider.OBSERVER_PROBE_RETRY_PERIOD_KEY; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.List; + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.protocol.DirectoryListing; +import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster; +import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestObserverNodeWhenReportDelay { Review Comment: I add some new config in TestObserverNodeWhenReportDelay, I worried about affect other unit test in TestObserverNode. I will try to add this new test in TestObserverNode. > [SBN READ] Avoid get location from observer when the block report is delayed. > ----------------------------------------------------------------------------- > > Key: HDFS-16732 > URL: https://issues.apache.org/jira/browse/HDFS-16732 > Project: Hadoop HDFS > Issue Type: Bug > Components: hdfs > Affects Versions: 3.2.1 > Reporter: zhengchenyu > Assignee: zhengchenyu > Priority: Critical > Labels: pull-request-available > > Hive on tez application fail occasionally after observer is enable, log show > below. > {code:java} > 2022-08-18 15:22:06,914 [ERROR] [Dispatcher thread {Central}] > |impl.VertexImpl|: Vertex Input: namenodeinfo_stg initializer failed, > vertex=vertex_1660618571916_4839_1_00 [Map 1] > org.apache.tez.dag.app.dag.impl.AMUserCodeException: > java.lang.ArrayIndexOutOfBoundsException: 0 > at > org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallback.onFailure(RootInputInitializerManager.java:329) > at > com.google.common.util.concurrent.Futures$CallbackListener.run(Futures.java:1056) > at > com.google.common.util.concurrent.DirectExecutor.execute(DirectExecutor.java:30) > at > com.google.common.util.concurrent.AbstractFuture.executeListener(AbstractFuture.java:1138) > at > com.google.common.util.concurrent.AbstractFuture.complete(AbstractFuture.java:958) > at > com.google.common.util.concurrent.AbstractFuture.setException(AbstractFuture.java:748) > at > com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.afterRanInterruptibly(TrustedListenableFutureTask.java:133) > at > com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:80) > at > com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:78) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: java.lang.ArrayIndexOutOfBoundsException: 0 > at > org.apache.hadoop.mapred.FileInputFormat.identifyHosts(FileInputFormat.java:748) > at > org.apache.hadoop.mapred.FileInputFormat.getSplitHostsAndCachedHosts(FileInputFormat.java:714) > at > org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:378) > at > org.apache.hadoop.hive.ql.io.HiveInputFormat.addSplitsForGroup(HiveInputFormat.java:306) > at > org.apache.hadoop.hive.ql.io.HiveInputFormat.getSplits(HiveInputFormat.java:408) > at > org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator.initialize(HiveSplitGenerator.java:159) > at > org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:279) > at > org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:270) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730) > at > org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:270) > at > org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:254) > at > com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:125) > at > com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:57) > ... 4 more {code} > As describe in MAPREDUCE-7082, when the block is missing, then will throw > this exception, but my cluster had no missing block. > In this example, I found getListing return location information. When block > report of observer is delayed, will return the block without location. > HDFS-13924 is introduce to solve this problem, but only consider > getBlockLocations. > In observer node, all method which may return location should check whether > locations is empty or not. -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org