[ https://issues.apache.org/jira/browse/HDFS-14768?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16954304#comment-16954304 ]
Hadoop QA commented on HDFS-14768: ---------------------------------- | (x) *{color:red}-1 overall{color}* | \\ \\ || Vote || Subsystem || Runtime || Comment || | {color:blue}0{color} | {color:blue} reexec {color} | {color:blue} 0m 58s{color} | {color:blue} Docker mode activated. {color} | || || || || {color:brown} Prechecks {color} || | {color:green}+1{color} | {color:green} @author {color} | {color:green} 0m 0s{color} | {color:green} The patch does not contain any @author tags. {color} | | {color:green}+1{color} | {color:green} test4tests {color} | {color:green} 0m 0s{color} | {color:green} The patch appears to include 2 new or modified test files. {color} | || || || || {color:brown} trunk Compile Tests {color} || | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 39m 24s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 2m 42s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 2m 9s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 2m 52s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} shadedclient {color} | {color:green} 31m 11s{color} | {color:green} branch has no errors when building and testing our client artifacts. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 4m 36s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 2m 40s{color} | {color:green} trunk passed {color} | || || || || {color:brown} Patch Compile Tests {color} || | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 2m 17s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 2m 2s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javac {color} | {color:green} 2m 2s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 1m 32s{color} | {color:green} hadoop-hdfs-project/hadoop-hdfs: The patch generated 0 new + 170 unchanged - 1 fixed = 170 total (was 171) {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 2m 17s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} whitespace {color} | {color:green} 0m 0s{color} | {color:green} The patch has no whitespace issues. {color} | | {color:green}+1{color} | {color:green} shadedclient {color} | {color:green} 26m 8s{color} | {color:green} patch has no errors when building and testing our client artifacts. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 4m 36s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 2m 11s{color} | {color:green} the patch passed {color} | || || || || {color:brown} Other Tests {color} || | {color:red}-1{color} | {color:red} unit {color} | {color:red}156m 8s{color} | {color:red} hadoop-hdfs in the patch failed. {color} | | {color:red}-1{color} | {color:red} asflicense {color} | {color:red} 1m 30s{color} | {color:red} The patch generated 50 ASF License warnings. {color} | | {color:black}{color} | {color:black} {color} | {color:black}284m 48s{color} | {color:black} {color} | \\ \\ || Reason || Tests || | Failed junit tests | hadoop.hdfs.server.datanode.TestNNHandlesCombinedBlockReport | | | hadoop.hdfs.tools.TestECAdmin | | | hadoop.hdfs.server.datanode.TestDataNodeErasureCodingMetrics | | | hadoop.hdfs.TestFileChecksum | | | hadoop.hdfs.security.TestDelegationTokenForProxyUser | | | hadoop.hdfs.server.datanode.TestDataNodeVolumeFailureReporting | | | hadoop.hdfs.TestDFSStripedInputStream | | | hadoop.hdfs.server.datanode.TestBlockHasMultipleReplicasOnSameDN | | | hadoop.hdfs.qjournal.server.TestJournalNodeMXBean | | | hadoop.hdfs.TestDecommission | | | hadoop.hdfs.TestFileCreation | | | hadoop.hdfs.server.namenode.TestQuotaWithStripedBlocksWithRandomECPolicy | | | hadoop.hdfs.server.namenode.TestAddStripedBlocks | | | hadoop.hdfs.TestPread | | | hadoop.hdfs.server.datanode.TestDataNodeReconfiguration | | | hadoop.hdfs.tools.TestStoragePolicySatisfyAdminCommands | | | hadoop.hdfs.server.datanode.TestDataNodeUUID | | | hadoop.hdfs.server.namenode.TestDeleteRace | | | hadoop.hdfs.tools.TestDelegationTokenFetcher | | | hadoop.hdfs.TestParallelShortCircuitRead | | | hadoop.hdfs.TestFileCorruption | | | hadoop.hdfs.TestDatanodeReport | | | hadoop.hdfs.TestEncryptionZonesWithHA | | | hadoop.hdfs.server.namenode.snapshot.TestSnapshot | | | hadoop.hdfs.server.blockmanagement.TestBlockTokenWithDFSStriped | | | hadoop.hdfs.TestAppendSnapshotTruncate | | | hadoop.hdfs.server.namenode.ha.TestConsistentReadsObserver | | | hadoop.hdfs.server.namenode.TestINodeAttributeProvider | | | hadoop.hdfs.tools.offlineEditsViewer.TestOfflineEditsViewer | | | hadoop.hdfs.server.blockmanagement.TestBlockStatsMXBean | | | hadoop.hdfs.server.blockmanagement.TestReplicationPolicyConsiderLoad | | | hadoop.hdfs.server.blockmanagement.TestReconstructStripedBlocksWithRackAwareness | | | hadoop.hdfs.TestErasureCodingPolicyWithSnapshot | | | hadoop.hdfs.TestErasureCodingMultipleRacks | | | hadoop.hdfs.TestDFSStartupVersions | | | hadoop.hdfs.TestDFSInputStream | | | hadoop.hdfs.TestModTime | | | hadoop.hdfs.TestDFSStripedOutputStreamWithRandomECPolicy | | | hadoop.hdfs.tools.TestDFSAdmin | | | hadoop.hdfs.security.token.block.TestBlockToken | | | hadoop.hdfs.server.blockmanagement.TestRedundancyMonitor | | | hadoop.hdfs.server.blockmanagement.TestPendingInvalidateBlock | | | hadoop.hdfs.TestDFSStripedOutputStreamWithFailure | | | hadoop.hdfs.server.mover.TestMover | | | hadoop.hdfs.server.namenode.TestNamenodeCapacityReport | | | hadoop.hdfs.server.namenode.TestINodeFile | | | hadoop.hdfs.tools.TestDFSZKFailoverController | | | hadoop.hdfs.TestFileAppend3 | | | hadoop.hdfs.server.namenode.TestListCorruptFileBlocks | | | hadoop.hdfs.TestFileConcurrentReader | | | hadoop.hdfs.server.blockmanagement.TestBlockManager | | | hadoop.hdfs.server.namenode.TestDefaultBlockPlacementPolicy | | | hadoop.hdfs.TestEncryptionZonesWithKMS | | | hadoop.hdfs.TestDatanodeRegistration | | | hadoop.hdfs.server.blockmanagement.TestSequentialBlockId | | | hadoop.hdfs.shortcircuit.TestShortCircuitCache | | | hadoop.hdfs.TestFileAppend2 | | | hadoop.hdfs.qjournal.server.TestJournal | | | hadoop.hdfs.server.datanode.TestRefreshNamenodes | | | hadoop.hdfs.server.namenode.ha.TestStandbyCheckpoints | | | hadoop.hdfs.TestDecommissionWithStriped | | | hadoop.hdfs.server.namenode.ha.TestPipelinesFailover | | | hadoop.hdfs.TestBalancerBandwidth | | | hadoop.hdfs.TestLeaseRecoveryStriped | | | hadoop.hdfs.server.blockmanagement.TestSequentialBlockGroupId | | | hadoop.hdfs.server.datanode.TestTriggerBlockReport | | | hadoop.hdfs.server.datanode.TestDirectoryScanner | | | hadoop.hdfs.server.blockmanagement.TestBlocksWithNotEnoughRacks | | | hadoop.hdfs.TestClientProtocolForPipelineRecovery | | | hadoop.hdfs.server.datanode.TestDataNodeHotSwapVolumes | | | hadoop.hdfs.TestTrashWithSecureEncryptionZones | | | hadoop.hdfs.tools.offlineImageViewer.TestOfflineImageViewerWithStripedBlocks | | | hadoop.hdfs.server.blockmanagement.TestBlockInfoStriped | | | hadoop.hdfs.server.blockmanagement.TestBlockReportRateLimiting | | | hadoop.hdfs.server.namenode.ha.TestRetryCacheWithHA | | | hadoop.hdfs.server.datanode.web.TestDatanodeHttpXFrame | | | hadoop.hdfs.TestInjectionForSimulatedStorage | | | hadoop.hdfs.server.namenode.TestLargeDirectoryDelete | | | hadoop.hdfs.TestErasureCodingPoliciesWithRandomECPolicy | | | hadoop.hdfs.server.namenode.ha.TestStandbyInProgressTail | | | hadoop.hdfs.server.namenode.TestFileTruncate | | | hadoop.hdfs.TestRead | | | hadoop.hdfs.qjournal.server.TestJournalNodeSync | | | hadoop.hdfs.TestHDFSFileSystemContract | | | hadoop.hdfs.TestDFSStripedInputStreamWithRandomECPolicy | | | hadoop.hdfs.server.namenode.TestFSImage | | | hadoop.hdfs.TestFileAppend | | | hadoop.hdfs.server.namenode.TestProcessCorruptBlocks | | | hadoop.hdfs.server.datanode.TestDataNodeMetrics | | | hadoop.hdfs.server.namenode.ha.TestEditLogsDuringFailover | | | hadoop.hdfs.TestFileChecksumCompositeCrc | | | hadoop.hdfs.server.datanode.TestHSync | | | hadoop.hdfs.server.namenode.TestFsck | | | hadoop.hdfs.server.datanode.TestDataNodeMetricsLogger | | | hadoop.hdfs.client.impl.TestBlockReaderLocal | | | hadoop.hdfs.server.blockmanagement.TestOverReplicatedBlocks | | | hadoop.hdfs.server.namenode.TestSecondaryNameNodeUpgrade | | | hadoop.hdfs.TestFileCreationDelete | | | hadoop.hdfs.crypto.TestHdfsCryptoStreams | | | hadoop.hdfs.TestDFSUpgradeFromImage | | | hadoop.hdfs.TestRollingUpgrade | | | hadoop.hdfs.server.namenode.snapshot.TestSnapshotDeletion | | | hadoop.hdfs.server.blockmanagement.TestPendingReconstruction | | | hadoop.hdfs.qjournal.client.TestQJMWithFaults | | | hadoop.hdfs.server.blockmanagement.TestReplicationPolicy | | | hadoop.hdfs.TestWriteRead | | | hadoop.hdfs.server.namenode.ha.TestXAttrsWithHA | | | hadoop.hdfs.server.namenode.snapshot.TestNestedSnapshots | | | hadoop.hdfs.protocol.datatransfer.sasl.TestSaslDataTransfer | | | hadoop.hdfs.server.datanode.TestIncrementalBrVariations | | | hadoop.hdfs.TestBlockTokenWrappingQOP | | | hadoop.hdfs.server.namenode.TestBlockPlacementPolicyRackFaultTolerant | \\ \\ || Subsystem || Report/Notes || | Docker | Client=19.03.3 Server=19.03.3 Image:yetus/hadoop:104ccca9169 | | JIRA Issue | HDFS-14768 | | JIRA Patch URL | https://issues.apache.org/jira/secure/attachment/12983322/HDFS-14768.005.patch | | Optional Tests | dupname asflicense compile javac javadoc mvninstall mvnsite unit shadedclient findbugs checkstyle | | uname | Linux 9571a9b97d94 4.15.0-58-generic #64-Ubuntu SMP Tue Aug 6 11:12:41 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | /testptch/patchprocess/precommit/personality/provided.sh | | git revision | trunk / 54dc6b7 | | maven | version: Apache Maven 3.3.9 | | Default Java | 1.8.0_222 | | findbugs | v3.1.0-RC1 | | unit | https://builds.apache.org/job/PreCommit-HDFS-Build/28107/artifact/out/patch-unit-hadoop-hdfs-project_hadoop-hdfs.txt | | Test Results | https://builds.apache.org/job/PreCommit-HDFS-Build/28107/testReport/ | | asflicense | https://builds.apache.org/job/PreCommit-HDFS-Build/28107/artifact/out/patch-asflicense-problems.txt | | Max. process+thread count | 1583 (vs. ulimit of 5500) | | modules | C: hadoop-hdfs-project/hadoop-hdfs U: hadoop-hdfs-project/hadoop-hdfs | | Console output | https://builds.apache.org/job/PreCommit-HDFS-Build/28107/console | | Powered by | Apache Yetus 0.8.0 http://yetus.apache.org | This message was automatically generated. > In some cases, erasure blocks are corruption when they are reconstruct. > ------------------------------------------------------------------------ > > Key: HDFS-14768 > URL: https://issues.apache.org/jira/browse/HDFS-14768 > Project: Hadoop HDFS > Issue Type: Bug > Components: datanode, erasure-coding, hdfs, namenode > Affects Versions: 3.0.2 > Reporter: guojh > Assignee: guojh > Priority: Major > Labels: patch > Fix For: 3.3.0 > > Attachments: 1568275810244.jpg, 1568276338275.jpg, 1568771471942.jpg, > HDFS-14768.000.patch, HDFS-14768.001.patch, HDFS-14768.002.patch, > HDFS-14768.003.patch, HDFS-14768.004.patch, HDFS-14768.005.patch, > HDFS-14768.006.patch, HDFS-14768.jpg, guojh_UT_after_deomission.txt, > guojh_UT_before_deomission.txt, zhaoyiming_UT_after_deomission.txt, > zhaoyiming_UT_beofre_deomission.txt > > > Policy is RS-6-3-1024K, version is hadoop 3.0.2; > We suppose a file's block Index is [0,1,2,3,4,5,6,7,8], And decommission > index[3,4], increase the index 6 datanode's > pendingReplicationWithoutTargets that make it large than > replicationStreamsHardLimit(we set 14). Then, After the method > chooseSourceDatanodes of BlockMananger, the liveBlockIndices is > [0,1,2,3,4,5,7,8], Block Counter is, Live:7, Decommission:2. > In method scheduleReconstruction of BlockManager, the additionalReplRequired > is 9 - 7 = 2. After Namenode choose two target Datanode, will assign a > erasureCode task to target datanode. > When datanode get the task will build targetIndices from liveBlockIndices > and target length. the code is blow. > {code:java} > // code placeholder > targetIndices = new short[targets.length]; > private void initTargetIndices() { > BitSet bitset = reconstructor.getLiveBitSet(); > int m = 0; hasValidTargets = false; > for (int i = 0; i < dataBlkNum + parityBlkNum; i++) { > if (!bitset.get) { > if (reconstructor.getBlockLen > 0) { > if (m < targets.length) { > targetIndices[m++] = (short)i; > hasValidTargets = true; > } > } > } > } > {code} > targetIndices[0]=6, and targetIndices[1] is aways 0 from initial value. > The StripedReader is aways create reader from first 6 index block, and is > [0,1,2,3,4,5] > Use the index [0,1,2,3,4,5] to build target index[6,0] will trigger the isal > bug. the block index6's data is corruption(all data is zero). > I write a unit test can stabilize repreduce. > {code:java} > // code placeholder > private int replicationStreamsHardLimit = > DFSConfigKeys.DFS_NAMENODE_REPLICATION_STREAMS_HARD_LIMIT_DEFAULT; > numDNs = dataBlocks + parityBlocks + 10; > @Test(timeout = 240000) > public void testFileDecommission() throws Exception { > LOG.info("Starting test testFileDecommission"); > final Path ecFile = new Path(ecDir, "testFileDecommission"); > int writeBytes = cellSize * dataBlocks; > writeStripedFile(dfs, ecFile, writeBytes); > Assert.assertEquals(0, bm.numOfUnderReplicatedBlocks()); > FileChecksum fileChecksum1 = dfs.getFileChecksum(ecFile, writeBytes); > final INodeFile fileNode = cluster.getNamesystem().getFSDirectory() > .getINode4Write(ecFile.toString()).asFile(); > LocatedBlocks locatedBlocks = > StripedFileTestUtil.getLocatedBlocks(ecFile, dfs); > LocatedBlock lb = dfs.getClient().getLocatedBlocks(ecFile.toString(), 0) > .get(0); > DatanodeInfo[] dnLocs = lb.getLocations(); > LocatedStripedBlock lastBlock = > (LocatedStripedBlock)locatedBlocks.getLastLocatedBlock(); > DatanodeInfo[] storageInfos = lastBlock.getLocations(); > // > DatanodeDescriptor datanodeDescriptor = > cluster.getNameNode().getNamesystem() > > .getBlockManager().getDatanodeManager().getDatanode(storageInfos[6].getDatanodeUuid()); > BlockInfo firstBlock = fileNode.getBlocks()[0]; > DatanodeStorageInfo[] dStorageInfos = bm.getStorages(firstBlock); > // the first heartbeat will consume 3 replica tasks > for (int i = 0; i <= replicationStreamsHardLimit + 3; i++) { > BlockManagerTestUtil.addBlockToBeReplicated(datanodeDescriptor, new > Block(i), > new DatanodeStorageInfo[]{dStorageInfos[0]}); > } > assertEquals(dataBlocks + parityBlocks, dnLocs.length); > int[] decommNodeIndex = {3, 4}; > final List<DatanodeInfo> decommisionNodes = new ArrayList<DatanodeInfo>(); > // add the node which will be decommissioning > decommisionNodes.add(dnLocs[decommNodeIndex[0]]); > decommisionNodes.add(dnLocs[decommNodeIndex[1]]); > decommissionNode(0, decommisionNodes, AdminStates.DECOMMISSIONED); > assertEquals(decommisionNodes.size(), fsn.getNumDecomLiveDataNodes()); > bm.getDatanodeManager().removeDatanode(datanodeDescriptor); > //assertNull(checkFile(dfs, ecFile, 9, decommisionNodes, numDNs)); > // Ensure decommissioned datanode is not automatically shutdown > DFSClient client = getDfsClient(cluster.getNameNode(0), conf); > assertEquals("All datanodes must be alive", numDNs, > client.datanodeReport(DatanodeReportType.LIVE).length); > FileChecksum fileChecksum2 = dfs.getFileChecksum(ecFile, writeBytes); > Assert.assertTrue("Checksum mismatches!", > fileChecksum1.equals(fileChecksum2)); > StripedFileTestUtil.checkData(dfs, ecFile, writeBytes, decommisionNodes, > null, blockGroupSize); > } > {code} > -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org