[ https://issues.apache.org/jira/browse/HDFS-2414?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13123236#comment-13123236 ]
Robert Joseph Evans commented on HDFS-2414: ------------------------------------------- +1 for the fix (non-binding). I am happy to see that the corruption to the file is now deterministic. Throwing random data at code is good for testing, but it needs to be reproducible with a random seed or something. Thanks for jumping on this so quickly. > TestDFSRollback fails intermittently > ------------------------------------ > > Key: HDFS-2414 > URL: https://issues.apache.org/jira/browse/HDFS-2414 > Project: Hadoop HDFS > Issue Type: Bug > Components: name-node, test > Affects Versions: 0.23.0 > Reporter: Robert Joseph Evans > Assignee: Todd Lipcon > Priority: Critical > Attachments: hdfs-2414.txt, hdfs-2414.txt, hdfs-2414.txt, > run-106-failed.tgz, run-158-failed.tgz > > > When running TestDFSRollback repeatedly in a loop I observed a failure rate > of about 3%. Two separate stack traces are in the output and it appears to > have something to do with not writing out a complete snapshot of the data for > rollback. > {noformat} > ------------------------------------------------------------------------------- > Test set: org.apache.hadoop.hdfs.TestDFSRollback > ------------------------------------------------------------------------------- > Tests run: 1, Failures: 1, Errors: 0, Skipped: 0, Time elapsed: 8.514 sec <<< > FAILURE! > testRollback(org.apache.hadoop.hdfs.TestDFSRollback) Time elapsed: 8.34 sec > <<< FAILURE! 
> java.lang.AssertionError: File contents differed: > > /home/evans/src/hadoop-git/hadoop-hdfs-project/hadoop-hdfs/target/test/data/dfs/data2/current/VERSION=5b19197114fad0a254e3f318b7f14aec > > /home/evans/src/hadoop-git/hadoop-hdfs-project/hadoop-hdfs/target/test/data/dfs/data1/current/VERSION=ea7b000a6a1711169fc7a836b240a991 > at org.junit.Assert.fail(Assert.java:91) > at > org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertFileContentsSame(FSImageTestUtil.java:250) > at > org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertParallelFilesAreIdentical(FSImageTestUtil.java:236) > at > org.apache.hadoop.hdfs.TestDFSRollback.checkResult(TestDFSRollback.java:86) > at > org.apache.hadoop.hdfs.TestDFSRollback.testRollback(TestDFSRollback.java:171) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) > at java.lang.reflect.Method.invoke(Method.java:597) > at junit.framework.TestCase.runTest(TestCase.java:168) > at junit.framework.TestCase.runBare(TestCase.java:134) > at junit.framework.TestResult$1.protect(TestResult.java:110) > at junit.framework.TestResult.runProtected(TestResult.java:128) > at junit.framework.TestResult.run(TestResult.java:113) > at junit.framework.TestCase.run(TestCase.java:124) > at junit.framework.TestSuite.runTest(TestSuite.java:232) > at junit.framework.TestSuite.run(TestSuite.java:227) > at > org.junit.internal.runners.JUnit38ClassRunner.run(JUnit38ClassRunner.java:83) > at > org.apache.maven.surefire.junit4.JUnit4TestSet.execute(JUnit4TestSet.java:59) > at > org.apache.maven.surefire.suite.AbstractDirectoryTestSuite.executeTestSet(AbstractDirectoryTestSuite.java:120) > at > org.apache.maven.surefire.suite.AbstractDirectoryTestSuite.execute(AbstractDirectoryTestSuite.java:145) > at org.apache.maven.surefire.Surefire.run(Surefire.java:104) > 
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) > at java.lang.reflect.Method.invoke(Method.java:597) > at > org.apache.maven.surefire.booter.SurefireBooter.runSuitesInProcess(SurefireBooter.java:290) > at > org.apache.maven.surefire.booter.SurefireBooter.main(SurefireBooter.java:1017) > {noformat} > is the more common one, but I also saw > {noformat} > ------------------------------------------------------------------------------- > Test set: org.apache.hadoop.hdfs.TestDFSRollback > ------------------------------------------------------------------------------- > Tests run: 1, Failures: 1, Errors: 0, Skipped: 0, Time elapsed: 7.471 sec <<< > FAILURE! > testRollback(org.apache.hadoop.hdfs.TestDFSRollback) Time elapsed: 7.304 sec > <<< FAILURE! > junit.framework.AssertionFailedError: Expected substring 'file VERSION has > layoutVersion missing' in exception but got: > java.lang.IllegalArgumentException: Malformed \uxxxx encoding. 
> at java.util.Properties.loadConvert(Properties.java:552) > at java.util.Properties.load0(Properties.java:374) > at java.util.Properties.load(Properties.java:325) > at > org.apache.hadoop.hdfs.server.common.Storage.readPropertiesFile(Storage.java:837) > at > org.apache.hadoop.hdfs.server.common.Storage.readPreviousVersionProperties(Storage.java:789) > at > org.apache.hadoop.hdfs.server.namenode.FSImage.doRollback(FSImage.java:439) > at > org.apache.hadoop.hdfs.server.namenode.FSImage.recoverTransitionRead(FSImage.java:270) > at > org.apache.hadoop.hdfs.server.namenode.FSDirectory.loadFSImage(FSDirectory.java:174) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.initialize(FSNamesystem.java:294) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.<init>(FSNamesystem.java:266) > at > org.apache.hadoop.hdfs.server.namenode.NameNode.loadNamesystem(NameNode.java:292) > at > org.apache.hadoop.hdfs.server.namenode.NameNode.initialize(NameNode.java:326) > at > org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:452) > at > org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:444) > at > org.apache.hadoop.hdfs.server.namenode.NameNode.createNameNode(NameNode.java:742) > at > org.apache.hadoop.hdfs.MiniDFSCluster.createNameNode(MiniDFSCluster.java:637) > at > org.apache.hadoop.hdfs.MiniDFSCluster.initMiniDFSCluster(MiniDFSCluster.java:541) > at > org.apache.hadoop.hdfs.MiniDFSCluster.<init>(MiniDFSCluster.java:257) > at > org.apache.hadoop.hdfs.MiniDFSCluster.<init>(MiniDFSCluster.java:85) > at > org.apache.hadoop.hdfs.MiniDFSCluster$Builder.build(MiniDFSCluster.java:243) > at > org.apache.hadoop.hdfs.TestDFSRollback.startNameNodeShouldFail(TestDFSRollback.java:100) > at > org.apache.hadoop.hdfs.TestDFSRollback.testRollback(TestDFSRollback.java:268) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) > at > 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) > at java.lang.reflect.Method.invoke(Method.java:597) > at junit.framework.TestCase.runTest(TestCase.java:168) > at junit.framework.TestCase.runBare(TestCase.java:134) > at junit.framework.TestResult$1.protect(TestResult.java:110) > at junit.framework.TestResult.runProtected(TestResult.java:128) > at junit.framework.TestResult.run(TestResult.java:113) > at junit.framework.TestCase.run(TestCase.java:124) > at junit.framework.TestSuite.runTest(TestSuite.java:232) > at junit.framework.TestSuite.run(TestSuite.java:227) > at > org.junit.internal.runners.JUnit38ClassRunner.run(JUnit38ClassRunner.java:83) > at > org.apache.maven.surefire.junit4.JUnit4TestSet.execute(JUnit4TestSet.java:59) > at > org.apache.maven.surefire.suite.AbstractDirectoryTestSuite.executeTestSet(AbstractDirectoryTestSuite.java:120) > at > org.apache.maven.surefire.suite.AbstractDirectoryTestSuite.execute(AbstractDirectoryTestSuite.java:145) > at org.apache.maven.surefire.Surefire.run(Surefire.java:104) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) > at java.lang.reflect.Method.invoke(Method.java:597) > at > org.apache.maven.surefire.booter.SurefireBooter.runSuitesInProcess(SurefireBooter.java:290) > at > org.apache.maven.surefire.booter.SurefireBooter.main(SurefireBooter.java:1017) > at junit.framework.Assert.fail(Assert.java:47) > at > org.apache.hadoop.hdfs.TestDFSRollback.startNameNodeShouldFail(TestDFSRollback.java:109) > at > org.apache.hadoop.hdfs.TestDFSRollback.testRollback(TestDFSRollback.java:268) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) > at > 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) > at java.lang.reflect.Method.invoke(Method.java:597) > at junit.framework.TestCase.runTest(TestCase.java:168) > at junit.framework.TestCase.runBare(TestCase.java:134) > at junit.framework.TestResult$1.protect(TestResult.java:110) > at junit.framework.TestResult.runProtected(TestResult.java:128) > at junit.framework.TestResult.run(TestResult.java:113) > at junit.framework.TestCase.run(TestCase.java:124) > at junit.framework.TestSuite.runTest(TestSuite.java:232) > at junit.framework.TestSuite.run(TestSuite.java:227) > at > org.junit.internal.runners.JUnit38ClassRunner.run(JUnit38ClassRunner.java:83) > at > org.apache.maven.surefire.junit4.JUnit4TestSet.execute(JUnit4TestSet.java:59) > at > org.apache.maven.surefire.suite.AbstractDirectoryTestSuite.executeTestSet(AbstractDirectoryTestSuite.java:120) > at > org.apache.maven.surefire.suite.AbstractDirectoryTestSuite.execute(AbstractDirectoryTestSuite.java:145) > at org.apache.maven.surefire.Surefire.run(Surefire.java:104) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) > at java.lang.reflect.Method.invoke(Method.java:597) > at > org.apache.maven.surefire.booter.SurefireBooter.runSuitesInProcess(SurefireBooter.java:290) > at > org.apache.maven.surefire.booter.SurefireBooter.main(SurefireBooter.java:1017) > {noformat} -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira