HADOOP-13192. org.apache.hadoop.util.LineReader cannot handle multibyte delimiters correctly. Contributed by binde.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/fc6b50cc Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/fc6b50cc Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/fc6b50cc Branch: refs/heads/YARN-2915 Commit: fc6b50cc574e144fd314dea6c11987c6a384bfa6 Parents: d0162f2 Author: Akira Ajisaka <aajis...@apache.org> Authored: Mon Jun 20 17:07:26 2016 +0900 Committer: Akira Ajisaka <aajis...@apache.org> Committed: Mon Jun 20 17:07:26 2016 +0900 ---------------------------------------------------------------------- .../java/org/apache/hadoop/util/LineReader.java | 5 +- .../org/apache/hadoop/util/TestLineReader.java | 59 ++++++++++++-------- 2 files changed, 41 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/fc6b50cc/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java index 153953d..e20a7c1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java @@ -318,7 +318,10 @@ public class LineReader implements Closeable { break; } } else if (delPosn != 0) { - bufferPosn--; + bufferPosn -= delPosn; + if(bufferPosn < -1) { + bufferPosn = -1; + } delPosn = 0; } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/fc6b50cc/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java ---------------------------------------------------------------------- diff --git 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java index 9d909bc..52f8b9f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java @@ -58,7 +58,7 @@ public class TestLineReader { * Check Condition * In the second key value pair, the value should contain * "</" from currentToken and - * "id>" from next token + * "id>" from next token */ Delimiter="</entity>"; @@ -80,20 +80,21 @@ public class TestLineReader { String TestPartOfInput = CurrentBufferTailToken+NextBufferHeadToken; int BufferSize=64 * 1024; - int numberOfCharToFillTheBuffer=BufferSize-CurrentBufferTailToken.length(); + int numberOfCharToFillTheBuffer = + BufferSize - CurrentBufferTailToken.length(); StringBuilder fillerString=new StringBuilder(); - for (int i=0;i<numberOfCharToFillTheBuffer;i++) { + for (int i=0; i<numberOfCharToFillTheBuffer; i++) { fillerString.append('a'); // char 'a' as a filler for the test string } TestData = fillerString + TestPartOfInput; lineReader = new LineReader( - new ByteArrayInputStream(TestData.getBytes()),Delimiter.getBytes()); + new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes()); line = new Text(); - lineReader.readLine(line); - Assert.assertEquals(fillerString.toString(),line.toString()); + lineReader.readLine(line); + Assert.assertEquals(fillerString.toString(), line.toString()); lineReader.readLine(line); Assert.assertEquals(Expected, line.toString()); @@ -107,35 +108,49 @@ public class TestLineReader { Delimiter = "record"; StringBuilder TestStringBuilder = new StringBuilder(); - TestStringBuilder.append(Delimiter+"Kerala "); - TestStringBuilder.append(Delimiter+"Bangalore"); - TestStringBuilder.append(Delimiter+" North Korea"); - 
TestStringBuilder.append(Delimiter+Delimiter+ + TestStringBuilder.append(Delimiter + "Kerala "); + TestStringBuilder.append(Delimiter + "Bangalore"); + TestStringBuilder.append(Delimiter + " North Korea"); + TestStringBuilder.append(Delimiter + Delimiter+ "Guantanamo"); - TestStringBuilder.append(Delimiter+"ecord"+"recor"+"core"); //~EOF with 're' + TestStringBuilder.append(Delimiter + "ecord" + + "recor" + "core"); //~EOF with 're' TestData=TestStringBuilder.toString(); lineReader = new LineReader( - new ByteArrayInputStream(TestData.getBytes()),Delimiter.getBytes()); - - lineReader.readLine(line); - Assert.assertEquals("",line.toString()); - lineReader.readLine(line); - Assert.assertEquals("Kerala ",line.toString()); + new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes()); + + lineReader.readLine(line); + Assert.assertEquals("", line.toString()); + lineReader.readLine(line); + Assert.assertEquals("Kerala ", line.toString()); lineReader.readLine(line); - Assert.assertEquals("Bangalore",line.toString()); + Assert.assertEquals("Bangalore", line.toString()); lineReader.readLine(line); - Assert.assertEquals(" North Korea",line.toString()); + Assert.assertEquals(" North Korea", line.toString()); lineReader.readLine(line); - Assert.assertEquals("",line.toString()); + Assert.assertEquals("", line.toString()); lineReader.readLine(line); - Assert.assertEquals("Guantanamo",line.toString()); + Assert.assertEquals("Guantanamo", line.toString()); lineReader.readLine(line); - Assert.assertEquals(("ecord"+"recor"+"core"),line.toString()); + Assert.assertEquals(("ecord"+"recor"+"core"), line.toString()); + + // Test 3 + // The test scenario is such that, + // aaaabccc split by aaab + TestData = "aaaabccc"; + Delimiter = "aaab"; + lineReader = new LineReader( + new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes()); + + lineReader.readLine(line); + Assert.assertEquals("a", line.toString()); + lineReader.readLine(line); + Assert.assertEquals("ccc", 
line.toString()); } } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org