METAMODEL-1109: Fixed reading of multi-byte (e.g. diacritic) characters in FixedWidthReader. Fixes #122
Project: http://git-wip-us.apache.org/repos/asf/metamodel/repo Commit: http://git-wip-us.apache.org/repos/asf/metamodel/commit/7e29fb89 Tree: http://git-wip-us.apache.org/repos/asf/metamodel/tree/7e29fb89 Diff: http://git-wip-us.apache.org/repos/asf/metamodel/diff/7e29fb89 Branch: refs/heads/5.x Commit: 7e29fb895703508fd793ba9604f3e893f55adf82 Parents: 7e355a1 Author: Jakub Horcicka <jakub.horci...@humaninference.com> Authored: Wed Aug 10 20:38:25 2016 -0700 Committer: Kasper Sørensen <i.am.kasper.soren...@gmail.com> Committed: Wed Aug 10 20:39:54 2016 -0700 ---------------------------------------------------------------------- .../metamodel/fixedwidth/EbcdicReader.java | 4 +++ .../metamodel/fixedwidth/FixedWidthReader.java | 37 ++++++++++++-------- .../fixedwidth/FixedWidthReaderTest.java | 28 +++++++++++++++ .../test/resources/example_diacritics_utf8.txt | 4 +++ 4 files changed, 59 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/metamodel/blob/7e29fb89/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java ---------------------------------------------------------------------- diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java index a7639fc..9e22dac 100644 --- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java +++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java @@ -26,6 +26,8 @@ import java.io.IOException; */ class EbcdicReader extends FixedWidthReader { + private final BufferedInputStream _stream; + private final String _charsetName; private final boolean _skipEbcdicHeader; private final boolean _eolPresent; private boolean _headerSkipped; @@ -33,6 +35,8 @@ class EbcdicReader extends FixedWidthReader { public EbcdicReader(BufferedInputStream stream, String charsetName, int[] 
valueWidths, boolean failOnInconsistentLineWidth, boolean skipEbcdicHeader, boolean eolPresent) { super(stream, charsetName, valueWidths, failOnInconsistentLineWidth); + _stream = stream; + _charsetName = charsetName; _skipEbcdicHeader = skipEbcdicHeader; _eolPresent = eolPresent; } http://git-wip-us.apache.org/repos/asf/metamodel/blob/7e29fb89/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java ---------------------------------------------------------------------- diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java index da17ff1..9f65ac7 100644 --- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java +++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java @@ -19,9 +19,13 @@ package org.apache.metamodel.fixedwidth; import java.io.BufferedInputStream; +import java.io.BufferedReader; import java.io.Closeable; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.UnsupportedEncodingException; import java.text.CharacterIterator; import java.text.StringCharacterIterator; import java.util.ArrayList; @@ -35,14 +39,13 @@ class FixedWidthReader implements Closeable { private static final int LINE_FEED = '\n'; private static final int CARRIAGE_RETURN = '\r'; - protected final String _charsetName; private final int _fixedValueWidth; private final int[] _valueWidths; private int _valueIndex = 0; private final boolean _failOnInconsistentLineWidth; private final boolean _constantWidth; private volatile int _rowNumber; - protected final BufferedInputStream _stream; + protected final Reader _reader; protected final int _expectedLineLength; public FixedWidthReader(InputStream stream, String charsetName, int fixedValueWidth, @@ -52,8 +55,7 @@ class FixedWidthReader implements Closeable { private 
FixedWidthReader(BufferedInputStream stream, String charsetName, int fixedValueWidth, boolean failOnInconsistentLineWidth) { - _stream = stream; - _charsetName = charsetName; + _reader = initReader(stream, charsetName); _fixedValueWidth = fixedValueWidth; _failOnInconsistentLineWidth = failOnInconsistentLineWidth; _rowNumber = 0; @@ -69,8 +71,7 @@ class FixedWidthReader implements Closeable { FixedWidthReader(BufferedInputStream stream, String charsetName, int[] valueWidths, boolean failOnInconsistentLineWidth) { - _stream = stream; - _charsetName = charsetName; + _reader = initReader(stream, charsetName); _fixedValueWidth = -1; _valueWidths = valueWidths; _failOnInconsistentLineWidth = failOnInconsistentLineWidth; @@ -85,6 +86,15 @@ class FixedWidthReader implements Closeable { _expectedLineLength = expectedLineLength; } + private Reader initReader(BufferedInputStream stream, String charsetName) { + try { + InputStreamReader inputStreamReader = new InputStreamReader(stream, charsetName); + return new BufferedReader(inputStreamReader); + } catch (UnsupportedEncodingException e) { + throw new IllegalArgumentException(String.format("Encoding '%s' was not recognized. ", charsetName)); + } + } + /** * This reads and returns the next record from the file. Usually, it is a line but in case the new line characters * are not present, the length of the content depends on the column-widths setting. @@ -106,7 +116,6 @@ class FixedWidthReader implements Closeable { * Empty hook that enables special behavior in sub-classed readers (by overriding this method). 
*/ protected void beforeReadLine() { - return; } private String[] getValues() throws IOException { @@ -167,8 +176,8 @@ class FixedWidthReader implements Closeable { StringBuilder line = new StringBuilder(); int ch; - for (ch = _stream.read(); !isEndingCharacter(ch); ch = _stream.read()) { - line.append((char) ch); + for (ch = _reader.read(); !isEndingCharacter(ch); ch = _reader.read()) { + line.append((char)ch); } if (ch == CARRIAGE_RETURN) { @@ -179,10 +188,10 @@ class FixedWidthReader implements Closeable { } private void readLineFeedIfFollows() throws IOException { - _stream.mark(1); - - if (_stream.read() != LINE_FEED) { - _stream.reset(); + _reader.mark(1); + + if (_reader.read() != LINE_FEED) { + _reader.reset(); } } @@ -247,6 +256,6 @@ class FixedWidthReader implements Closeable { @Override public void close() throws IOException { - _stream.close(); + _reader.close(); } } http://git-wip-us.apache.org/repos/asf/metamodel/blob/7e29fb89/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java ---------------------------------------------------------------------- diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java index 8f40c1d..29b4b06 100644 --- a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java +++ b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java @@ -37,6 +37,34 @@ public class FixedWidthReaderTest { public final ExpectedException exception = ExpectedException.none(); @Test + public void testDiacritics() throws IOException { + assertExpectedDiacritics(CHARSET); + } + + @Test(expected=AssertionError.class) + public void testDiacriticsFails() throws IOException { + assertExpectedDiacritics("Windows-1250"); + } + + private void assertExpectedDiacritics(String charset) throws IOException { + final File file = new 
File("src/test/resources/example_diacritics_utf8.txt"); + final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file)); + int[] widths = new int[] { 10, 10 }; + final String[] expectedValues = { + "[name, surname]", + "[Štěpán, Knížek]", + "[Lukáš, Žáček]", + "[Přemysl, Hývl]", + }; + try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, charset, widths, false)) { + for (String expectedLine : expectedValues) { + final String[] line = fixedWidthReader.readLine(); + assertEquals(expectedLine, Arrays.asList(line).toString()); + } + } + } + + @Test public void testBufferedReader1() throws IOException { final File file = new File("src/test/resources/example_simple1.txt"); final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file)); http://git-wip-us.apache.org/repos/asf/metamodel/blob/7e29fb89/fixedwidth/src/test/resources/example_diacritics_utf8.txt ---------------------------------------------------------------------- diff --git a/fixedwidth/src/test/resources/example_diacritics_utf8.txt b/fixedwidth/src/test/resources/example_diacritics_utf8.txt new file mode 100644 index 0000000..65b6a63 --- /dev/null +++ b/fixedwidth/src/test/resources/example_diacritics_utf8.txt @@ -0,0 +1,4 @@ +name surname +Štěpán Knížek +Lukáš Žáček +Přemysl Hývl \ No newline at end of file