[
https://issues.apache.org/jira/browse/FLINK-1208?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14220734#comment-14220734
]
ASF GitHub Bot commented on FLINK-1208:
---------------------------------------
Github user fhueske commented on a diff in the pull request:
https://github.com/apache/incubator-flink/pull/201#discussion_r20706556
--- Diff: flink-core/src/test/java/org/apache/flink/api/common/io/GenericCsvInputFormatTest.java ---
@@ -271,6 +271,138 @@ public void testSparseParseWithIndices() {
}
@Test
+ public void testIgnoreInvalidInput() throws IOException {
+ try {
+ final String fileContent = "#description of the data\n" +
+         "header1|header2|header3|\n"+
+         "this is|1|2.0|\n"+
+         "//a comment\n" +
+         "a test|3|4.0|\n" +
+         "#next|5|6.0|\n";
+
+ final FileInputSplit split =
createTempFile(fileContent);
+
+ final Configuration parameters = new Configuration();
+ format.setFieldDelimiter('|');
+ format.setFieldTypesGeneric(StringValue.class,
IntValue.class, DoubleValue.class);
+ format.setLenient(true);
+
+ format.configure(parameters);
+ format.open(split);
+
+ Value[] values;
+
+ values = format.nextRecord(new Value[] { new
StringValue(), new IntValue(), new DoubleValue() });
+ assertNull(values);
+ values = format.nextRecord(new Value[] { new
StringValue(), new IntValue(), new DoubleValue() });
+ assertNull(values);
+
+ values = format.nextRecord(new Value[] { new
StringValue(), new IntValue(), new DoubleValue() });
+ assertEquals("this is", ((StringValue)
values[0]).getValue());
+ assertEquals(1, ((IntValue) values[1]).getValue());
+ assertEquals(2.0, ((DoubleValue) values[2]).getValue(),
0.001);
+
+ values = format.nextRecord(new Value[] { new
StringValue(), new IntValue(), new DoubleValue() });
+ assertNull(values);
+
+ values = format.nextRecord(new Value[] { new
StringValue(), new IntValue(), new DoubleValue() });
+ assertEquals("a test", ((StringValue)
values[0]).getValue());
+ assertEquals(3, ((IntValue) values[1]).getValue());
+ assertEquals(4.0, ((DoubleValue) values[2]).getValue(),
0.001);
+
+ values = format.nextRecord(new Value[] { new
StringValue(), new IntValue(), new DoubleValue() });
+ assertEquals("#next", ((StringValue)
values[0]).getValue());
+ assertEquals(5, ((IntValue) values[1]).getValue());
+ assertEquals(6.0, ((DoubleValue) values[2]).getValue(),
0.001);
+ }
+ catch (Exception ex) {
+ fail("Test failed due to a " +
ex.getClass().getSimpleName() + ": " + ex.getMessage());
+ }
+ }
+
+ @Test
+ public void testIgnoreSingleCharPrefixComments() throws IOException {
+ try {
+ final String fileContent = "#description of the data\n" +
+         "this is|1|2.0|\n"+
+         "a test|3|4.0|#comment after record\n" +
+         "#next|5|6.0|\n";
+
+ final FileInputSplit split =
createTempFile(fileContent);
+
+ final Configuration parameters = new Configuration();
+ format.setFieldDelimiter('|');
+ format.setFieldTypesGeneric(StringValue.class,
IntValue.class, DoubleValue.class);
+ format.setCommentPrefix("#");
+
+ format.configure(parameters);
+ format.open(split);
+
+ Value[] values;
+
+ values = format.nextRecord(new Value[] { new
StringValue(), new IntValue(), new DoubleValue() });
+ assertNull(values);
+
+ values = format.nextRecord(new Value[] { new
StringValue(), new IntValue(), new DoubleValue() });
+ assertEquals("this is", ((StringValue)
values[0]).getValue());
+ assertEquals(1, ((IntValue) values[1]).getValue());
+ assertEquals(2.0, ((DoubleValue) values[2]).getValue(),
0.001);
+
+ values = format.nextRecord(new Value[] { new
StringValue(), new IntValue(), new DoubleValue() });
+ assertEquals("a test", ((StringValue)
values[0]).getValue());
+ assertEquals(3, ((IntValue) values[1]).getValue());
+ assertEquals(4.0, ((DoubleValue) values[2]).getValue(),
0.001);
+
+ values = format.nextRecord(new Value[] { new
StringValue(), new IntValue(), new DoubleValue() });
+ assertNull(values);
+ }
+ catch (Exception ex) {
+ fail("Test failed due to a " +
ex.getClass().getSimpleName() + ": " + ex.getMessage());
+ }
+ }
+
+ @Test
+ public void testIgnoreMultiCharPrefixComments() throws IOException {
+ try {
--- End diff --
What about this test case?
> Skip comment lines in CSV input format. Allow user to specify comment
> character.
> --------------------------------------------------------------------------------
>
> Key: FLINK-1208
> URL: https://issues.apache.org/jira/browse/FLINK-1208
> Project: Flink
> Issue Type: Improvement
> Components: Java API, Scala API
> Affects Versions: 0.8-incubating
> Reporter: Aljoscha Krettek
> Assignee: Felix Neutatz
> Priority: Minor
> Labels: starter
>
> The current skipFirstLine is limited. Skipping arbitrary lines that start
> with a certain character would be much more flexible while still easy to
> implement.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)