Github user rdblue commented on a diff in the pull request: https://github.com/apache/spark/pull/19269#discussion_r143524057 --- Diff: sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleWritableDataSource.java --- @@ -0,0 +1,297 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package test.org.apache.spark.sql.sources.v2; + +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Optional; + +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.catalyst.expressions.GenericRow; +import org.apache.spark.sql.catalyst.expressions.UnsafeRow; +import org.apache.spark.sql.sources.v2.DataSourceV2; +import org.apache.spark.sql.sources.v2.DataSourceV2Options; +import org.apache.spark.sql.sources.v2.ReadSupport; +import org.apache.spark.sql.sources.v2.WriteSupport; +import org.apache.spark.sql.sources.v2.reader.DataReader; +import org.apache.spark.sql.sources.v2.reader.DataSourceV2Reader; +import org.apache.spark.sql.sources.v2.reader.ReadTask; +import org.apache.spark.sql.sources.v2.writer.*; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.StructType; + +public class JavaSimpleWritableDataSource implements DataSourceV2, ReadSupport, WriteSupport { + private StructType schema = new StructType().add("i", "long").add("j", "long"); + + class Reader implements DataSourceV2Reader { + private String path; + + Reader(String path) { + this.path = path; + } + + @Override + public StructType readSchema() { + return schema; + } + + @Override + public List<ReadTask<Row>> createReadTasks() { + return java.util.Arrays.asList(new JavaSimpleCSVReadTask(path)); + } + } + + static class JavaSimpleCSVReadTask implements ReadTask<Row>, DataReader<Row> { + private String path; + private volatile Iterator<String> lines; + private volatile String currentLine; + + JavaSimpleCSVReadTask(Iterator<String> lines) { + this.lines = lines; + } + + JavaSimpleCSVReadTask(String path) { + this.path = path; + } + + @Override + public DataReader<Row> 
createReader() { + assert path != null; + try { + if (Files.exists(Paths.get(path))) { + return new JavaSimpleCSVReadTask(Files.readAllLines(Paths.get(path)).iterator()); + } else { + return new JavaSimpleCSVReadTask(Collections.emptyIterator()); + } + } catch (IOException e) { + throw new RuntimeException(e); --- End diff -- Does Spark have a `RuntimeIOException`? I typically like to use those so that the IOException can still be caught and handled by code that chooses to.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org