This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 93d51b9 IcebergGenerics: Support ORC format (#851)
93d51b9 is described below
commit 93d51b94d00e028f93334fa511551f35efb5b639
Author: Xuedong Luan <[email protected]>
AuthorDate: Mon Mar 23 07:35:58 2020 +0800
IcebergGenerics: Support ORC format (#851)
---
.../org/apache/iceberg/data/TableScanIterable.java | 10 ++++++++
.../org/apache/iceberg/data/TestLocalScan.java | 29 ++++++++++++++++++----
2 files changed, 34 insertions(+), 5 deletions(-)
diff --git a/data/src/main/java/org/apache/iceberg/data/TableScanIterable.java b/data/src/main/java/org/apache/iceberg/data/TableScanIterable.java
index 27a625d..75c479e 100644
--- a/data/src/main/java/org/apache/iceberg/data/TableScanIterable.java
+++ b/data/src/main/java/org/apache/iceberg/data/TableScanIterable.java
@@ -35,6 +35,7 @@ import org.apache.iceberg.TableOperations;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.avro.Avro;
import org.apache.iceberg.data.avro.DataReader;
+import org.apache.iceberg.data.orc.GenericOrcReader;
import org.apache.iceberg.data.parquet.GenericParquetReaders;
import org.apache.iceberg.exceptions.RuntimeIOException;
import org.apache.iceberg.expressions.Evaluator;
@@ -42,6 +43,7 @@ import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.io.CloseableGroup;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.InputFile;
+import org.apache.iceberg.orc.ORC;
import org.apache.iceberg.parquet.Parquet;
class TableScanIterable extends CloseableGroup implements CloseableIterable<Record> {
@@ -99,6 +101,14 @@ class TableScanIterable extends CloseableGroup implements CloseableIterable<Reco
return parquet.build();
+ case ORC:
+ ORC.ReadBuilder orc = ORC.read(input)
+ .schema(projection)
+ .createReaderFunc(fileSchema -> GenericOrcReader.buildReader(projection, fileSchema))
+ .split(task.start(), task.length());
+
+ return orc.build();
+
default:
throw new UnsupportedOperationException(String.format("Cannot read %s file: %s",
task.file().format().name(), task.file().path()));
diff --git a/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java b/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java
index e41c41d..6736730 100644
--- a/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java
+++ b/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java
@@ -46,11 +46,13 @@ import org.apache.iceberg.TableProperties;
import org.apache.iceberg.Tables;
import org.apache.iceberg.avro.Avro;
import org.apache.iceberg.data.avro.DataWriter;
+import org.apache.iceberg.data.orc.GenericOrcWriter;
import org.apache.iceberg.data.parquet.GenericParquetWriter;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.hadoop.HadoopInputFile;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.io.FileAppender;
+import org.apache.iceberg.orc.ORC;
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.types.Types;
import org.junit.Assert;
@@ -87,6 +89,7 @@ public class TestLocalScan {
public static Object[][] parameters() {
return new Object[][] {
new Object[] { "parquet" },
+ new Object[] { "orc" },
new Object[] { "avro" }
};
}
@@ -393,7 +396,7 @@ public class TestLocalScan {
Preconditions.checkNotNull(fileFormat, "Cannot determine format for file: %s", filename);
switch (fileFormat) {
case AVRO:
- FileAppender avroAppender = Avro.write(fromPath(path, CONF))
+ FileAppender<Record> avroAppender = Avro.write(fromPath(path, CONF))
.schema(SCHEMA)
.createWriterFunc(DataWriter::create)
.named(fileFormat.name())
@@ -410,20 +413,36 @@ public class TestLocalScan {
.build();
case PARQUET:
- FileAppender<Record> orcAppender = Parquet.write(fromPath(path, CONF))
+ FileAppender<Record> parquetAppender = Parquet.write(fromPath(path, CONF))
.schema(SCHEMA)
.createWriterFunc(GenericParquetWriter::buildWriter)
.build();
try {
- orcAppender.addAll(records);
+ parquetAppender.addAll(records);
} finally {
- orcAppender.close();
+ parquetAppender.close();
}
return DataFiles.builder(PartitionSpec.unpartitioned())
.withInputFile(HadoopInputFile.fromPath(path, CONF))
- .withMetrics(orcAppender.metrics())
+ .withMetrics(parquetAppender.metrics())
+ .build();
+
+ case ORC:
+ FileAppender<Record> orcAppender = ORC.write(fromPath(path, CONF))
+ .schema(SCHEMA)
+ .createWriterFunc(GenericOrcWriter::buildWriter)
.build();
+ try {
+ orcAppender.addAll(records);
+ } finally {
+ orcAppender.close();
+ }
+
+ return DataFiles.builder(PartitionSpec.unpartitioned())
+ .withInputFile(HadoopInputFile.fromPath(path, CONF))
+ .withMetrics(orcAppender.metrics())
+ .build();
default:
throw new UnsupportedOperationException("Cannot write format: " + fileFormat);