This is an automated email from the ASF dual-hosted git repository.
kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new dd22f8bca1 Add DECIMAL data type to orc record reader (#11377)
dd22f8bca1 is described below
commit dd22f8bca1aa25260aeb2b320debc3a716b13915
Author: Kartik Khare <[email protected]>
AuthorDate: Sat Aug 19 19:39:20 2023 +0530
Add DECIMAL data type to orc record reader (#11377)
* Add DECIMAL data type to orc record reader
* Add test
---------
Co-authored-by: Kartik Khare <[email protected]>
---
.../plugin/inputformat/orc/ORCRecordReader.java | 12 ++++++++++++
.../inputformat/orc/ORCRecordExtractorTest.java | 22 +++++++++++++---------
2 files changed, 25 insertions(+), 9 deletions(-)
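For context on the diff below: the reader change makes ORCRecordReader treat the ORC DECIMAL category as a supported single-value type and extract its values as strings, and the test change exercises it through a decimal(10,5) column. A minimal sketch of such a schema; the column name "amount" is illustrative, not taken from the commit:

    import org.apache.orc.TypeDescription;

    public class DecimalSchemaSketch {
      public static void main(String[] args) {
        // decimal(10,5) matches the precision/scale used in the updated test schema.
        TypeDescription schema = TypeDescription.fromString("struct<amount:decimal(10,5)>");
        System.out.println(schema.findSubtype("amount").getCategory()); // prints DECIMAL
      }
    }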
diff --git a/pinot-plugins/pinot-input-format/pinot-orc/src/main/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReader.java b/pinot-plugins/pinot-input-format/pinot-orc/src/main/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReader.java
index 8a4d3fd709..fb1c04a2c2 100644
--- a/pinot-plugins/pinot-input-format/pinot-orc/src/main/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReader.java
+++ b/pinot-plugins/pinot-input-format/pinot-orc/src/main/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReader.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -164,6 +165,7 @@ public class ORCRecordReader implements RecordReader {
       case BINARY:
       case VARCHAR:
       case CHAR:
+      case DECIMAL:
         return true;
       default:
         return false;
@@ -368,6 +370,16 @@ public class ORCRecordReader implements RecordReader {
         } else {
           return null;
         }
+      case DECIMAL:
+        // Extract to string
+        DecimalColumnVector decimalColumnVector = (DecimalColumnVector) columnVector;
+        if (decimalColumnVector.noNulls || !decimalColumnVector.isNull[rowId]) {
+          StringBuilder stringBuilder = new StringBuilder();
+          decimalColumnVector.stringifyValue(stringBuilder, rowId);
+          return stringBuilder.toString();
+        } else {
+          return null;
+        }
       default:
         // Unsupported types
         throw new IllegalStateException("Unsupported field type: " + category + " for field: " + field);
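The new DECIMAL branch above reads the column through DecimalColumnVector and renders the value with stringifyValue(), so callers receive the decimal as a plain string. A self-contained sketch of the same null-safe pattern; the class and method names here are illustrative, not from the commit:

    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;

    public class DecimalExtractionSketch {
      // Null-safe read of one row as a String, mirroring the new DECIMAL case.
      static String extractDecimal(DecimalColumnVector vector, int rowId) {
        if (vector.noNulls || !vector.isNull[rowId]) {
          StringBuilder sb = new StringBuilder();
          vector.stringifyValue(sb, rowId); // writes the decimal at rowId into sb
          return sb.toString();
        }
        return null;
      }

      public static void main(String[] args) {
        DecimalColumnVector vector = new DecimalColumnVector(1024, 10, 5); // size, precision, scale
        vector.set(0, HiveDecimal.create("123.45678"));
        System.out.println(extractDecimal(vector, 0)); // prints 123.45678
      }
    }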
diff --git a/pinot-plugins/pinot-input-format/pinot-orc/src/test/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordExtractorTest.java b/pinot-plugins/pinot-input-format/pinot-orc/src/test/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordExtractorTest.java
index b4e6f14ea3..347986d961 100644
--- a/pinot-plugins/pinot-input-format/pinot-orc/src/test/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordExtractorTest.java
+++ b/pinot-plugins/pinot-input-format/pinot-orc/src/test/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordExtractorTest.java
@@ -30,13 +30,16 @@ import java.util.Map;
 import java.util.Set;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.orc.OrcFile;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.Writer;
@@ -76,7 +79,8 @@ public class ORCRecordExtractorTest extends AbstractRecordExtractorTest {
         + "simpleStruct:struct<structString:string,structLong:bigint,structDouble:double>,"
         + "complexStruct:struct<structString:string,nestedStruct:struct<nestedStructInt:int,"
         + "nestedStructLong:bigint>>,"
-        + "complexList:array<struct<complexListInt:int,complexListDouble:double>>," + "simpleMap:map<string,int>,"
+        + "complexList:array<struct<complexListInt:int,complexListDouble:decimal(10,5)>>,"
+        + "simpleMap:map<string,int>,"
         + "complexMap:map<string,struct<doubleField:double,stringField:string>>" + ">");
     // @format:on
     // CHECKSTYLE:ON
@@ -118,7 +122,7 @@ public class ORCRecordExtractorTest extends AbstractRecordExtractorTest {
     StructColumnVector complexListElementVector = (StructColumnVector) complexListVector.child;
     LongColumnVector complexListIntVector = (LongColumnVector) complexListElementVector.fields[0];
     complexListIntVector.ensureSize(5, false);
-    DoubleColumnVector complexListDoubleVector = (DoubleColumnVector) complexListElementVector.fields[1];
+    DecimalColumnVector complexListDoubleVector = (DecimalColumnVector) complexListElementVector.fields[1];
     complexListDoubleVector.ensureSize(5, false);
 
     // simple map - string key and value long
@@ -141,7 +145,7 @@ public class ORCRecordExtractorTest extends AbstractRecordExtractorTest {
     complexMapValueBytesVector.ensureSize(6, false);
 
     Writer writer = OrcFile.createWriter(new Path(_dataFile.getAbsolutePath()),
-        OrcFile.writerOptions(new Configuration()).setSchema(schema));
+        OrcFile.writerOptions(new Configuration()).setSchema(schema).overwrite(true));
 
     for (int i = 0; i < numRecords; i++) {
       Map<String, Object> record = _inputRecords.get(i);
@@ -218,7 +222,7 @@ public class ORCRecordExtractorTest extends AbstractRecordExtractorTest {
         for (Map<String, Object> complexElement : complexList) {
           complexListIntVector.vector[complexListVector.childCount] = (int) complexElement.get("complexListInt");
           complexListDoubleVector.vector[complexListVector.childCount] =
-              (double) complexElement.get("complexListDouble");
+              new HiveDecimalWritable(HiveDecimal.create((String) complexElement.get("complexListDouble")));
           complexListVector.childCount++;
         }
       } else {
@@ -271,11 +275,11 @@ public class ORCRecordExtractorTest extends AbstractRecordExtractorTest {
 
     // complex list element - each element contains a struct of int and double
     List[] complexLists = new List[]{
-        Arrays.asList(createStructInput("complexListInt", 10, "complexListDouble", 100.0),
-            createStructInput("complexListInt", 20, "complexListDouble", 200.0)), null,
-        Collections.singletonList(createStructInput("complexListInt", 30, "complexListDouble", 300.0)),
-        Arrays.asList(createStructInput("complexListInt", 40, "complexListDouble", 400.0),
-            createStructInput("complexListInt", 50, "complexListDouble", 500.0))
+        Arrays.asList(createStructInput("complexListInt", 10, "complexListDouble", "100"),
+            createStructInput("complexListInt", 20, "complexListDouble", "200.212")), null,
+        Collections.singletonList(createStructInput("complexListInt", 30, "complexListDouble", "300.378")),
+        Arrays.asList(createStructInput("complexListInt", 40, "complexListDouble", "400.1"),
+            createStructInput("complexListInt", 50, "complexListDouble", "500.2323"))
     };
 
     // single value integer
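On the write side, the updated test declares the list element as decimal(10,5), fills the DecimalColumnVector from string inputs via HiveDecimalWritable, and adds overwrite(true) so reruns can replace an existing data file. A condensed sketch of that write path under the same assumptions; the file path and values are illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
    import org.apache.orc.OrcFile;
    import org.apache.orc.TypeDescription;
    import org.apache.orc.Writer;

    public class DecimalWriteSketch {
      public static void main(String[] args) throws Exception {
        TypeDescription schema = TypeDescription.fromString("struct<amount:decimal(10,5)>");
        Writer writer = OrcFile.createWriter(new Path("/tmp/decimal-sketch.orc"),
            OrcFile.writerOptions(new Configuration()).setSchema(schema).overwrite(true));

        VectorizedRowBatch batch = schema.createRowBatch();
        DecimalColumnVector amounts = (DecimalColumnVector) batch.cols[0];
        for (String input : new String[]{"100", "200.212", "300.378"}) {
          // Decimal values are created from strings, as in the updated test inputs.
          amounts.vector[batch.size] = new HiveDecimalWritable(HiveDecimal.create(input));
          batch.size++;
        }
        writer.addRowBatch(batch);
        writer.close();
      }
    }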
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]