This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new f2f3e95605 GH-43456: [Java] Add Opaque canonical extension type
(#43460)
f2f3e95605 is described below
commit f2f3e95605537e8ffea9886d5c4ee9bbf5f949fd
Author: David Li <[email protected]>
AuthorDate: Fri Aug 2 09:55:01 2024 +0900
GH-43456: [Java] Add Opaque canonical extension type (#43460)
### Rationale for this change
Add the newly ratified extension type.
### What changes are included in this PR?
The Java implementation only.
### Are these changes tested?
Yes
### Are there any user-facing changes?
No.
* GitHub Issue: #43456
Authored-by: David Li <[email protected]>
Signed-off-by: David Li <[email protected]>
---
.../adapter/jdbc/JdbcToArrowConfigBuilder.java | 2 +
.../arrow/adapter/jdbc/JdbcToArrowUtils.java | 22 +-
.../adapter/jdbc/h2/JdbcToArrowDataTypesTest.java | 51 +++
java/vector/src/main/java/module-info.java | 1 +
.../InvalidExtensionMetadataException.java | 28 ++
.../apache/arrow/vector/extension/OpaqueType.java | 393 +++++++++++++++++++++
.../arrow/vector/extension/OpaqueVector.java | 58 +++
.../arrow/vector/TestOpaqueExtensionType.java | 188 ++++++++++
8 files changed, 742 insertions(+), 1 deletion(-)
diff --git
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
index 783a373c6d..ea9ffe55d3 100644
---
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
+++
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
@@ -211,6 +211,8 @@ public class JdbcToArrowConfigBuilder {
*
* <p>Defaults to wrapping {@link
JdbcToArrowUtils#getArrowTypeFromJdbcType(JdbcFieldInfo,
* Calendar)}.
+ *
+ * @see JdbcToArrowUtils#reportUnsupportedTypesAsOpaque(Function, String)
*/
public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter(
Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter) {
diff --git
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
index 8397d4c9e0..aecb734a8b 100644
---
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
+++
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
@@ -18,6 +18,7 @@ package org.apache.arrow.adapter.jdbc;
import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE;
import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE;
+import static org.apache.arrow.vector.types.Types.MinorType;
import java.io.IOException;
import java.math.RoundingMode;
@@ -37,6 +38,7 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
+import java.util.function.Function;
import org.apache.arrow.adapter.jdbc.consumer.ArrayConsumer;
import org.apache.arrow.adapter.jdbc.consumer.BigIntConsumer;
import org.apache.arrow.adapter.jdbc.consumer.BinaryConsumer;
@@ -80,6 +82,7 @@ import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.extension.OpaqueType;
import org.apache.arrow.vector.types.DateUnit;
import org.apache.arrow.vector.types.TimeUnit;
import org.apache.arrow.vector.types.pojo.ArrowType;
@@ -216,11 +219,28 @@ public class JdbcToArrowUtils {
case Types.STRUCT:
return new ArrowType.Struct();
default:
- // no-op, shouldn't get here
throw new UnsupportedOperationException("Unmapped JDBC type: " +
fieldInfo.getJdbcType());
}
}
+ /**
+ * Wrap a JDBC to Arrow type converter so that a type it rejects with {@link
UnsupportedOperationException} is reported
+ * as an {@link OpaqueType} (Null storage, named after the JDBC type name) instead of failing.
+ *
+ * @param typeConverter The type converter to wrap.
+ * @param vendorName The database name to report as the Opaque type's vendor
name.
+ */
+ public static Function<JdbcFieldInfo, ArrowType>
reportUnsupportedTypesAsOpaque(
+ Function<JdbcFieldInfo, ArrowType> typeConverter, String vendorName) {
+ return (final JdbcFieldInfo fieldInfo) -> {
+ try {
+ return typeConverter.apply(fieldInfo);
+ } catch (UnsupportedOperationException e) {
+ return new OpaqueType(MinorType.NULL.getType(),
fieldInfo.getTypeName(), vendorName);
+ }
+ };
+ }
+
/**
* Create Arrow {@link Schema} object for the given JDBC {@link
java.sql.ResultSetMetaData}.
*
diff --git
a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java
b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java
index 5537e1acba..c246bb2bec 100644
---
a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java
+++
b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java
@@ -32,19 +32,27 @@ import static
org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVect
import static
org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTinyIntVectorValues;
import static
org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarBinaryVectorValues;
import static
org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
+import java.sql.Statement;
import java.util.Arrays;
import java.util.Calendar;
+import java.util.function.Function;
import java.util.stream.Stream;
import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
+import org.apache.arrow.adapter.jdbc.JdbcFieldInfo;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.adapter.jdbc.Table;
+import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
@@ -62,7 +70,12 @@ import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.extension.OpaqueType;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
@@ -189,6 +202,44 @@ public class JdbcToArrowDataTypesTest extends
AbstractJdbcToArrowTest {
JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd,
schema);
}
+ @Test
+ void testOpaqueType() throws SQLException, ClassNotFoundException {
+ try (BufferAllocator allocator = new RootAllocator()) {
+ String url = "jdbc:h2:mem:JdbcToArrowTest";
+ String driver = "org.h2.Driver";
+ Class.forName(driver);
+ conn = DriverManager.getConnection(url);
+ try (Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate("CREATE TABLE unknowntype (a GEOMETRY, b INT)");
+ }
+
+ String query = "SELECT * FROM unknowntype";
+ Calendar calendar = Calendar.getInstance();
+ Function<JdbcFieldInfo, ArrowType> typeConverter =
+ (field) -> JdbcToArrowUtils.getArrowTypeFromJdbcType(field,
calendar);
+ JdbcToArrowConfig config =
+ new JdbcToArrowConfigBuilder()
+ .setAllocator(allocator)
+ .setJdbcToArrowTypeConverter(
+
JdbcToArrowUtils.reportUnsupportedTypesAsOpaque(typeConverter, "H2"))
+ .build();
+ Schema schema;
+ try (Statement stmt = conn.createStatement();
+ ResultSet rs = stmt.executeQuery(query)) {
+ schema =
+ assertDoesNotThrow(() ->
JdbcToArrowUtils.jdbcToArrowSchema(rs.getMetaData(), config));
+ }
+
+ Schema expected =
+ new Schema(
+ Arrays.asList(
+ Field.nullable(
+ "A", new OpaqueType(Types.MinorType.NULL.getType(),
"GEOMETRY", "H2")),
+ Field.nullable("B", Types.MinorType.INT.getType())));
+ assertEquals(expected, schema);
+ }
+ }
+
/**
* This method calls the assert methods for various DataSets.
*
diff --git a/java/vector/src/main/java/module-info.java
b/java/vector/src/main/java/module-info.java
index fdea2bd067..8ba1b3579e 100644
--- a/java/vector/src/main/java/module-info.java
+++ b/java/vector/src/main/java/module-info.java
@@ -25,6 +25,7 @@ module org.apache.arrow.vector {
exports org.apache.arrow.vector.complex.writer;
exports org.apache.arrow.vector.compression;
exports org.apache.arrow.vector.dictionary;
+ exports org.apache.arrow.vector.extension;
exports org.apache.arrow.vector.holders;
exports org.apache.arrow.vector.ipc;
exports org.apache.arrow.vector.ipc.message;
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/extension/InvalidExtensionMetadataException.java
b/java/vector/src/main/java/org/apache/arrow/vector/extension/InvalidExtensionMetadataException.java
new file mode 100644
index 0000000000..2349a7d4bc
--- /dev/null
+++
b/java/vector/src/main/java/org/apache/arrow/vector/extension/InvalidExtensionMetadataException.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.extension;
+
+/** The extension metadata was malformed. */
+public class InvalidExtensionMetadataException extends RuntimeException {
+ public InvalidExtensionMetadataException(String message) {
+ super(message);
+ }
+
+ public InvalidExtensionMetadataException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueType.java
b/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueType.java
new file mode 100644
index 0000000000..a0e898a543
--- /dev/null
+++
b/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueType.java
@@ -0,0 +1,393 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.extension;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import java.util.Collections;
+import java.util.Objects;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.Decimal256Vector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float2Vector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalMonthDayNanoVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.LargeVarBinaryVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoTZVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecTZVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.ViewVarBinaryVector;
+import org.apache.arrow.vector.ViewVarCharVector;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+/**
+ * Opaque is a placeholder for a type from an external (usually non-Arrow)
system that could not be
+ * interpreted.
+ */
+public class OpaqueType extends ArrowType.ExtensionType {
+ private static final AtomicBoolean registered = new AtomicBoolean(false);
+ public static final String EXTENSION_NAME = "arrow.opaque";
+ private final ArrowType storageType;
+ private final String typeName;
+ private final String vendorName;
+
+ /** Register the extension type so it can be used globally. */
+ public static void ensureRegistered() {
+ if (!registered.getAndSet(true)) {
+ // The values don't matter, we just need an instance
+ ExtensionTypeRegistry.register(new
OpaqueType(Types.MinorType.NULL.getType(), "", ""));
+ }
+ }
+
+ /**
+ * Create a new type instance.
+ *
+ * @param storageType The underlying Arrow type.
+ * @param typeName The name of the unknown type.
+ * @param vendorName The name of the originating system of the unknown type.
+ */
+ public OpaqueType(ArrowType storageType, String typeName, String vendorName)
{
+ this.storageType = Objects.requireNonNull(storageType, "storageType");
+ this.typeName = Objects.requireNonNull(typeName, "typeName");
+ this.vendorName = Objects.requireNonNull(vendorName, "vendorName");
+ }
+
+ @Override
+ public ArrowType storageType() {
+ return storageType;
+ }
+
+ public String typeName() {
+ return typeName;
+ }
+
+ public String vendorName() {
+ return vendorName;
+ }
+
+ @Override
+ public String extensionName() {
+ return EXTENSION_NAME;
+ }
+
+ @Override
+ public boolean extensionEquals(ExtensionType other) {
+ return other != null
+ && EXTENSION_NAME.equals(other.extensionName())
+ && other instanceof OpaqueType
+ && storageType.equals(other.storageType())
+ && typeName.equals(((OpaqueType) other).typeName())
+ && vendorName.equals(((OpaqueType) other).vendorName());
+ }
+
+ @Override
+ public String serialize() {
+ ObjectMapper mapper = new ObjectMapper();
+ ObjectNode object = mapper.createObjectNode();
+ object.put("type_name", typeName);
+ object.put("vendor_name", vendorName);
+ try {
+ return mapper.writeValueAsString(object);
+ } catch (JsonProcessingException e) {
+ throw new RuntimeException("Could not serialize " + this, e);
+ }
+ }
+
+ @Override
+ public ArrowType deserialize(ArrowType storageType, String serializedData) {
+ ObjectMapper mapper = new ObjectMapper();
+ JsonNode object;
+ try {
+ object = mapper.readTree(serializedData);
+ } catch (JsonProcessingException e) {
+ throw new InvalidExtensionMetadataException("Extension metadata is
invalid", e);
+ }
+ JsonNode typeName = object.get("type_name");
+ JsonNode vendorName = object.get("vendor_name");
+ if (typeName == null) {
+ throw new InvalidExtensionMetadataException("typeName is missing");
+ }
+ if (vendorName == null) {
+ throw new InvalidExtensionMetadataException("vendorName is missing");
+ }
+ if (!typeName.isTextual()) {
+ throw new InvalidExtensionMetadataException("typeName should be string,
was " + typeName);
+ }
+ if (!vendorName.isTextual()) {
+ throw new InvalidExtensionMetadataException("vendorName should be
string, was " + vendorName);
+ }
+ return new OpaqueType(storageType, typeName.asText(), vendorName.asText());
+ }
+
+ @Override
+ public FieldVector getNewVector(String name, FieldType fieldType,
BufferAllocator allocator) {
+ // XXX: fieldType is supposed to be the extension type
+ final Field field = new Field(name, fieldType, Collections.emptyList());
+ final FieldVector underlyingVector =
+ storageType.accept(new UnderlyingVectorTypeVisitor(name, allocator));
+ return new OpaqueVector(field, allocator, underlyingVector);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(super.hashCode(), storageType, typeName, vendorName);
+ }
+
+ @Override
+ public String toString() {
+ return "OpaqueType("
+ + storageType
+ + ", typeName='"
+ + typeName
+ + '\''
+ + ", vendorName='"
+ + vendorName
+ + '\''
+ + ')';
+ }
+
+ private static class UnderlyingVectorTypeVisitor implements
ArrowTypeVisitor<FieldVector> {
+ private final String name;
+ private final BufferAllocator allocator;
+
+ UnderlyingVectorTypeVisitor(String name, BufferAllocator allocator) {
+ this.name = name;
+ this.allocator = allocator;
+ }
+
+ @Override
+ public FieldVector visit(Null type) {
+ return new NullVector(name);
+ }
+
+ private RuntimeException unsupported(ArrowType type) {
+ throw new UnsupportedOperationException(
+ "OpaqueType#getUnderlyingVector is not supported for storage type: "
+ type);
+ }
+
+ @Override
+ public FieldVector visit(Struct type) {
+ throw unsupported(type);
+ }
+
+ @Override
+ public FieldVector visit(List type) {
+ throw unsupported(type);
+ }
+
+ @Override
+ public FieldVector visit(LargeList type) {
+ throw unsupported(type);
+ }
+
+ @Override
+ public FieldVector visit(FixedSizeList type) {
+ throw unsupported(type);
+ }
+
+ @Override
+ public FieldVector visit(Union type) {
+ throw unsupported(type);
+ }
+
+ @Override
+ public FieldVector visit(Map type) {
+ throw unsupported(type);
+ }
+
+ @Override
+ public FieldVector visit(Int type) {
+ return new IntVector(name, allocator);
+ }
+
+ @Override
+ public FieldVector visit(FloatingPoint type) {
+ switch (type.getPrecision()) {
+ case HALF:
+ return new Float2Vector(name, allocator);
+ case SINGLE:
+ return new Float4Vector(name, allocator);
+ case DOUBLE:
+ return new Float8Vector(name, allocator);
+ default:
+ throw unsupported(type);
+ }
+ }
+
+ @Override
+ public FieldVector visit(Utf8 type) {
+ return new VarCharVector(name, allocator);
+ }
+
+ @Override
+ public FieldVector visit(Utf8View type) {
+ return new ViewVarCharVector(name, allocator);
+ }
+
+ @Override
+ public FieldVector visit(LargeUtf8 type) {
+ return new LargeVarCharVector(name, allocator);
+ }
+
+ @Override
+ public FieldVector visit(Binary type) {
+ return new VarBinaryVector(name, allocator);
+ }
+
+ @Override
+ public FieldVector visit(BinaryView type) {
+ return new ViewVarBinaryVector(name, allocator);
+ }
+
+ @Override
+ public FieldVector visit(LargeBinary type) {
+ return new LargeVarBinaryVector(name, allocator);
+ }
+
+ @Override
+ public FieldVector visit(FixedSizeBinary type) {
+ return new FixedSizeBinaryVector(Field.nullable(name, type), allocator);
+ }
+
+ @Override
+ public FieldVector visit(Bool type) {
+ return new BitVector(name, allocator);
+ }
+
+ @Override
+ public FieldVector visit(Decimal type) {
+ if (type.getBitWidth() == 128) {
+ return new DecimalVector(Field.nullable(name, type), allocator);
+ } else if (type.getBitWidth() == 256) {
+ return new Decimal256Vector(Field.nullable(name, type), allocator);
+ }
+ throw unsupported(type);
+ }
+
+ @Override
+ public FieldVector visit(Date type) {
+ switch (type.getUnit()) {
+ case DAY:
+ return new DateDayVector(name, allocator);
+ case MILLISECOND:
+ return new DateMilliVector(name, allocator);
+ default:
+ throw unsupported(type);
+ }
+ }
+
+ @Override
+ public FieldVector visit(Time type) {
+ switch (type.getUnit()) {
+ case SECOND:
+ return new TimeSecVector(name, allocator);
+ case MILLISECOND:
+ return new TimeMilliVector(name, allocator);
+ case MICROSECOND:
+ return new TimeMicroVector(name, allocator);
+ case NANOSECOND:
+ return new TimeNanoVector(name, allocator);
+ default:
+ throw unsupported(type);
+ }
+ }
+
+ @Override
+ public FieldVector visit(Timestamp type) {
+ if (type.getTimezone() == null || type.getTimezone().isEmpty()) {
+ switch (type.getUnit()) {
+ case SECOND:
+ return new TimeStampSecVector(Field.nullable(name, type),
allocator);
+ case MILLISECOND:
+ return new TimeStampMilliVector(Field.nullable(name, type),
allocator);
+ case MICROSECOND:
+ return new TimeStampMicroVector(Field.nullable(name, type),
allocator);
+ case NANOSECOND:
+ return new TimeStampNanoVector(Field.nullable(name, type),
allocator);
+ default:
+ throw unsupported(type);
+ }
+ }
+ switch (type.getUnit()) {
+ case SECOND:
+ return new TimeStampSecTZVector(Field.nullable(name, type),
allocator);
+ case MILLISECOND:
+ return new TimeStampMilliTZVector(Field.nullable(name, type),
allocator);
+ case MICROSECOND:
+ return new TimeStampMicroTZVector(Field.nullable(name, type),
allocator);
+ case NANOSECOND:
+ return new TimeStampNanoTZVector(Field.nullable(name, type),
allocator);
+ default:
+ throw unsupported(type);
+ }
+ }
+
+ @Override
+ public FieldVector visit(Interval type) {
+ switch (type.getUnit()) {
+ case YEAR_MONTH:
+ return new IntervalYearVector(name, allocator);
+ case DAY_TIME:
+ return new IntervalDayVector(name, allocator);
+ case MONTH_DAY_NANO:
+ return new IntervalMonthDayNanoVector(name, allocator);
+ default:
+ throw unsupported(type);
+ }
+ }
+
+ @Override
+ public FieldVector visit(Duration type) {
+ return new DurationVector(Field.nullable(name, type), allocator);
+ }
+
+ @Override
+ public FieldVector visit(ListView type) {
+ throw unsupported(type);
+ }
+ }
+}
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueVector.java
new file mode 100644
index 0000000000..00eb9a984e
--- /dev/null
+++
b/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueVector.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.extension;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueIterableVector;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * Opaque is a wrapper for (usually binary) data from an external (often
non-Arrow) system that
+ * could not be interpreted.
+ */
+public class OpaqueVector extends ExtensionTypeVector<FieldVector>
+ implements ValueIterableVector<Object> {
+ private final Field field;
+
+ public OpaqueVector(Field field, BufferAllocator allocator, FieldVector
underlyingVector) {
+ super(field, allocator, underlyingVector);
+ this.field = field;
+ }
+
+ @Override
+ public Field getField() {
+ return field;
+ }
+
+ @Override
+ public Object getObject(int index) {
+ return getUnderlyingVector().getObject(index);
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ return getUnderlyingVector().hashCode(index, hasher);
+ }
+}
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/TestOpaqueExtensionType.java
b/java/vector/src/test/java/org/apache/arrow/vector/TestOpaqueExtensionType.java
new file mode 100644
index 0000000000..9fd9b580b3
--- /dev/null
+++
b/java/vector/src/test/java/org/apache/arrow/vector/TestOpaqueExtensionType.java
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collections;
+import java.util.stream.Stream;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.extension.InvalidExtensionMetadataException;
+import org.apache.arrow.vector.extension.OpaqueType;
+import org.apache.arrow.vector.extension.OpaqueVector;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+class TestOpaqueExtensionType {
+ BufferAllocator allocator;
+
+ @BeforeEach
+ void beforeEach() {
+ allocator = new RootAllocator();
+ }
+
+ @AfterEach
+ void afterEach() {
+ allocator.close();
+ }
+
+ @ParameterizedTest
+ @ValueSource(
+ strings = {
+ "{\"type_name\": \"\", \"vendor_name\": \"\"}",
+ "{\"type_name\": \"\", \"vendor_name\": \"\", \"extra_field\": 42}",
+ "{\"type_name\": \"array\", \"vendor_name\": \"postgresql\"}",
+ "{\"type_name\": \"foo.bar\", \"vendor_name\": \"postgresql\"}",
+ })
+ void testDeserializeValid(String serialized) {
+ ArrowType storageType = Types.MinorType.NULL.getType();
+ OpaqueType type = new OpaqueType(storageType, "", "");
+
+ assertDoesNotThrow(() -> type.deserialize(storageType, serialized));
+ }
+
+ @ParameterizedTest
+ @ValueSource(
+ strings = {
+ "",
+ "{\"type_name\": \"\"}",
+ "{\"vendor_name\": \"\"}",
+ "{\"type_name\": null, \"vendor_name\": \"\"}",
+ "{\"type_name\": \"\", \"vendor_name\": null}",
+ "{\"type_name\": 42, \"vendor_name\": \"\"}",
+ "{\"type_name\": \"\", \"vendor_name\": 42}",
+ "{\"type_name\": \"\", \"vendor_name\": \"\"",
+ })
+ void testDeserializeInvalid(String serialized) {
+ ArrowType storageType = Types.MinorType.NULL.getType();
+ OpaqueType type = new OpaqueType(storageType, "", "");
+
+ assertThrows(
+ InvalidExtensionMetadataException.class, () ->
type.deserialize(storageType, serialized));
+ }
+
+ @ParameterizedTest
+ @MethodSource("storageType")
+ void testRoundTrip(ArrowType storageType) {
+ OpaqueType type = new OpaqueType(storageType, "foo", "bar");
+ assertEquals(storageType, type.storageType());
+ assertEquals("foo", type.typeName());
+ if (storageType.isComplex()) {
+ assertThrows(
+ UnsupportedOperationException.class,
+ () -> type.getNewVector("name", FieldType.nullable(type),
allocator));
+ } else {
+ assertDoesNotThrow(() -> type.getNewVector("name",
FieldType.nullable(type), allocator))
+ .close();
+ }
+
+ String serialized = assertDoesNotThrow(type::serialize);
+ OpaqueType holder = new OpaqueType(Types.MinorType.NULL.getType(), "", "");
+ OpaqueType deserialized = (OpaqueType) holder.deserialize(storageType,
serialized);
+ assertEquals(type, deserialized);
+ assertNotEquals(holder, deserialized);
+ }
+
+ @ParameterizedTest
+ @MethodSource("storageType")
+ void testIpcRoundTrip(ArrowType storageType) {
+ OpaqueType.ensureRegistered();
+
+ OpaqueType type = new OpaqueType(storageType, "foo", "bar");
+ Schema schema = new
Schema(Collections.singletonList(Field.nullable("unknown", type)));
+ byte[] serialized = schema.serializeAsMessage();
+ Schema deseralized =
Schema.deserializeMessage(ByteBuffer.wrap(serialized));
+ assertEquals(schema, deseralized);
+ }
+
+ @Test
+ void testVectorType() throws IOException {
+ OpaqueType.ensureRegistered();
+
+ ArrowType storageType = Types.MinorType.VARBINARY.getType();
+ OpaqueType type = new OpaqueType(storageType, "foo", "bar");
+ try (FieldVector vector = type.getNewVector("field",
FieldType.nullable(type), allocator)) {
+ OpaqueVector opaque = assertInstanceOf(OpaqueVector.class, vector);
+ assertEquals("field", opaque.getField().getName());
+ assertEquals(type, opaque.getField().getType());
+
+ VarBinaryVector binary =
+ assertInstanceOf(VarBinaryVector.class,
opaque.getUnderlyingVector());
+ binary.setSafe(0, new byte[] {0, 1, 2, 3});
+ binary.setNull(1);
+ opaque.setValueCount(2);
+
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try (VectorSchemaRoot root = new
VectorSchemaRoot(Collections.singletonList(opaque));
+ ArrowStreamWriter writer =
+ new ArrowStreamWriter(root, new
DictionaryProvider.MapDictionaryProvider(), baos)) {
+ writer.start();
+ writer.writeBatch();
+ }
+
+ try (ArrowStreamReader reader =
+ new ArrowStreamReader(new ByteArrayInputStream(baos.toByteArray()),
allocator)) {
+ assertTrue(reader.loadNextBatch());
+ VectorSchemaRoot root = reader.getVectorSchemaRoot();
+ assertEquals(2, root.getRowCount());
+ assertEquals(new Schema(Collections.singletonList(opaque.getField())),
root.getSchema());
+
+ OpaqueVector actual = assertInstanceOf(OpaqueVector.class,
root.getVector("field"));
+ assertFalse(actual.isNull(0));
+ assertTrue(actual.isNull(1));
+ assertArrayEquals(new byte[] {0, 1, 2, 3}, (byte[])
actual.getObject(0));
+ assertNull(actual.getObject(1));
+ }
+ }
+ }
+
+ static Stream<ArrowType> storageType() {
+ return Stream.of(
+ Types.MinorType.NULL.getType(),
+ Types.MinorType.BIGINT.getType(),
+ Types.MinorType.BIT.getType(),
+ Types.MinorType.VARBINARY.getType(),
+ Types.MinorType.VARCHAR.getType(),
+ Types.MinorType.LIST.getType(),
+ new ArrowType.Decimal(12, 4, 128));
+ }
+}