This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new f2f3e95605 GH-43456: [Java] Add Opaque canonical extension type (#43460)
f2f3e95605 is described below

commit f2f3e95605537e8ffea9886d5c4ee9bbf5f949fd
Author: David Li <[email protected]>
AuthorDate: Fri Aug 2 09:55:01 2024 +0900

    GH-43456: [Java] Add Opaque canonical extension type (#43460)
    
    ### Rationale for this change
    
    Add the newly ratified extension type.
    
    ### What changes are included in this PR?
    
    The Java implementation only.
    
    ### Are these changes tested?
    
    Yes
    
    ### Are there any user-facing changes?
    
    No.
    * GitHub Issue: #43456
    
    Authored-by: David Li <[email protected]>
    Signed-off-by: David Li <[email protected]>
---
 .../adapter/jdbc/JdbcToArrowConfigBuilder.java     |   2 +
 .../arrow/adapter/jdbc/JdbcToArrowUtils.java       |  22 +-
 .../adapter/jdbc/h2/JdbcToArrowDataTypesTest.java  |  51 +++
 java/vector/src/main/java/module-info.java         |   1 +
 .../InvalidExtensionMetadataException.java         |  28 ++
 .../apache/arrow/vector/extension/OpaqueType.java  | 393 +++++++++++++++++++++
 .../arrow/vector/extension/OpaqueVector.java       |  58 +++
 .../arrow/vector/TestOpaqueExtensionType.java      | 188 ++++++++++
 8 files changed, 742 insertions(+), 1 deletion(-)

diff --git 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
index 783a373c6d..ea9ffe55d3 100644
--- 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
+++ 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
@@ -211,6 +211,8 @@ public class JdbcToArrowConfigBuilder {
    *
    * <p>Defaults to wrapping {@link 
JdbcToArrowUtils#getArrowTypeFromJdbcType(JdbcFieldInfo,
    * Calendar)}.
+   *
+   * @see JdbcToArrowUtils#reportUnsupportedTypesAsOpaque(Function, String)
    */
   public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter(
       Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter) {
diff --git 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
index 8397d4c9e0..aecb734a8b 100644
--- 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
+++ 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
@@ -18,6 +18,7 @@ package org.apache.arrow.adapter.jdbc;
 
 import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE;
 import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE;
+import static org.apache.arrow.vector.types.Types.MinorType;
 
 import java.io.IOException;
 import java.math.RoundingMode;
@@ -37,6 +38,7 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.TimeZone;
+import java.util.function.Function;
 import org.apache.arrow.adapter.jdbc.consumer.ArrayConsumer;
 import org.apache.arrow.adapter.jdbc.consumer.BigIntConsumer;
 import org.apache.arrow.adapter.jdbc.consumer.BinaryConsumer;
@@ -80,6 +82,7 @@ import org.apache.arrow.vector.VarCharVector;
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.complex.ListVector;
 import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.extension.OpaqueType;
 import org.apache.arrow.vector.types.DateUnit;
 import org.apache.arrow.vector.types.TimeUnit;
 import org.apache.arrow.vector.types.pojo.ArrowType;
@@ -216,11 +219,28 @@ public class JdbcToArrowUtils {
       case Types.STRUCT:
         return new ArrowType.Struct();
       default:
-        // no-op, shouldn't get here
         throw new UnsupportedOperationException("Unmapped JDBC type: " + 
fieldInfo.getJdbcType());
     }
   }
 
+  /**
+   * Wrap a JDBC to Arrow type converter such that {@link 
UnsupportedOperationException} becomes
+   * {@link OpaqueType}.
+   *
+   * @param typeConverter The type converter to wrap.
+   * @param vendorName The database name to report as the Opaque type's vendor 
name.
+   */
+  public static Function<JdbcFieldInfo, ArrowType> 
reportUnsupportedTypesAsOpaque(
+      Function<JdbcFieldInfo, ArrowType> typeConverter, String vendorName) {
+    return (final JdbcFieldInfo fieldInfo) -> {
+      try {
+        return typeConverter.apply(fieldInfo);
+      } catch (UnsupportedOperationException e) {
+        return new OpaqueType(MinorType.NULL.getType(), 
fieldInfo.getTypeName(), vendorName);
+      }
+    };
+  }
+
   /**
    * Create Arrow {@link Schema} object for the given JDBC {@link 
java.sql.ResultSetMetaData}.
    *
diff --git 
a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java
 
b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java
index 5537e1acba..c246bb2bec 100644
--- 
a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java
+++ 
b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java
@@ -32,19 +32,27 @@ import static 
org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVect
 import static 
org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTinyIntVectorValues;
 import static 
org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarBinaryVectorValues;
 import static 
org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 import java.io.IOException;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
 import java.sql.ResultSetMetaData;
 import java.sql.SQLException;
+import java.sql.Statement;
 import java.util.Arrays;
 import java.util.Calendar;
+import java.util.function.Function;
 import java.util.stream.Stream;
 import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
+import org.apache.arrow.adapter.jdbc.JdbcFieldInfo;
 import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
 import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
 import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper;
 import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
 import org.apache.arrow.adapter.jdbc.Table;
+import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
 import org.apache.arrow.vector.BigIntVector;
 import org.apache.arrow.vector.BitVector;
@@ -62,7 +70,12 @@ import org.apache.arrow.vector.VarBinaryVector;
 import org.apache.arrow.vector.VarCharVector;
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.extension.OpaqueType;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
@@ -189,6 +202,44 @@ public class JdbcToArrowDataTypesTest extends 
AbstractJdbcToArrowTest {
     JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, 
schema);
   }
 
+  @Test
+  void testOpaqueType() throws SQLException, ClassNotFoundException {
+    try (BufferAllocator allocator = new RootAllocator()) {
+      String url = "jdbc:h2:mem:JdbcToArrowTest";
+      String driver = "org.h2.Driver";
+      Class.forName(driver);
+      conn = DriverManager.getConnection(url);
+      try (Statement stmt = conn.createStatement()) {
+        stmt.executeUpdate("CREATE TABLE unknowntype (a GEOMETRY, b INT)");
+      }
+
+      String query = "SELECT * FROM unknowntype";
+      Calendar calendar = Calendar.getInstance();
+      Function<JdbcFieldInfo, ArrowType> typeConverter =
+          (field) -> JdbcToArrowUtils.getArrowTypeFromJdbcType(field, 
calendar);
+      JdbcToArrowConfig config =
+          new JdbcToArrowConfigBuilder()
+              .setAllocator(allocator)
+              .setJdbcToArrowTypeConverter(
+                  
JdbcToArrowUtils.reportUnsupportedTypesAsOpaque(typeConverter, "H2"))
+              .build();
+      Schema schema;
+      try (Statement stmt = conn.createStatement();
+          ResultSet rs = stmt.executeQuery(query)) {
+        schema =
+            assertDoesNotThrow(() -> 
JdbcToArrowUtils.jdbcToArrowSchema(rs.getMetaData(), config));
+      }
+
+      Schema expected =
+          new Schema(
+              Arrays.asList(
+                  Field.nullable(
+                      "A", new OpaqueType(Types.MinorType.NULL.getType(), 
"GEOMETRY", "H2")),
+                  Field.nullable("B", Types.MinorType.INT.getType())));
+      assertEquals(expected, schema);
+    }
+  }
+
   /**
    * This method calls the assert methods for various DataSets.
    *
diff --git a/java/vector/src/main/java/module-info.java 
b/java/vector/src/main/java/module-info.java
index fdea2bd067..8ba1b3579e 100644
--- a/java/vector/src/main/java/module-info.java
+++ b/java/vector/src/main/java/module-info.java
@@ -25,6 +25,7 @@ module org.apache.arrow.vector {
   exports org.apache.arrow.vector.complex.writer;
   exports org.apache.arrow.vector.compression;
   exports org.apache.arrow.vector.dictionary;
+  exports org.apache.arrow.vector.extension;
   exports org.apache.arrow.vector.holders;
   exports org.apache.arrow.vector.ipc;
   exports org.apache.arrow.vector.ipc.message;
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/extension/InvalidExtensionMetadataException.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/extension/InvalidExtensionMetadataException.java
new file mode 100644
index 0000000000..2349a7d4bc
--- /dev/null
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/extension/InvalidExtensionMetadataException.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.extension;
+
+/** The extension metadata was malformed. */
+public class InvalidExtensionMetadataException extends RuntimeException {
+  public InvalidExtensionMetadataException(String message) {
+    super(message);
+  }
+
+  public InvalidExtensionMetadataException(String message, Throwable cause) {
+    super(message, cause);
+  }
+}
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueType.java 
b/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueType.java
new file mode 100644
index 0000000000..a0e898a543
--- /dev/null
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueType.java
@@ -0,0 +1,393 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.extension;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import java.util.Collections;
+import java.util.Objects;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.Decimal256Vector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float2Vector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalMonthDayNanoVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.LargeVarBinaryVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoTZVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecTZVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.ViewVarBinaryVector;
+import org.apache.arrow.vector.ViewVarCharVector;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+/**
+ * Opaque is a placeholder for a type from an external (usually non-Arrow) 
system that could not be
+ * interpreted.
+ */
+public class OpaqueType extends ArrowType.ExtensionType {
+  private static final AtomicBoolean registered = new AtomicBoolean(false);
+  public static final String EXTENSION_NAME = "arrow.opaque";
+  private final ArrowType storageType;
+  private final String typeName;
+  private final String vendorName;
+
+  /** Register the extension type so it can be used globally. */
+  public static void ensureRegistered() {
+    if (!registered.getAndSet(true)) {
+      // The values don't matter, we just need an instance
+      ExtensionTypeRegistry.register(new 
OpaqueType(Types.MinorType.NULL.getType(), "", ""));
+    }
+  }
+
+  /**
+   * Create a new type instance.
+   *
+   * @param storageType The underlying Arrow type.
+   * @param typeName The name of the unknown type.
+   * @param vendorName The name of the originating system of the unknown type.
+   */
+  public OpaqueType(ArrowType storageType, String typeName, String vendorName) 
{
+    this.storageType = Objects.requireNonNull(storageType, "storageType");
+    this.typeName = Objects.requireNonNull(typeName, "typeName");
+    this.vendorName = Objects.requireNonNull(vendorName, "vendorName");
+  }
+
+  @Override
+  public ArrowType storageType() {
+    return storageType;
+  }
+
+  public String typeName() {
+    return typeName;
+  }
+
+  public String vendorName() {
+    return vendorName;
+  }
+
+  @Override
+  public String extensionName() {
+    return EXTENSION_NAME;
+  }
+
+  @Override
+  public boolean extensionEquals(ExtensionType other) {
+    return other != null
+        && EXTENSION_NAME.equals(other.extensionName())
+        && other instanceof OpaqueType
+        && storageType.equals(other.storageType())
+        && typeName.equals(((OpaqueType) other).typeName())
+        && vendorName.equals(((OpaqueType) other).vendorName());
+  }
+
+  @Override
+  public String serialize() {
+    ObjectMapper mapper = new ObjectMapper();
+    ObjectNode object = mapper.createObjectNode();
+    object.put("type_name", typeName);
+    object.put("vendor_name", vendorName);
+    try {
+      return mapper.writeValueAsString(object);
+    } catch (JsonProcessingException e) {
+      throw new RuntimeException("Could not serialize " + this, e);
+    }
+  }
+
+  @Override
+  public ArrowType deserialize(ArrowType storageType, String serializedData) {
+    ObjectMapper mapper = new ObjectMapper();
+    JsonNode object;
+    try {
+      object = mapper.readTree(serializedData);
+    } catch (JsonProcessingException e) {
+      throw new InvalidExtensionMetadataException("Extension metadata is 
invalid", e);
+    }
+    JsonNode typeName = object.get("type_name");
+    JsonNode vendorName = object.get("vendor_name");
+    if (typeName == null) {
+      throw new InvalidExtensionMetadataException("typeName is missing");
+    }
+    if (vendorName == null) {
+      throw new InvalidExtensionMetadataException("vendorName is missing");
+    }
+    if (!typeName.isTextual()) {
+      throw new InvalidExtensionMetadataException("typeName should be string, 
was " + typeName);
+    }
+    if (!vendorName.isTextual()) {
+      throw new InvalidExtensionMetadataException("vendorName should be 
string, was " + vendorName);
+    }
+    return new OpaqueType(storageType, typeName.asText(), vendorName.asText());
+  }
+
+  @Override
+  public FieldVector getNewVector(String name, FieldType fieldType, 
BufferAllocator allocator) {
+    // XXX: fieldType is supposed to be the extension type
+    final Field field = new Field(name, fieldType, Collections.emptyList());
+    final FieldVector underlyingVector =
+        storageType.accept(new UnderlyingVectorTypeVisitor(name, allocator));
+    return new OpaqueVector(field, allocator, underlyingVector);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(super.hashCode(), storageType, typeName, vendorName);
+  }
+
+  @Override
+  public String toString() {
+    return "OpaqueType("
+        + storageType
+        + ", typeName='"
+        + typeName
+        + '\''
+        + ", vendorName='"
+        + vendorName
+        + '\''
+        + ')';
+  }
+
+  private static class UnderlyingVectorTypeVisitor implements 
ArrowTypeVisitor<FieldVector> {
+    private final String name;
+    private final BufferAllocator allocator;
+
+    UnderlyingVectorTypeVisitor(String name, BufferAllocator allocator) {
+      this.name = name;
+      this.allocator = allocator;
+    }
+
+    @Override
+    public FieldVector visit(Null type) {
+      return new NullVector(name);
+    }
+
+    private RuntimeException unsupported(ArrowType type) {
+      throw new UnsupportedOperationException(
+          "OpaqueType#getUnderlyingVector is not supported for storage type: " 
+ type);
+    }
+
+    @Override
+    public FieldVector visit(Struct type) {
+      throw unsupported(type);
+    }
+
+    @Override
+    public FieldVector visit(List type) {
+      throw unsupported(type);
+    }
+
+    @Override
+    public FieldVector visit(LargeList type) {
+      throw unsupported(type);
+    }
+
+    @Override
+    public FieldVector visit(FixedSizeList type) {
+      throw unsupported(type);
+    }
+
+    @Override
+    public FieldVector visit(Union type) {
+      throw unsupported(type);
+    }
+
+    @Override
+    public FieldVector visit(Map type) {
+      throw unsupported(type);
+    }
+
+    @Override
+    public FieldVector visit(Int type) {
+      return new IntVector(name, allocator);
+    }
+
+    @Override
+    public FieldVector visit(FloatingPoint type) {
+      switch (type.getPrecision()) {
+        case HALF:
+          return new Float2Vector(name, allocator);
+        case SINGLE:
+          return new Float4Vector(name, allocator);
+        case DOUBLE:
+          return new Float8Vector(name, allocator);
+        default:
+          throw unsupported(type);
+      }
+    }
+
+    @Override
+    public FieldVector visit(Utf8 type) {
+      return new VarCharVector(name, allocator);
+    }
+
+    @Override
+    public FieldVector visit(Utf8View type) {
+      return new ViewVarCharVector(name, allocator);
+    }
+
+    @Override
+    public FieldVector visit(LargeUtf8 type) {
+      return new LargeVarCharVector(name, allocator);
+    }
+
+    @Override
+    public FieldVector visit(Binary type) {
+      return new VarBinaryVector(name, allocator);
+    }
+
+    @Override
+    public FieldVector visit(BinaryView type) {
+      return new ViewVarBinaryVector(name, allocator);
+    }
+
+    @Override
+    public FieldVector visit(LargeBinary type) {
+      return new LargeVarBinaryVector(name, allocator);
+    }
+
+    @Override
+    public FieldVector visit(FixedSizeBinary type) {
+      return new FixedSizeBinaryVector(Field.nullable(name, type), allocator);
+    }
+
+    @Override
+    public FieldVector visit(Bool type) {
+      return new BitVector(name, allocator);
+    }
+
+    @Override
+    public FieldVector visit(Decimal type) {
+      if (type.getBitWidth() == 128) {
+        return new DecimalVector(Field.nullable(name, type), allocator);
+      } else if (type.getBitWidth() == 256) {
+        return new Decimal256Vector(Field.nullable(name, type), allocator);
+      }
+      throw unsupported(type);
+    }
+
+    @Override
+    public FieldVector visit(Date type) {
+      switch (type.getUnit()) {
+        case DAY:
+          return new DateDayVector(name, allocator);
+        case MILLISECOND:
+          return new DateMilliVector(name, allocator);
+        default:
+          throw unsupported(type);
+      }
+    }
+
+    @Override
+    public FieldVector visit(Time type) {
+      switch (type.getUnit()) {
+        case SECOND:
+          return new TimeSecVector(name, allocator);
+        case MILLISECOND:
+          return new TimeMilliVector(name, allocator);
+        case MICROSECOND:
+          return new TimeMicroVector(name, allocator);
+        case NANOSECOND:
+          return new TimeNanoVector(name, allocator);
+        default:
+          throw unsupported(type);
+      }
+    }
+
+    @Override
+    public FieldVector visit(Timestamp type) {
+      if (type.getTimezone() == null || type.getTimezone().isEmpty()) {
+        switch (type.getUnit()) {
+          case SECOND:
+            return new TimeStampSecVector(Field.nullable(name, type), 
allocator);
+          case MILLISECOND:
+            return new TimeStampMilliVector(Field.nullable(name, type), 
allocator);
+          case MICROSECOND:
+            return new TimeStampMicroVector(Field.nullable(name, type), 
allocator);
+          case NANOSECOND:
+            return new TimeStampNanoVector(Field.nullable(name, type), 
allocator);
+          default:
+            throw unsupported(type);
+        }
+      }
+      switch (type.getUnit()) {
+        case SECOND:
+          return new TimeStampSecTZVector(Field.nullable(name, type), 
allocator);
+        case MILLISECOND:
+          return new TimeStampMilliTZVector(Field.nullable(name, type), 
allocator);
+        case MICROSECOND:
+          return new TimeStampMicroTZVector(Field.nullable(name, type), 
allocator);
+        case NANOSECOND:
+          return new TimeStampNanoTZVector(Field.nullable(name, type), 
allocator);
+        default:
+          throw unsupported(type);
+      }
+    }
+
+    @Override
+    public FieldVector visit(Interval type) {
+      switch (type.getUnit()) {
+        case YEAR_MONTH:
+          return new IntervalYearVector(name, allocator);
+        case DAY_TIME:
+          return new IntervalDayVector(name, allocator);
+        case MONTH_DAY_NANO:
+          return new IntervalMonthDayNanoVector(name, allocator);
+        default:
+          throw unsupported(type);
+      }
+    }
+
+    @Override
+    public FieldVector visit(Duration type) {
+      return new DurationVector(Field.nullable(name, type), allocator);
+    }
+
+    @Override
+    public FieldVector visit(ListView type) {
+      throw unsupported(type);
+    }
+  }
+}
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueVector.java
new file mode 100644
index 0000000000..00eb9a984e
--- /dev/null
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueVector.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.extension;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueIterableVector;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * Opaque is a wrapper for (usually binary) data from an external (often 
non-Arrow) system that
+ * could not be interpreted.
+ */
+public class OpaqueVector extends ExtensionTypeVector<FieldVector>
+    implements ValueIterableVector<Object> {
+  private final Field field;
+
+  public OpaqueVector(Field field, BufferAllocator allocator, FieldVector 
underlyingVector) {
+    super(field, allocator, underlyingVector);
+    this.field = field;
+  }
+
+  @Override
+  public Field getField() {
+    return field;
+  }
+
+  @Override
+  public Object getObject(int index) {
+    return getUnderlyingVector().getObject(index);
+  }
+
+  @Override
+  public int hashCode(int index) {
+    return hashCode(index, null);
+  }
+
+  @Override
+  public int hashCode(int index, ArrowBufHasher hasher) {
+    return getUnderlyingVector().hashCode(index, hasher);
+  }
+}
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestOpaqueExtensionType.java
 
b/java/vector/src/test/java/org/apache/arrow/vector/TestOpaqueExtensionType.java
new file mode 100644
index 0000000000..9fd9b580b3
--- /dev/null
+++ 
b/java/vector/src/test/java/org/apache/arrow/vector/TestOpaqueExtensionType.java
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collections;
+import java.util.stream.Stream;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.extension.InvalidExtensionMetadataException;
+import org.apache.arrow.vector.extension.OpaqueType;
+import org.apache.arrow.vector.extension.OpaqueVector;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+class TestOpaqueExtensionType {
+  BufferAllocator allocator;
+
+  @BeforeEach
+  void beforeEach() {
+    allocator = new RootAllocator();
+  }
+
+  @AfterEach
+  void afterEach() {
+    allocator.close();
+  }
+
+  @ParameterizedTest
+  @ValueSource(
+      strings = {
+        "{\"type_name\": \"\", \"vendor_name\": \"\"}",
+        "{\"type_name\": \"\", \"vendor_name\": \"\", \"extra_field\": 42}",
+        "{\"type_name\": \"array\", \"vendor_name\": \"postgresql\"}",
+        "{\"type_name\": \"foo.bar\", \"vendor_name\": \"postgresql\"}",
+      })
+  void testDeserializeValid(String serialized) {
+    ArrowType storageType = Types.MinorType.NULL.getType();
+    OpaqueType type = new OpaqueType(storageType, "", "");
+
+    assertDoesNotThrow(() -> type.deserialize(storageType, serialized));
+  }
+
+  @ParameterizedTest
+  @ValueSource(
+      strings = {
+        "",
+        "{\"type_name\": \"\"}",
+        "{\"vendor_name\": \"\"}",
+        "{\"type_name\": null, \"vendor_name\": \"\"}",
+        "{\"type_name\": \"\", \"vendor_name\": null}",
+        "{\"type_name\": 42, \"vendor_name\": \"\"}",
+        "{\"type_name\": \"\", \"vendor_name\": 42}",
+        "{\"type_name\": \"\", \"vendor_name\": \"\"",
+      })
+  void testDeserializeInvalid(String serialized) {
+    ArrowType storageType = Types.MinorType.NULL.getType();
+    OpaqueType type = new OpaqueType(storageType, "", "");
+
+    assertThrows(
+        InvalidExtensionMetadataException.class, () -> 
type.deserialize(storageType, serialized));
+  }
+
+  @ParameterizedTest
+  @MethodSource("storageType")
+  void testRoundTrip(ArrowType storageType) {
+    OpaqueType type = new OpaqueType(storageType, "foo", "bar");
+    assertEquals(storageType, type.storageType());
+    assertEquals("foo", type.typeName());
+    if (storageType.isComplex()) {
+      assertThrows(
+          UnsupportedOperationException.class,
+          () -> type.getNewVector("name", FieldType.nullable(type), 
allocator));
+    } else {
+      assertDoesNotThrow(() -> type.getNewVector("name", 
FieldType.nullable(type), allocator))
+          .close();
+    }
+
+    String serialized = assertDoesNotThrow(type::serialize);
+    OpaqueType holder = new OpaqueType(Types.MinorType.NULL.getType(), "", "");
+    OpaqueType deserialized = (OpaqueType) holder.deserialize(storageType, 
serialized);
+    assertEquals(type, deserialized);
+    assertNotEquals(holder, deserialized);
+  }
+
+  @ParameterizedTest
+  @MethodSource("storageType")
+  void testIpcRoundTrip(ArrowType storageType) {
+    OpaqueType.ensureRegistered();
+
+    OpaqueType type = new OpaqueType(storageType, "foo", "bar");
+    Schema schema = new 
Schema(Collections.singletonList(Field.nullable("unknown", type)));
+    byte[] serialized = schema.serializeAsMessage();
+    Schema deseralized = 
Schema.deserializeMessage(ByteBuffer.wrap(serialized));
+    assertEquals(schema, deseralized);
+  }
+
+  @Test
+  void testVectorType() throws IOException {
+    OpaqueType.ensureRegistered();
+
+    ArrowType storageType = Types.MinorType.VARBINARY.getType();
+    OpaqueType type = new OpaqueType(storageType, "foo", "bar");
+    try (FieldVector vector = type.getNewVector("field", 
FieldType.nullable(type), allocator)) {
+      OpaqueVector opaque = assertInstanceOf(OpaqueVector.class, vector);
+      assertEquals("field", opaque.getField().getName());
+      assertEquals(type, opaque.getField().getType());
+
+      VarBinaryVector binary =
+          assertInstanceOf(VarBinaryVector.class, 
opaque.getUnderlyingVector());
+      binary.setSafe(0, new byte[] {0, 1, 2, 3});
+      binary.setNull(1);
+      opaque.setValueCount(2);
+
+      ByteArrayOutputStream baos = new ByteArrayOutputStream();
+      try (VectorSchemaRoot root = new 
VectorSchemaRoot(Collections.singletonList(opaque));
+          ArrowStreamWriter writer =
+              new ArrowStreamWriter(root, new 
DictionaryProvider.MapDictionaryProvider(), baos)) {
+        writer.start();
+        writer.writeBatch();
+      }
+
+      try (ArrowStreamReader reader =
+          new ArrowStreamReader(new ByteArrayInputStream(baos.toByteArray()), 
allocator)) {
+        assertTrue(reader.loadNextBatch());
+        VectorSchemaRoot root = reader.getVectorSchemaRoot();
+        assertEquals(2, root.getRowCount());
+        assertEquals(new Schema(Collections.singletonList(opaque.getField())), 
root.getSchema());
+
+        OpaqueVector actual = assertInstanceOf(OpaqueVector.class, 
root.getVector("field"));
+        assertFalse(actual.isNull(0));
+        assertTrue(actual.isNull(1));
+        assertArrayEquals(new byte[] {0, 1, 2, 3}, (byte[]) 
actual.getObject(0));
+        assertNull(actual.getObject(1));
+      }
+    }
+  }
+
+  static Stream<ArrowType> storageType() {
+    return Stream.of(
+        Types.MinorType.NULL.getType(),
+        Types.MinorType.BIGINT.getType(),
+        Types.MinorType.BIT.getType(),
+        Types.MinorType.VARBINARY.getType(),
+        Types.MinorType.VARCHAR.getType(),
+        Types.MinorType.LIST.getType(),
+        new ArrowType.Decimal(12, 4, 128));
+  }
+}


Reply via email to