This is an automated email from the ASF dual-hosted git repository.

mchades pushed a commit to branch branch-lance-namepspace-dev
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/branch-lance-namepspace-dev by 
this push:
     new bc1b77a3bb [#8946] improvement(lance): supports more dataTypes for 
lance table creation (#8947)
bc1b77a3bb is described below

commit bc1b77a3bb357dd26bd64f025899b0069ece17b3
Author: mchades <[email protected]>
AuthorDate: Wed Oct 29 23:13:59 2025 +0800

    [#8946] improvement(lance): supports more dataTypes for lance table 
creation (#8947)
    
    ### What changes were proposed in this pull request?
    
    Support more data types for Lance table creation.
    
    ### Why are the changes needed?
    
    Fix: #8946
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, more column data types are supported.
    
    ### How was this patch tested?
    
    Unit tests added in `TestLanceDataTypeConverter`.
---
 .../lakehouse/lance/LanceCatalogOperations.java    |  37 +--
 .../lakehouse/lance/LanceDataTypeConverter.java    | 268 +++++++++--------
 .../lance/TestLanceDataTypeConverter.java          | 327 +++++++++++++++++++++
 docs/generic-lakehouse-catalog.md                  | 140 +++++++++
 4 files changed, 615 insertions(+), 157 deletions(-)

diff --git 
a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java
 
b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java
index dcfe6bd489..9572c656d2 100644
--- 
a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java
+++ 
b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java
@@ -38,7 +38,6 @@ import java.util.Map;
 import java.util.Optional;
 import java.util.stream.Collectors;
 import org.apache.arrow.memory.RootAllocator;
-import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.gravitino.Catalog;
@@ -129,7 +128,7 @@ public class LanceCatalogOperations implements 
LakehouseCatalogOperations {
         Dataset.create(
             new RootAllocator(),
             location,
-            convertColumnsToSchema(columns),
+            convertColumnsToArrowSchema(columns),
             new 
WriteParams.Builder().withStorageOptions(storageProps).build())) {
       GenericLakehouseTable.Builder builder = GenericLakehouseTable.builder();
       return builder
@@ -151,39 +150,13 @@ public class LanceCatalogOperations implements 
LakehouseCatalogOperations {
     }
   }
 
-  private org.apache.arrow.vector.types.pojo.Schema 
convertColumnsToSchema(Column[] columns) {
-    LanceDataTypeConverter converter = new LanceDataTypeConverter();
+  private org.apache.arrow.vector.types.pojo.Schema 
convertColumnsToArrowSchema(Column[] columns) {
     List<Field> fields =
         Arrays.stream(columns)
             .map(
-                col -> {
-                  boolean nullable = col.nullable();
-                  ArrowType parentType = 
converter.fromGravitino(col.dataType());
-                  List<ArrowType> childTypes = 
converter.getChildTypes(col.dataType());
-                  List<Field> childFields =
-                      childTypes.stream()
-                          .map(
-                              childType ->
-                                  new org.apache.arrow.vector.types.pojo.Field(
-                                      "",
-                                      
org.apache.arrow.vector.types.pojo.FieldType.nullable(
-                                          childType),
-                                      null))
-                          .collect(Collectors.toList());
-
-                  if (nullable) {
-                    return new org.apache.arrow.vector.types.pojo.Field(
-                        col.name(),
-                        
org.apache.arrow.vector.types.pojo.FieldType.nullable(parentType),
-                        childFields);
-                  }
-
-                  // not nullable
-                  return new org.apache.arrow.vector.types.pojo.Field(
-                      col.name(),
-                      
org.apache.arrow.vector.types.pojo.FieldType.notNullable(parentType),
-                      childFields);
-                })
+                col ->
+                    LanceDataTypeConverter.CONVERTER.toArrowField(
+                        col.name(), col.dataType(), col.nullable()))
             .collect(Collectors.toList());
     return new org.apache.arrow.vector.types.pojo.Schema(fields);
   }
diff --git 
a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java
 
b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java
index d7966edd5e..9cd5783de1 100644
--- 
a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java
+++ 
b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java
@@ -19,82 +19,183 @@
 
 package org.apache.gravitino.catalog.lakehouse.lance;
 
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
+import java.util.Arrays;
 import java.util.List;
+import org.apache.arrow.vector.complex.MapVector;
 import org.apache.arrow.vector.types.DateUnit;
 import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.IntervalUnit;
 import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.UnionMode;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
 import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
 import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.gravitino.connector.DataTypeConverter;
 import org.apache.gravitino.json.JsonUtils;
 import org.apache.gravitino.rel.types.Type;
 import org.apache.gravitino.rel.types.Types;
 import org.apache.gravitino.rel.types.Types.FixedType;
-import org.apache.gravitino.rel.types.Types.UnparsedType;
 
 public class LanceDataTypeConverter implements DataTypeConverter<ArrowType, 
ArrowType> {
 
+  public static final LanceDataTypeConverter CONVERTER = new 
LanceDataTypeConverter();
+
+  public Field toArrowField(String name, Type type, boolean nullable) {
+    switch (type.name()) {
+      case LIST:
+        Types.ListType listType = (Types.ListType) type;
+        FieldType listField = new FieldType(nullable, ArrowType.List.INSTANCE, 
null);
+        return new Field(
+            name,
+            listField,
+            Lists.newArrayList(
+                toArrowField("element", listType.elementType(), 
listType.elementNullable())));
+
+      case STRUCT:
+        Types.StructType structType = (Types.StructType) type;
+        FieldType structField = new FieldType(nullable, 
ArrowType.Struct.INSTANCE, null);
+        return new Field(
+            name,
+            structField,
+            Arrays.stream(structType.fields())
+                .map(field -> toArrowField(field.name(), field.type(), 
field.nullable()))
+                .toList());
+
+      case MAP:
+        Types.MapType mapType = (Types.MapType) type;
+        FieldType mapField = new FieldType(nullable, new ArrowType.Map(false), 
null);
+        return new Field(
+            name,
+            mapField,
+            Lists.newArrayList(
+                toArrowField(
+                    MapVector.DATA_VECTOR_NAME,
+                    Types.StructType.of(
+                        Types.StructType.Field.of(
+                            // Note: Arrow MapVector requires key field to be 
non-nullable
+                            MapVector.KEY_NAME,
+                            mapType.keyType(),
+                            false /*nullable*/,
+                            null /*comment*/),
+                        Types.StructType.Field.of(
+                            MapVector.VALUE_NAME,
+                            mapType.valueType(),
+                            mapType.valueNullable(),
+                            null)),
+                    false /*nullable*/)));
+
+      case UNION:
+        Types.UnionType unionType = (Types.UnionType) type;
+        List<Field> types =
+            Arrays.stream(unionType.types())
+                .map(
+                    t ->
+                        toArrowField(
+                            t.simpleString(), t, true /*nullable*/) // union 
members are nullable
+                    )
+                .toList();
+        int[] typeIds =
+            types.stream()
+                .mapToInt(
+                    f ->
+                        
org.apache.arrow.vector.types.Types.getMinorTypeForArrowType(f.getType())
+                            .ordinal())
+                .toArray();
+        FieldType unionField =
+            new FieldType(nullable, new ArrowType.Union(UnionMode.Sparse, 
typeIds), null);
+        return new Field(name, unionField, types);
+
+      case EXTERNAL:
+        Types.ExternalType externalType = (Types.ExternalType) type;
+        Field field;
+        try {
+          field = 
JsonUtils.anyFieldMapper().readValue(externalType.catalogString(), Field.class);
+        } catch (JsonProcessingException e) {
+          throw new RuntimeException(
+              "Failed to parse external type catalog string: " + 
externalType.catalogString(), e);
+        }
+        Preconditions.checkArgument(
+            name.equals(field.getName()),
+            "expected field name %s but got %s",
+            name,
+            field.getName());
+        Preconditions.checkArgument(
+            nullable == field.isNullable(),
+            "expected field nullable %s but got %s",
+            nullable,
+            field.isNullable());
+        return field;
+
+      default:
+        // non-complex type
+        FieldType fieldType = new FieldType(nullable, fromGravitino(type), 
null);
+        return new Field(name, fieldType, null);
+    }
+  }
+
   @Override
   public ArrowType fromGravitino(Type type) {
     switch (type.name()) {
       case BOOLEAN:
         return Bool.INSTANCE;
       case BYTE:
-        return new Int(8, true);
+        return new Int(8, ((Types.ByteType) type).signed());
       case SHORT:
-        return new Int(16, true);
+        return new Int(8 * 2, ((Types.ShortType) type).signed());
       case INTEGER:
-        return new Int(32, true);
+        return new Int(8 * 4, ((Types.IntegerType) type).signed());
       case LONG:
-        return new Int(64, true);
+        return new Int(8 * 8, ((Types.LongType) type).signed());
       case FLOAT:
         return new FloatingPoint(FloatingPointPrecision.SINGLE);
       case DOUBLE:
         return new FloatingPoint(FloatingPointPrecision.DOUBLE);
+      case STRING:
+        return ArrowType.Utf8.INSTANCE;
+      case BINARY:
+        return ArrowType.Binary.INSTANCE;
       case DECIMAL:
-        // Lance uses FIXED_SIZE_BINARY for decimal types
-        return new ArrowType.FixedSizeBinary(16); // assuming 16 bytes for 
decimal
+        Types.DecimalType decimalType = (Types.DecimalType) type;
+        return new ArrowType.Decimal(decimalType.precision(), 
decimalType.scale(), 8 * 16);
       case DATE:
         return new ArrowType.Date(DateUnit.DAY);
-      case TIME:
-        return new ArrowType.Time(TimeUnit.MILLISECOND, 32);
       case TIMESTAMP:
-        return new ArrowType.Timestamp(TimeUnit.MILLISECOND, null);
-      case VARCHAR:
-      case STRING:
-        return new ArrowType.Utf8();
+        Types.TimestampType timestampType = (Types.TimestampType) type;
+        TimeUnit timeUnit = TimeUnit.MICROSECOND;
+        if (timestampType.hasPrecisionSet()) {
+          timeUnit =
+              switch (timestampType.precision()) {
+                case 0 -> TimeUnit.SECOND;
+                case 3 -> TimeUnit.MILLISECOND;
+                case 6 -> TimeUnit.MICROSECOND;
+                case 9 -> TimeUnit.NANOSECOND;
+                default -> throw new UnsupportedOperationException(
+                    "Expected precision to be one of 0, 3, 6, 9 but got: "
+                        + timestampType.precision());
+              };
+        }
+        if (timestampType.hasTimeZone()) {
+          // todo: need timeZoneId for timestamp with time zone
+          return new ArrowType.Timestamp(timeUnit, "UTC");
+        }
+        return new ArrowType.Timestamp(timeUnit, null);
+      case TIME:
+        return new ArrowType.Time(TimeUnit.NANOSECOND, 8 * 8);
+      case NULL:
+        return ArrowType.Null.INSTANCE;
+      case INTERVAL_YEAR:
+        return new ArrowType.Interval(IntervalUnit.YEAR_MONTH);
+      case INTERVAL_DAY:
+        return new ArrowType.Duration(TimeUnit.MICROSECOND);
       case FIXED:
         FixedType fixedType = (FixedType) type;
         return new ArrowType.FixedSizeBinary(fixedType.length());
-      case BINARY:
-        return new ArrowType.Binary();
-      case UNPARSED:
-        String typeStr = ((UnparsedType) type).unparsedType().toString();
-        try {
-          Type t = JsonUtils.anyFieldMapper().readValue(typeStr, Type.class);
-          if (t instanceof Types.ListType) {
-            return ArrowType.List.INSTANCE;
-          } else if (t instanceof Types.MapType) {
-            return new ArrowType.Map(false);
-          } else if (t instanceof Types.StructType) {
-            return ArrowType.Struct.INSTANCE;
-          } else {
-            throw new UnsupportedOperationException(
-                "Unsupported UnparsedType conversion: " + t.simpleString());
-          }
-        } catch (Exception e) {
-          // FixedSizeListArray(integer, 3)
-          if (typeStr.startsWith("FixedSizeListArray")) {
-            int size =
-                Integer.parseInt(
-                    typeStr.substring(typeStr.indexOf(',') + 1, 
typeStr.indexOf(')')).trim());
-            return new ArrowType.FixedSizeList(size);
-          }
-          throw new UnsupportedOperationException("Failed to parse 
UnparsedType: " + typeStr, e);
-        }
       default:
         throw new UnsupportedOperationException("Unsupported Gravitino type: " 
+ type.name());
     }
@@ -102,91 +203,8 @@ public class LanceDataTypeConverter implements 
DataTypeConverter<ArrowType, Arro
 
   @Override
   public Type toGravitino(ArrowType arrowType) {
-    if (arrowType instanceof Bool) {
-      return Types.BooleanType.get();
-    } else if (arrowType instanceof Int intType) {
-      switch (intType.getBitWidth()) {
-        case 8 -> {
-          return Types.ByteType.get();
-        }
-        case 16 -> {
-          return Types.ShortType.get();
-        }
-        case 32 -> {
-          return Types.IntegerType.get();
-        }
-        case 64 -> {
-          return Types.LongType.get();
-        }
-        default -> throw new UnsupportedOperationException(
-            "Unsupported Int bit width: " + intType.getBitWidth());
-      }
-    } else if (arrowType instanceof FloatingPoint floatingPoint) {
-      switch (floatingPoint.getPrecision()) {
-        case SINGLE:
-          return Types.FloatType.get();
-        case DOUBLE:
-          return Types.DoubleType.get();
-        default:
-          throw new UnsupportedOperationException(
-              "Unsupported FloatingPoint precision: " + 
floatingPoint.getPrecision());
-      }
-    } else if (arrowType instanceof ArrowType.FixedSizeBinary) {
-      ArrowType.FixedSizeBinary fixedSizeBinary = (ArrowType.FixedSizeBinary) 
arrowType;
-      return Types.FixedType.of(fixedSizeBinary.getByteWidth());
-    } else if (arrowType instanceof ArrowType.Date) {
-      return Types.DateType.get();
-    } else if (arrowType instanceof ArrowType.Time) {
-      return Types.TimeType.get();
-    } else if (arrowType instanceof ArrowType.Timestamp) {
-      return Types.TimestampType.withoutTimeZone();
-    } else if (arrowType instanceof ArrowType.Utf8) {
-      return Types.StringType.get();
-    } else if (arrowType instanceof ArrowType.Binary) {
-      return Types.BinaryType.get();
-      // TODO handle complex types like List, Map, Struct
-    } else {
-      throw new UnsupportedOperationException("Unsupported Arrow type: " + 
arrowType);
-    }
-  }
-
-  public List<ArrowType> getChildTypes(Type parentType) {
-    if (parentType.name() != Type.Name.UNPARSED) {
-      return List.of();
-    }
-
-    List<ArrowType> arrowTypes = Lists.newArrayList();
-    String typeStr = ((UnparsedType) parentType).unparsedType().toString();
-    try {
-      Type t = JsonUtils.anyFieldMapper().readValue(typeStr, Type.class);
-      if (t instanceof Types.ListType listType) {
-        arrowTypes.add(fromGravitino(listType.elementType()));
-      } else if (t instanceof Types.MapType mapType) {
-        arrowTypes.add(fromGravitino(mapType.keyType()));
-        arrowTypes.add(fromGravitino(mapType.valueType()));
-      } else {
-        // TODO support struct type.
-        throw new UnsupportedOperationException(
-            "Unsupported UnparsedType conversion: " + t.simpleString());
-      }
-
-      return arrowTypes;
-    } catch (Exception e) {
-      // FixedSizeListArray(integer, 3)
-
-      try {
-        if (typeStr.startsWith("FixedSizeListArray")) {
-          String type = typeStr.substring(typeStr.indexOf('(') + 1, 
typeStr.indexOf(',')).trim();
-          Type childType = JsonUtils.anyFieldMapper().readValue("\"" + type + 
"\"", Type.class);
-          arrowTypes.add(fromGravitino(childType));
-
-          return arrowTypes;
-        }
-      } catch (Exception e1) {
-        throw new UnsupportedOperationException("Failed to parse UnparsedType: 
" + typeStr, e1);
-      }
-
-      throw new UnsupportedOperationException("Failed to parse UnparsedType: " 
+ typeStr, e);
-    }
+    // since the table metadata will load from Gravitino storage directly, we 
don't need to
+    // implement this method for now.
+    throw new UnsupportedOperationException("toGravitino is not implemented 
yet.");
   }
 }
diff --git 
a/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/TestLanceDataTypeConverter.java
 
b/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/TestLanceDataTypeConverter.java
new file mode 100644
index 0000000000..cf28ee7434
--- /dev/null
+++ 
b/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/TestLanceDataTypeConverter.java
@@ -0,0 +1,327 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.catalog.lakehouse.lance;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.function.Consumer;
+import java.util.stream.Stream;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.gravitino.rel.types.Type;
+import org.apache.gravitino.rel.types.Types;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.MethodSource;
+
+public class TestLanceDataTypeConverter {
+  private static final LanceDataTypeConverter CONVERTER = 
LanceDataTypeConverter.CONVERTER;
+
+  // Gravitino complex type definitions for testing
+  private static final Types.StructType SIMPLE_STRUCT =
+      Types.StructType.of(
+          Types.StructType.Field.of("id", Types.LongType.get(), false, null),
+          Types.StructType.Field.of("name", Types.StringType.get(), true, 
null));
+
+  private static final Types.StructType NESTED_STRUCT =
+      Types.StructType.of(
+          Types.StructType.Field.of("id", Types.LongType.get(), false, null),
+          Types.StructType.Field.of(
+              "address",
+              Types.StructType.of(
+                  Types.StructType.Field.of("street", Types.StringType.get(), 
false, null),
+                  Types.StructType.Field.of("city", Types.StringType.get(), 
false, null)),
+              true,
+              null));
+  private static final String NESTED_STRUCT_JSON =
+      
"{\"name\":\"person_nested_json\",\"nullable\":false,\"type\":{\"name\":\"struct\"},\"children\":["
+          + 
"{\"name\":\"id\",\"nullable\":false,\"type\":{\"name\":\"int\",\"bitWidth\":64,\"isSigned\":true},\"children\":[]},"
+          + 
"{\"name\":\"address\",\"nullable\":true,\"type\":{\"name\":\"struct\"},\"children\":["
+          + 
"{\"name\":\"street\",\"nullable\":false,\"type\":{\"name\":\"utf8\"},\"children\":[]},"
+          + 
"{\"name\":\"city\",\"nullable\":false,\"type\":{\"name\":\"utf8\"},\"children\":[]}"
+          + "]}"
+          + "]}";
+
+  private static final Types.ListType LIST_OF_STRUCTS =
+      Types.ListType.of(
+          Types.StructType.of(
+              Types.StructType.Field.of("sku", Types.StringType.get(), false, 
null),
+              Types.StructType.Field.of("quantity", Types.IntegerType.get(), 
false, null)),
+          true);
+
+  // Field validators for Arrow conversion tests
+  private static Consumer<Field> INT_VALIDATOR =
+      field -> assertInstanceOf(ArrowType.Int.class, 
field.getFieldType().getType());
+  private static Consumer<Field> STRING_VALIDATOR =
+      field -> assertInstanceOf(ArrowType.Utf8.class, 
field.getFieldType().getType());
+  private static Consumer<Field> LARGE_UTF8_VALIDATOR =
+      field -> assertInstanceOf(ArrowType.LargeUtf8.class, 
field.getFieldType().getType());
+  private static Consumer<Field> BOOLEAN_VALIDATOR =
+      field -> assertInstanceOf(ArrowType.Bool.class, 
field.getFieldType().getType());
+  private static Consumer<Field> DECIMAL_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.Decimal.class, 
field.getFieldType().getType());
+        ArrowType.Decimal decimal = (ArrowType.Decimal) 
field.getFieldType().getType();
+
+        assertEquals(10, decimal.getPrecision());
+        assertEquals(2, decimal.getScale());
+      };
+  private static Consumer<Field> LIST_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.List.class, field.getFieldType().getType());
+        assertEquals(1, field.getChildren().size());
+
+        Field elementField = field.getChildren().get(0);
+        assertEquals("element", elementField.getName());
+        assertTrue(elementField.isNullable());
+        assertInstanceOf(ArrowType.Int.class, 
elementField.getFieldType().getType());
+      };
+  private static Consumer<Field> MAP_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.Map.class, field.getFieldType().getType());
+        assertEquals(1, field.getChildren().size());
+
+        Field structField = field.getChildren().get(0);
+        assertEquals(MapVector.DATA_VECTOR_NAME, structField.getName());
+        assertEquals(2, structField.getChildren().size());
+
+        Field keyField = structField.getChildren().get(0);
+        assertEquals(MapVector.KEY_NAME, keyField.getName());
+        assertFalse(keyField.isNullable());
+        assertInstanceOf(ArrowType.Utf8.class, 
keyField.getFieldType().getType());
+
+        Field valueField = structField.getChildren().get(1);
+        assertEquals(MapVector.VALUE_NAME, valueField.getName());
+        assertTrue(valueField.isNullable());
+        assertInstanceOf(ArrowType.Int.class, 
valueField.getFieldType().getType());
+      };
+  private static Consumer<Field> STRUCT_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.Struct.class, 
field.getFieldType().getType());
+        assertEquals(2, field.getChildren().size());
+
+        Field idField = field.getChildren().get(0);
+        assertEquals("id", idField.getName());
+        assertFalse(idField.isNullable());
+        assertInstanceOf(ArrowType.Int.class, 
idField.getFieldType().getType());
+
+        Field nameField = field.getChildren().get(1);
+        assertEquals("name", nameField.getName());
+        assertTrue(nameField.isNullable());
+        assertInstanceOf(ArrowType.Utf8.class, 
nameField.getFieldType().getType());
+      };
+  private static Consumer<Field> NESTED_STRUCT_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.Struct.class, 
field.getFieldType().getType());
+        assertEquals(2, field.getChildren().size());
+
+        Field addressField = field.getChildren().get(1);
+        assertEquals("address", addressField.getName());
+        assertTrue(addressField.isNullable());
+
+        assertInstanceOf(ArrowType.Struct.class, 
addressField.getFieldType().getType());
+        assertEquals(2, addressField.getChildren().size());
+      };
+  private static Consumer<Field> LIST_OF_STRUCTS_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.List.class, field.getFieldType().getType());
+        assertEquals(1, field.getChildren().size());
+
+        Field elementField = field.getChildren().get(0);
+        assertEquals("element", elementField.getName());
+        assertTrue(elementField.isNullable());
+        assertInstanceOf(ArrowType.Struct.class, 
elementField.getFieldType().getType());
+        assertEquals(2, elementField.getChildren().size());
+      };
+  private static Consumer<Field> UNION_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.Union.class, 
field.getFieldType().getType());
+        ArrowType.Union unionType = (ArrowType.Union) 
field.getFieldType().getType();
+        assertEquals(UnionMode.Sparse, unionType.getMode());
+        assertEquals(2, field.getChildren().size());
+        assertInstanceOf(ArrowType.Int.class, 
field.getChildren().get(0).getFieldType().getType());
+        assertInstanceOf(ArrowType.Utf8.class, 
field.getChildren().get(1).getFieldType().getType());
+      };
+
+  @ParameterizedTest
+  @DisplayName("Test conversion of Integer types (Byte, Short, Integer, Long)")
+  @CsvSource({"BYTE, 8, true", "SHORT, 16, true", "INTEGER, 32, true", "LONG, 
64, true"})
+  public void testFromGravitinoIntegerTypes(
+      String typeName, int expectedBitWidth, boolean expectedSigned) {
+    Type type =
+        switch (typeName) {
+          case "BYTE" -> Types.ByteType.get();
+          case "SHORT" -> Types.ShortType.get();
+          case "INTEGER" -> Types.IntegerType.get();
+          case "LONG" -> Types.LongType.get();
+          default -> throw new IllegalArgumentException("Unknown type: " + 
typeName);
+        };
+
+    ArrowType arrowType = CONVERTER.fromGravitino(type);
+    assertInstanceOf(ArrowType.Int.class, arrowType);
+
+    ArrowType.Int intType = (ArrowType.Int) arrowType;
+    assertEquals(expectedBitWidth, intType.getBitWidth());
+    assertEquals(expectedSigned, intType.getIsSigned());
+  }
+
+  @Test
+  public void testFromGravitinoTimestampWithTz() {
+    Types.TimestampType timestampType = Types.TimestampType.withTimeZone();
+    ArrowType arrowType = CONVERTER.fromGravitino(timestampType);
+    assertInstanceOf(ArrowType.Timestamp.class, arrowType);
+
+    ArrowType.Timestamp tsArrow = (ArrowType.Timestamp) arrowType;
+    assertEquals(TimeUnit.MICROSECOND, tsArrow.getUnit());
+    assertEquals("UTC", tsArrow.getTimezone());
+  }
+
+  @Test
+  public void testExternalTypeConversion() {
+    String expectedColumnName = "col_name";
+    boolean expectedNullable = true;
+    Types.ExternalType externalType =
+        Types.ExternalType.of(
+            "{\"name\":\"col_name\",\"nullable\":true,"
+                + "\"type\":{\"name\":\"largeutf8\"},\"children\":[]}");
+    Field arrowField = CONVERTER.toArrowField(expectedColumnName, 
externalType, expectedNullable);
+    assertEquals(expectedColumnName, arrowField.getName());
+    assertEquals(expectedNullable, arrowField.isNullable());
+    assertInstanceOf(ArrowType.LargeUtf8.class, 
arrowField.getFieldType().getType());
+
+    externalType =
+        Types.ExternalType.of(
+            "{\"name\":\"col_name\",\"nullable\":true,"
+                + "\"type\":{\"name\":\"largebinary\"},\"children\":[]}");
+    arrowField = CONVERTER.toArrowField(expectedColumnName, externalType, 
expectedNullable);
+    assertEquals(expectedColumnName, arrowField.getName());
+    assertEquals(expectedNullable, arrowField.isNullable());
+    assertInstanceOf(ArrowType.LargeBinary.class, 
arrowField.getFieldType().getType());
+
+    externalType =
+        Types.ExternalType.of(
+            "{\"name\":\"col_name\",\"nullable\":true,"
+                + "\"type\":{\"name\":\"largelist\"},"
+                + "\"children\":["
+                + "{\"name\":\"element\",\"nullable\":true,"
+                + "\"type\":{\"name\":\"int\", \"bitWidth\":32, \"isSigned\": 
true},"
+                + "\"children\":[]}]}");
+    arrowField = CONVERTER.toArrowField(expectedColumnName, externalType, 
expectedNullable);
+    assertEquals(expectedColumnName, arrowField.getName());
+    assertEquals(expectedNullable, arrowField.isNullable());
+    assertInstanceOf(ArrowType.LargeList.class, 
arrowField.getFieldType().getType());
+
+    externalType =
+        Types.ExternalType.of(
+            "{\"name\":\"col_name\",\"nullable\":true,"
+                + "\"type\":{\"name\":\"fixedsizelist\", \"listSize\":10},"
+                + "\"children\":["
+                + "{\"name\":\"element\",\"nullable\":true,"
+                + "\"type\":{\"name\":\"int\", \"bitWidth\":32, \"isSigned\": 
true},"
+                + "\"children\":[]}]}");
+    arrowField = CONVERTER.toArrowField(expectedColumnName, externalType, 
expectedNullable);
+    assertEquals(expectedColumnName, arrowField.getName());
+    assertEquals(expectedNullable, arrowField.isNullable());
+    assertInstanceOf(ArrowType.FixedSizeList.class, 
arrowField.getFieldType().getType());
+    assertEquals(10, ((ArrowType.FixedSizeList) 
arrowField.getFieldType().getType()).getListSize());
+  }
+
+  @ParameterizedTest(name = "[{index}] name={0}, type={1}, nullable={2}")
+  @MethodSource("toArrowFieldArguments")
+  @DisplayName("Test toArrowField for various types")
+  public void testToArrowField(
+      String name, Type gravitinoType, boolean nullable, Consumer<Field> 
validator) {
+    Field field = CONVERTER.toArrowField(name, gravitinoType, nullable);
+
+    assertEquals(name, field.getName());
+    assertEquals(nullable, field.isNullable());
+    validator.accept(field);
+  }
+
+  @Test
+  void testUnsupportedTypeThrowsException() {
+    Types.UnparsedType unparsedType = Types.UnparsedType.of("UNKNOWN_TYPE");
+    assertThrows(UnsupportedOperationException.class, () -> 
CONVERTER.fromGravitino(unparsedType));
+  }
+
+  @Test
+  void testToGravitinoNotImplemented() {
+    assertThrows(
+        UnsupportedOperationException.class, () -> 
CONVERTER.toGravitino(ArrowType.Utf8.INSTANCE));
+  }
+
+  private static Stream<Arguments> toArrowFieldArguments() {
+    return Stream.of(
+        // Simple types
+        Arguments.of("age", Types.IntegerType.get(), true, INT_VALIDATOR),
+        Arguments.of("id", Types.LongType.get(), false, INT_VALIDATOR),
+        Arguments.of("name", Types.StringType.get(), true, STRING_VALIDATOR),
+        Arguments.of(
+            "description",
+            Types.ExternalType.of(
+                "{\n"
+                    + "  \"name\": \"description\",\n"
+                    + "  \"nullable\": true,\n"
+                    + "  \"type\": {\n"
+                    + "    \"name\": \"largeutf8\"\n"
+                    + "  }\n"
+                    + "}"),
+            true,
+            LARGE_UTF8_VALIDATOR),
+        Arguments.of("active", Types.BooleanType.get(), false, 
BOOLEAN_VALIDATOR),
+        // Decimal
+        Arguments.of("price", Types.DecimalType.of(10, 2), false, 
DECIMAL_VALIDATOR),
+        // List
+        Arguments.of(
+            "numbers", Types.ListType.of(Types.IntegerType.get(), true), 
false, LIST_VALIDATOR),
+        // Map
+        Arguments.of(
+            "properties",
+            Types.MapType.of(Types.StringType.get(), Types.IntegerType.get(), 
true),
+            true,
+            MAP_VALIDATOR),
+        // Struct
+        Arguments.of("person", SIMPLE_STRUCT, true, STRUCT_VALIDATOR),
+        // Nested Struct
+        Arguments.of("person_nested", NESTED_STRUCT, false, 
NESTED_STRUCT_VALIDATOR),
+        Arguments.of(
+            "person_nested_json",
+            Types.ExternalType.of(NESTED_STRUCT_JSON),
+            false,
+            NESTED_STRUCT_VALIDATOR),
+        // List of Structs
+        Arguments.of("items", LIST_OF_STRUCTS, false, 
LIST_OF_STRUCTS_VALIDATOR),
+        // Union
+        Arguments.of(
+            "union_field",
+            Types.UnionType.of(Types.IntegerType.get(), 
Types.StringType.get()),
+            true,
+            UNION_VALIDATOR));
+  }
+}
diff --git a/docs/generic-lakehouse-catalog.md 
b/docs/generic-lakehouse-catalog.md
new file mode 100644
index 0000000000..35eaeb4660
--- /dev/null
+++ b/docs/generic-lakehouse-catalog.md
@@ -0,0 +1,140 @@
+---
+title: "Lakehouse catalog"
+slug: /lakehouse-catalog
+keywords:
+  - lakehouse
+  - lance
+  - metadata
+license: "This software is licensed under the Apache License version 2."
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+## Introduction
+
+TBD.
+
+### Requirements and limitations
+
+TBD.
+
+## Catalog
+
+### Catalog capabilities
+
+TBD.
+
+### Catalog properties
+
+TBD.
+
+### Catalog operations
+
+TBD.
+
+## Schema
+
+### Schema capabilities
+
+TBD.
+
+### Schema properties
+
+TBD.
+
+### Schema operations
+
+Please refer to [Manage Relational Metadata Using 
Gravitino](./manage-relational-metadata-using-gravitino.md#schema-operations) 
for more details.
+
+## Table
+
+### Table capabilities
+
+TBD.
+
+### Table partitions
+
+TBD.
+
+### Table sort orders
+
+TBD.
+
+### Table distributions
+
+TBD.
+
+### Table column types
+
+Because Lance uses Apache Arrow for its table schemas, the following table shows 
the mapping between Gravitino types and Arrow types:
+
+| Gravitino Type                   | Arrow Type                              |
+|----------------------------------|-----------------------------------------|
+| `Struct`                         | `Struct`                                |
+| `Map`                            | `Map`                                   |
+| `List`                           | `List`                                  |
+| `Boolean`                        | `Boolean`                               |
+| `Byte`                           | `Int8`                                  |
+| `Short`                          | `Int16`                                 |
+| `Integer`                        | `Int32`                                 |
+| `Long`                           | `Int64`                                 |
+| `Float`                          | `Float`                                 |
+| `Double`                         | `Double`                                |
+| `String`                         | `Utf8`                                  |
+| `Binary`                         | `Binary`                                |
+| `Decimal(p, s)`                  | `Decimal(p, s)` (128-bit)               |
+| `Date`                           | `Date`                                  |
+| `Timestamp`/`Timestamp(6)`       | `TimestampType Microsecond withoutZone` |
+| `Timestamp(0)`                   | `TimestampType Second withoutZone`      |
+| `Timestamp(3)`                   | `TimestampType Millisecond withoutZone` |
+| `Timestamp(9)`                   | `TimestampType Nanosecond withoutZone`  |
+| `Timestamp_tz`/`Timestamp_tz(6)` | `TimestampType Microsecond withUtc`     |
+| `Timestamp_tz(0)`                | `TimestampType Second withUtc`          |
+| `Timestamp_tz(3)`                | `TimestampType Millisecond withUtc`     |
+| `Timestamp_tz(9)`                | `TimestampType Nanosecond withUtc`      |
+| `Time`/`Time(9)`                 | `Time Nanosecond`                       |
+| `Null`                           | `Null`                                  |
+| `Fixed(n)`                       | `Fixed-Size Binary(n)`                  |
+| `Interval_year`                  | `Interval(YearMonth)`                   |
+| `Interval_day`                   | `Duration(Microsecond)`                 |
+| `External(arrow_field_json_str)` | Any Arrow Field (see note below)        |
+
+`External(arrow_field_json_str)`:
+
+As the table above shows, Gravitino provides mappings for most common data 
types. However, 
+in some cases, you may need to use an Arrow data type that is not directly 
supported by Gravitino.
+
+To address this, Gravitino introduces the `External(arrow_field_json_str)` 
type, 
+which allows you to define any Arrow data type by providing the JSON string of 
an Arrow `Field`.
+
+The JSON string must conform to the Apache Arrow `Field` 
[specification](https://github.com/apache/arrow-java/blob/ed81e5981a2bee40584b3a411ed755cb4cc5b91f/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java#L80C1-L86C68),
 
+including details such as the field name, data type, and nullability.
+Here are some examples of how to use the `External` type for various Arrow types 
that are not natively supported by Gravitino:
+
+| Arrow Type        | External type                                            
                                                                                
                                                                                
                               | 
+|-------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `Large Utf8`      | 
`External("{\"name\":\"col_name\",\"nullable\":true,\"type\":{\"name\":\"largeutf8\"},\"children\":[]}")`
                                                                                
                                                               |
+| `Large Binary`    | 
`External("{\"name\":\"col_name\",\"nullable\":true,\"type\":{\"name\":\"largebinary\"},\"children\":[]}")`
                                                                                
                                                             |         
+| `Large List`      | 
`External("{\"name\":\"col_name\",\"nullable\":true,\"type\":{\"name\":\"largelist\"},\"children\":[{\"name\":\"element\",\"nullable\":true,\"type\":{\"name\":\"int\",
 \"bitWidth\":32, \"isSigned\": true},\"children\":[]}]}")`                     
 |
+| `Fixed-Size List` | 
`External("{\"name\":\"col_name\",\"nullable\":true,\"type\":{\"name\":\"fixedsizelist\",
 
\"listSize\":10},\"children\":[{\"name\":\"element\",\"nullable\":true,\"type\":{\"name\":\"int\",
 \"bitWidth\":32, \"isSigned\": true},\"children\":[]}]}")` |
+
+**Important considerations:**
+- The `name` attribute and `nullable` attribute in the JSON string must 
exactly match the corresponding column name and nullability in the Gravitino table.
+- The `children` array should be empty for primitive types. For complex types 
like `Struct` or `List`, it must contain the definitions of the child fields.
+
+### Table properties
+
+TBD.
+
+### Table indexes
+
+TBD.
+
+### Table operations
+
+Please refer to [Manage Relational Metadata Using 
Gravitino](./manage-relational-metadata-using-gravitino.md#table-operations) 
for more details.
+
+## Object store configuration
+
+TBD.


Reply via email to