This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 6a4c96ea feat(table): Adding geometry and geography type + schema
plumbing (#984)
6a4c96ea is described below
commit 6a4c96ea311b36beec9387934e5a0c644f5247e5
Author: David Zhao <[email protected]>
AuthorDate: Fri May 22 20:14:14 2026 +0200
feat(table): Adding geometry and geography type + schema plumbing (#984)
Based on #628; addresses #990.
Note that this PR lays the groundwork for a follow-up PR that will address #991.
Co-authored-by: David Zhao <[email protected]>
---
exprs.go | 8 +-
go.mod | 1 +
go.sum | 2 +
schema.go | 6 +
schema_test.go | 275 ++++++++++++++++++++++++++++++++
table/arrow_utils.go | 17 ++
table/metadata_builder_internal_test.go | 129 +++++++++++++++
table/metadata_schema_compatibility.go | 41 +++--
table/substrait/substrait.go | 14 +-
table/update_schema_test.go | 73 +++++++++
table/update_spec_test.go | 47 ++++++
transforms.go | 4 +
transforms_test.go | 16 ++
types.go | 183 +++++++++++++++++++++
types_test.go | 245 ++++++++++++++++++++++++++++
visitors.go | 2 +-
16 files changed, 1046 insertions(+), 17 deletions(-)
diff --git a/exprs.go b/exprs.go
index 4d678356..6c58c87c 100644
--- a/exprs.go
+++ b/exprs.go
@@ -445,7 +445,7 @@ func createBoundRef(field NestedField, acc accessor)
BoundReference {
return &boundRef[Timestamp]{field: field, acc: acc}
case StringType:
return &boundRef[string]{field: field, acc: acc}
- case FixedType, BinaryType:
+ case FixedType, BinaryType, GeographyType, GeometryType:
return &boundRef[[]byte]{field: field, acc: acc}
case DecimalType:
return &boundRef[Decimal]{field: field, acc: acc}
@@ -635,7 +635,7 @@ func createBoundUnaryPredicate(op Operation, term
BoundTerm) BoundUnaryPredicate
return newBoundUnaryPred[Timestamp](op, term)
case StringType:
return newBoundUnaryPred[string](op, term)
- case FixedType, BinaryType:
+ case FixedType, BinaryType, GeographyType, GeometryType:
return newBoundUnaryPred[[]byte](op, term)
case DecimalType:
return newBoundUnaryPred[Decimal](op, term)
@@ -799,7 +799,7 @@ func createBoundLiteralPredicate(op Operation, term
BoundTerm, lit Literal) (Bou
return newBoundLiteralPredicate[Timestamp](op, term, finalLit),
nil
case StringType:
return newBoundLiteralPredicate[string](op, term, finalLit), nil
- case FixedType, BinaryType:
+ case FixedType, BinaryType, GeographyType, GeometryType:
return newBoundLiteralPredicate[[]byte](op, term, finalLit), nil
case DecimalType:
return newBoundLiteralPredicate[Decimal](op, term, finalLit),
nil
@@ -967,7 +967,7 @@ func createBoundSetPredicate(op Operation, term BoundTerm,
lits Set[Literal]) (B
return newBoundSetPredicate[Timestamp](op, term, typedSet), nil
case StringType:
return newBoundSetPredicate[string](op, term, typedSet), nil
- case BinaryType, FixedType:
+ case BinaryType, FixedType, GeographyType, GeometryType:
return newBoundSetPredicate[[]byte](op, term, typedSet), nil
case DecimalType:
return newBoundSetPredicate[Decimal](op, term, typedSet), nil
diff --git a/go.mod b/go.mod
index d49f5010..937fc5a2 100644
--- a/go.mod
+++ b/go.mod
@@ -144,6 +144,7 @@ require (
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsevents v0.2.0 // indirect
github.com/fvbommel/sortorder v1.1.0 // indirect
+ github.com/geoarrow/geoarrow-go v0.0.0-20260403143023-f54751c3e3a1 //
indirect
github.com/go-jose/go-jose/v4 v4.1.4 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
diff --git a/go.sum b/go.sum
index 4f38aa24..56a6eb69 100644
--- a/go.sum
+++ b/go.sum
@@ -271,6 +271,8 @@ github.com/fsnotify/fsevents v0.2.0
h1:BRlvlqjvNTfogHfeBOFvSC9N0Ddy+wzQCQukyoD7o
github.com/fsnotify/fsevents v0.2.0/go.mod
h1:B3eEk39i4hz8y1zaWS/wPrAP4O6wkIl7HQwKBr1qH/w=
github.com/fvbommel/sortorder v1.1.0
h1:fUmoe+HLsBTctBDoaBwpQo5N+nrCp8g/BjKb/6ZQmYw=
github.com/fvbommel/sortorder v1.1.0/go.mod
h1:uk88iVf1ovNn1iLfgUVU2F9o5eO30ui720w+kxuqRs0=
+github.com/geoarrow/geoarrow-go v0.0.0-20260403143023-f54751c3e3a1
h1:8VcjuP5pKZ717K4zOLS9Lm4Zsn8JwD9X+CQGFCiRT2E=
+github.com/geoarrow/geoarrow-go v0.0.0-20260403143023-f54751c3e3a1/go.mod
h1:XMOIOA5J96jEzAOpBROPyz0n14sHsooeF+y2FtGdrCY=
github.com/go-jose/go-jose/v4 v4.1.4
h1:moDMcTHmvE6Groj34emNPLs/qtYXRVcd6S7NHbHz3kA=
github.com/go-jose/go-jose/v4 v4.1.4/go.mod
h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08=
github.com/go-logr/logr v1.2.2/go.mod
h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
diff --git a/schema.go b/schema.go
index 22a0ddc0..dc88e4a6 100644
--- a/schema.go
+++ b/schema.go
@@ -569,6 +569,8 @@ type SchemaVisitorPerPrimitiveType[T any] interface {
VisitUUID() T
VisitUnknown() T
VisitVariant() T
+ VisitGeometry(GeometryType) T
+ VisitGeography(GeographyType) T
}
// Visit accepts a visitor and performs a post-order traversal of the given
schema.
@@ -721,6 +723,10 @@ func visitField[T any](f NestedField, visitor
SchemaVisitor[T]) T {
return perPrimitive.VisitFixed(t)
case UnknownType:
return perPrimitive.VisitUnknown()
+ case GeographyType:
+ return perPrimitive.VisitGeography(t)
+ case GeometryType:
+ return perPrimitive.VisitGeometry(t)
}
}
diff --git a/schema_test.go b/schema_test.go
index b0919ede..8f689dc1 100644
--- a/schema_test.go
+++ b/schema_test.go
@@ -1071,3 +1071,278 @@ func TestSchemaSelectCaseInsensitiveMissingColumn(t
*testing.T) {
assert.ErrorIs(t, err, iceberg.ErrInvalidSchema)
assert.ErrorContains(t, err, "could not find column missing_col")
}
+
+func TestSchemaWithGeometryGeographyTypes(t *testing.T) {
+ geom, err := iceberg.GeometryTypeOf("srid:4326")
+ require.NoError(t, err)
+ geog, err := iceberg.GeographyTypeOf("srid:4269", "karney")
+ require.NoError(t, err)
+
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "id", Type:
iceberg.PrimitiveTypes.Int64, Required: true},
+ iceberg.NestedField{ID: 2, Name: "simple_point", Type:
iceberg.GeometryType{}, Required: false},
+ iceberg.NestedField{ID: 3, Name: "location", Type: geom,
Required: false},
+ iceberg.NestedField{ID: 4, Name: "service_area", Type: geog,
Required: false},
+ )
+
+ data, err := json.Marshal(schema)
+ require.NoError(t, err)
+
+ assert.JSONEq(t, `{
+ "type": "struct",
+ "schema-id": 1,
+ "identifier-field-ids": [],
+ "fields": [
+ {"id": 1, "name": "id", "type": "long", "required":
true},
+ {"id": 2, "name": "simple_point", "type": "geometry",
"required": false},
+ {"id": 3, "name": "location", "type":
"geometry(srid:4326)", "required": false},
+ {"id": 4, "name": "service_area", "type":
"geography(srid:4269, karney)", "required": false}
+ ]
+ }`, string(data))
+
+ var unmarshaledSchema iceberg.Schema
+ require.NoError(t, json.Unmarshal(data, &unmarshaledSchema))
+ assert.True(t, schema.Equals(&unmarshaledSchema))
+}
+
+func TestNestedFieldToStringGeographyGeometry(t *testing.T) {
+ geom, err := iceberg.GeometryTypeOf("srid:3857")
+ require.NoError(t, err)
+ geog, err := iceberg.GeographyTypeOf("srid:4269", "karney")
+ require.NoError(t, err)
+
+ tests := []struct {
+ field iceberg.NestedField
+ expected string
+ }{
+ {
+ iceberg.NestedField{ID: 1, Name: "point", Type:
iceberg.GeometryType{}, Required: false},
+ "1: point: optional geometry",
+ },
+ {
+ iceberg.NestedField{ID: 2, Name: "location", Type:
geom, Required: true},
+ "2: location: required geometry(srid:3857)",
+ },
+ {
+ iceberg.NestedField{ID: 3, Name: "area", Type:
iceberg.GeographyType{}, Required: false},
+ "3: area: optional geography",
+ },
+ {
+ iceberg.NestedField{ID: 4, Name: "region", Type: geog,
Required: false},
+ "4: region: optional geography(srid:4269, karney)",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.field.Name, func(t *testing.T) {
+ assert.Equal(t, tt.expected, tt.field.String())
+ })
+ }
+}
+
+func TestSchemaWithGeometryInNestedStructures(t *testing.T) {
+ geom, err := iceberg.GeometryTypeOf("srid:4326")
+ require.NoError(t, err)
+ geog, err := iceberg.GeographyTypeOf("srid:4269", "spherical")
+ require.NoError(t, err)
+
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{
+ ID: 1,
+ Name: "locations",
+ Type: &iceberg.ListType{
+ ElementID: 2,
+ Element: geom,
+ ElementRequired: true,
+ },
+ Required: true,
+ },
+ iceberg.NestedField{
+ ID: 3,
+ Name: "region_data",
+ Type: &iceberg.MapType{
+ KeyID: 4,
+ KeyType: iceberg.PrimitiveTypes.String,
+ ValueID: 5,
+ ValueType: geog,
+ ValueRequired: false,
+ },
+ Required: false,
+ },
+ iceberg.NestedField{
+ ID: 6,
+ Name: "place",
+ Type: &iceberg.StructType{
+ FieldList: []iceberg.NestedField{
+ {ID: 7, Name: "name", Type:
iceberg.PrimitiveTypes.String, Required: true},
+ {ID: 8, Name: "coords", Type:
iceberg.GeometryType{}, Required: false},
+ },
+ },
+ Required: false,
+ },
+ )
+
+ data, err := json.MarshalIndent(schema, "", " ")
+ require.NoError(t, err)
+
+ var unmarshaledSchema iceberg.Schema
+ require.NoError(t, json.Unmarshal(data, &unmarshaledSchema))
+ assert.True(t, schema.Equals(&unmarshaledSchema))
+
+ assert.Equal(t, "1: locations: required list<geometry(srid:4326)>",
schema.Field(0).String())
+ assert.Equal(t, "3: region_data: optional map<string,
geography(srid:4269)>", schema.Field(1).String())
+}
+
+func TestPruneColumnsWithGeometry(t *testing.T) {
+ geom, err := iceberg.GeometryTypeOf("srid:4326")
+ require.NoError(t, err)
+
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "id", Type:
iceberg.PrimitiveTypes.Int32, Required: true},
+ iceberg.NestedField{ID: 2, Name: "name", Type:
iceberg.PrimitiveTypes.String, Required: false},
+ iceberg.NestedField{ID: 3, Name: "location", Type: geom,
Required: false},
+ )
+
+ pruned, err := iceberg.PruneColumns(schema, map[int]iceberg.Void{1: {},
3: {}}, false)
+ require.NoError(t, err)
+
+ expected := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "id", Type:
iceberg.PrimitiveTypes.Int32, Required: true},
+ iceberg.NestedField{ID: 3, Name: "location", Type: geom,
Required: false},
+ )
+
+ assert.True(t, pruned.Equals(expected))
+}
+
+func TestSchemaIndexByIDWithGeography(t *testing.T) {
+ geog, err := iceberg.GeographyTypeOf("srid:4269", "karney")
+ require.NoError(t, err)
+
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "id", Type:
iceberg.PrimitiveTypes.Int32, Required: true},
+ iceberg.NestedField{ID: 2, Name: "area", Type: geog, Required:
false},
+ )
+
+ index, err := iceberg.IndexByID(schema)
+ require.NoError(t, err)
+
+ assert.Len(t, index, 2)
+ assert.Equal(t, geog, index[2].Type)
+ assert.Equal(t, "area", index[2].Name)
+}
+
+func TestSchemaFindColumnNameWithGeometryGeography(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "point", Type:
iceberg.GeometryType{}, Required: false},
+ iceberg.NestedField{ID: 2, Name: "region", Type:
iceberg.GeographyType{}, Required: false},
+ )
+
+ name, ok := schema.FindColumnName(1)
+ assert.True(t, ok)
+ assert.Equal(t, "point", name)
+
+ name, ok = schema.FindColumnName(2)
+ assert.True(t, ok)
+ assert.Equal(t, "region", name)
+}
+
+type noopSchemaVisitor struct{}
+
+func (noopSchemaVisitor) Schema(_ *iceberg.Schema, result iceberg.Void)
iceberg.Void { return result }
+
+func (noopSchemaVisitor) Struct(_ iceberg.StructType, _ []iceberg.Void)
iceberg.Void {
+ return iceberg.Void{}
+}
+
+func (noopSchemaVisitor) Field(_ iceberg.NestedField, _ iceberg.Void)
iceberg.Void {
+ return iceberg.Void{}
+}
+
+func (noopSchemaVisitor) List(_ iceberg.ListType, _ iceberg.Void) iceberg.Void
{ return iceberg.Void{} }
+
+func (noopSchemaVisitor) Map(_ iceberg.MapType, _, _ iceberg.Void)
iceberg.Void {
+ return iceberg.Void{}
+}
+
+func (noopSchemaVisitor) Primitive(_ iceberg.PrimitiveType) iceberg.Void {
return iceberg.Void{} }
+
+func (noopSchemaVisitor) Variant(_ iceberg.VariantType) iceberg.Void { return
iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitFixed(_ iceberg.FixedType) iceberg.Void { return
iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitDecimal(_ iceberg.DecimalType) iceberg.Void {
return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitBoolean() iceberg.Void { return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitInt32() iceberg.Void { return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitInt64() iceberg.Void { return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitFloat32() iceberg.Void { return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitFloat64() iceberg.Void { return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitDate() iceberg.Void { return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitTime() iceberg.Void { return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitTimestamp() iceberg.Void { return iceberg.Void{}
}
+
+func (noopSchemaVisitor) VisitTimestampNs() iceberg.Void { return
iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitTimestampTz() iceberg.Void { return
iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitTimestampNsTz() iceberg.Void { return
iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitString() iceberg.Void { return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitBinary() iceberg.Void { return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitUUID() iceberg.Void { return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitUnknown() iceberg.Void { return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitGeometry(_ iceberg.GeometryType) iceberg.Void {
return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitGeography(_ iceberg.GeographyType) iceberg.Void
{ return iceberg.Void{} }
+
+func (noopSchemaVisitor) VisitVariant() iceberg.Void { return iceberg.Void{} }
+
+var _ iceberg.SchemaVisitorPerPrimitiveType[iceberg.Void] = noopSchemaVisitor{}
+
+type failIfGeoFallsBackToPrimitive struct {
+ noopSchemaVisitor
+ geometryCalls int
+ geographyCalls int
+}
+
+func (*failIfGeoFallsBackToPrimitive) Primitive(_ iceberg.PrimitiveType)
iceberg.Void {
+ panic("primitive fallback should not be called for geometry/geography")
+}
+
+func (v *failIfGeoFallsBackToPrimitive) VisitGeometry(_ iceberg.GeometryType)
iceberg.Void {
+ v.geometryCalls++
+
+ return iceberg.Void{}
+}
+
+func (v *failIfGeoFallsBackToPrimitive) VisitGeography(_
iceberg.GeographyType) iceberg.Void {
+ v.geographyCalls++
+
+ return iceberg.Void{}
+}
+
+var _ iceberg.SchemaVisitorPerPrimitiveType[iceberg.Void] =
(*failIfGeoFallsBackToPrimitive)(nil)
+
+func TestVisitGeoSchemaWithSchemaVisitorPerPrimitiveType(t *testing.T) {
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "point", Type:
iceberg.GeometryType{}, Required: false},
+ iceberg.NestedField{ID: 2, Name: "region", Type:
iceberg.GeographyType{}, Required: false},
+ )
+
+ v := &failIfGeoFallsBackToPrimitive{}
+ _, err := iceberg.Visit(schema, v)
+ require.NoError(t, err)
+ assert.Equal(t, 1, v.geometryCalls)
+ assert.Equal(t, 1, v.geographyCalls)
+}
diff --git a/table/arrow_utils.go b/table/arrow_utils.go
index a296980a..48c04c74 100644
--- a/table/arrow_utils.go
+++ b/table/arrow_utils.go
@@ -41,6 +41,7 @@ import (
"github.com/apache/iceberg-go/internal"
iceio "github.com/apache/iceberg-go/io"
tblutils "github.com/apache/iceberg-go/table/internal"
+ "github.com/geoarrow/geoarrow-go"
"github.com/google/uuid"
"github.com/pterm/pterm"
"golang.org/x/sync/errgroup"
@@ -656,6 +657,22 @@ func (c convertToArrow) VisitVariant() arrow.Field {
return arrow.Field{Type: extensions.NewDefaultVariantType()}
}
+func (c convertToArrow) VisitGeometry(iceberg.GeometryType) arrow.Field {
+ if c.useLargeTypes {
+ return arrow.Field{Type:
geoarrow.NewWKBType(geoarrow.WKBWithLargeBinaryStorage())}
+ }
+
+ return arrow.Field{Type:
geoarrow.NewWKBType(geoarrow.WKBWithBinaryStorage())}
+}
+
+func (c convertToArrow) VisitGeography(iceberg.GeographyType) arrow.Field {
+ if c.useLargeTypes {
+ return arrow.Field{Type:
geoarrow.NewWKBType(geoarrow.WKBWithLargeBinaryStorage())}
+ }
+
+ return arrow.Field{Type:
geoarrow.NewWKBType(geoarrow.WKBWithBinaryStorage())}
+}
+
var _ iceberg.SchemaVisitorPerPrimitiveType[arrow.Field] = convertToArrow{}
// SchemaToArrowSchema converts an Iceberg schema to an Arrow schema. If the
metadata parameter
diff --git a/table/metadata_builder_internal_test.go
b/table/metadata_builder_internal_test.go
index b27b9776..1594b438 100644
--- a/table/metadata_builder_internal_test.go
+++ b/table/metadata_builder_internal_test.go
@@ -20,12 +20,14 @@ package table
import (
"fmt"
"slices"
+ "strings"
"testing"
"time"
"github.com/apache/iceberg-go"
"github.com/davecgh/go-spew/spew"
"github.com/google/uuid"
+ "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -1412,6 +1414,8 @@ func TestUnsupportedTypes(t *testing.T) {
TestTypes := []iceberg.Type{
iceberg.TimestampNsType{},
iceberg.TimestampTzNsType{},
+ iceberg.GeometryType{},
+ iceberg.GeographyType{},
}
for _, typ := range TestTypes {
for unsupportedVersion := 1; unsupportedVersion <
minFormatVersionForType(typ); unsupportedVersion++ {
@@ -1878,6 +1882,131 @@ func TestVariantTypeValidation(t *testing.T) {
})
}
+func TestGeometryGeographyNullOnlyDefaults(t *testing.T) {
+ testTypes := []struct {
+ name string
+ typ iceberg.Type
+ }{
+ {"geometry", iceberg.GeometryType{}},
+ {"geography", iceberg.GeographyType{}},
+ }
+
+ for _, tt := range testTypes {
+ t.Run(tt.name+" with non-null initial default", func(t
*testing.T) {
+ defaultValue := "POINT(0 0)"
+ sc := iceberg.NewSchema(0,
+ iceberg.NestedField{
+ Type: tt.typ,
+ ID: 1,
+ Name: "location",
+ Required: false,
+ InitialDefault: &defaultValue,
+ },
+ )
+
+ err := checkSchemaCompatibility(sc, 3)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "columns must default to
null")
+ require.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+ })
+
+ t.Run(tt.name+" with non-null write default", func(t
*testing.T) {
+ defaultValue := "POINT(0 0)"
+ sc := iceberg.NewSchema(0,
+ iceberg.NestedField{
+ Type: tt.typ,
+ ID: 1,
+ Name: "location",
+ Required: false,
+ WriteDefault: &defaultValue,
+ },
+ )
+
+ err := checkSchemaCompatibility(sc, 3)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "columns must default to
null")
+ require.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+ })
+
+ t.Run(tt.name+" with null defaults", func(t *testing.T) {
+ sc := iceberg.NewSchema(0,
+ iceberg.NestedField{
+ Type: tt.typ,
+ ID: 1,
+ Name: "location",
+ Required: false,
+ },
+ )
+
+ err := checkSchemaCompatibility(sc, 3)
+ require.NoError(t, err)
+ })
+
+ t.Run(tt.name+" in v2 type unsupported", func(t *testing.T) {
+ defaultValue := "POINT(0 0)"
+ sc := iceberg.NewSchema(0,
+ iceberg.NestedField{
+ Type: tt.typ,
+ ID: 1,
+ Name: "location",
+ Required: false,
+ InitialDefault: &defaultValue,
+ },
+ )
+
+ err := checkSchemaCompatibility(sc, 2)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "is not supported until
v3")
+ require.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+ })
+
+ t.Run(tt.name+" with v3 must default to null", func(t
*testing.T) {
+ defaultValue := "POINT(0 0)"
+ sc := iceberg.NewSchema(0,
+ iceberg.NestedField{
+ Type: tt.typ,
+ ID: 1,
+ Name: "location",
+ Required: false,
+ InitialDefault: &defaultValue,
+ WriteDefault: &defaultValue,
+ },
+ )
+
+ err := checkSchemaCompatibility(sc, 3)
+ require.Error(t, err)
+ require.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+
+ require.ErrorContains(t, err, "invalid initial default")
+ require.ErrorContains(t, err, "invalid write default")
+ })
+
+ t.Run(tt.name+" with both non-null defaults produces exactly
two error lines", func(t *testing.T) {
+ defaultValue := "POINT(0 0)"
+ sc := iceberg.NewSchema(0,
+ iceberg.NestedField{
+ Type: tt.typ,
+ ID: 1,
+ Name: "location",
+ Required: false,
+ InitialDefault: &defaultValue,
+ WriteDefault: &defaultValue,
+ },
+ )
+
+ err := checkSchemaCompatibility(sc, 3)
+ require.Error(t, err)
+ require.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+
+ errStr := err.Error()
+ initialCount := strings.Count(errStr, "invalid initial
default")
+ writeCount := strings.Count(errStr, "invalid write
default")
+ assert.Equal(t, 1, initialCount, "expected exactly one
'invalid initial default' line, got %d in: %s", initialCount, errStr)
+ assert.Equal(t, 1, writeCount, "expected exactly one
'invalid write default' line, got %d in: %s", writeCount, errStr)
+ })
+ }
+}
+
func TestComplexTypeDefaultValidation(t *testing.T) {
t.Run("InvalidStructInitialDefault", func(t *testing.T) {
schema := iceberg.NewSchema(1,
diff --git a/table/metadata_schema_compatibility.go
b/table/metadata_schema_compatibility.go
index 4e0450a9..38c764b2 100644
--- a/table/metadata_schema_compatibility.go
+++ b/table/metadata_schema_compatibility.go
@@ -39,7 +39,16 @@ func (e ErrIncompatibleSchema) Error() string {
fmt.Fprintf(&problems, "\n- invalid type for %s: %s is
not supported until v%d", f.ColName, f.Field.Type,
f.UnsupportedType.MinFormatVersion)
}
if f.InvalidDefault != nil {
- fmt.Fprintf(&problems, "\n- invalid initial default for
%s: non-null default (%v) is not supported until v%d", f.ColName,
f.Field.InitialDefault, f.InvalidDefault.MinFormatVersion)
+ if f.InvalidDefault.MustBeNullForType {
+ if f.Field.InitialDefault != nil {
+ fmt.Fprintf(&problems, "\n- invalid
initial default for %s: %s columns must default to null", f.ColName,
f.Field.Type)
+ }
+ if f.Field.WriteDefault != nil {
+ fmt.Fprintf(&problems, "\n- invalid
write default for %s: %s columns must default to null", f.ColName, f.Field.Type)
+ }
+ } else {
+ fmt.Fprintf(&problems, "\n- invalid initial
default for %s: non-null default (%v) is not supported until v%d", f.ColName,
f.Field.InitialDefault, f.InvalidDefault.MinFormatVersion)
+ }
}
}
@@ -62,8 +71,9 @@ type UnsupportedType struct {
}
type InvalidDefault struct {
- MinFormatVersion int
- WriteDefault any
+ MinFormatVersion int
+ WriteDefault any
+ MustBeNullForType bool
}
// checkSchemaCompatibility checks that the schema is compatible with the
table's format version.
@@ -113,12 +123,23 @@ func checkSchemaCompatibility(sc *iceberg.Schema,
formatVersion int) error {
})
}
- if field.InitialDefault != nil && formatVersion <
defaultValuesMinFormatVersion {
- problems = append(problems, IncompatibleField{
- Field: field,
- ColName: colName,
- InvalidDefault:
&InvalidDefault{MinFormatVersion: defaultValuesMinFormatVersion, WriteDefault:
field.InitialDefault},
- })
+ switch field.Type.(type) {
+ case iceberg.GeometryType, iceberg.GeographyType:
+ if field.InitialDefault != nil || field.WriteDefault !=
nil {
+ problems = append(problems, IncompatibleField{
+ Field: field,
+ ColName: colName,
+ InvalidDefault:
&InvalidDefault{MustBeNullForType: true},
+ })
+ }
+ default:
+ if field.InitialDefault != nil && formatVersion <
defaultValuesMinFormatVersion {
+ problems = append(problems, IncompatibleField{
+ Field: field,
+ ColName: colName,
+ InvalidDefault:
&InvalidDefault{MinFormatVersion: defaultValuesMinFormatVersion, WriteDefault:
field.InitialDefault},
+ })
+ }
}
}
@@ -134,7 +155,7 @@ func checkSchemaCompatibility(sc *iceberg.Schema,
formatVersion int) error {
// version number for types that require newer format versions.
func minFormatVersionForType(t iceberg.Type) int {
switch t.(type) {
- case iceberg.TimestampNsType, iceberg.TimestampTzNsType,
iceberg.UnknownType, iceberg.VariantType:
+ case iceberg.TimestampNsType, iceberg.TimestampTzNsType,
iceberg.UnknownType, iceberg.VariantType, iceberg.GeometryType,
iceberg.GeographyType:
return 3
default:
// All other types supported in v1+
diff --git a/table/substrait/substrait.go b/table/substrait/substrait.go
index 702a1acf..2f486d35 100644
--- a/table/substrait/substrait.go
+++ b/table/substrait/substrait.go
@@ -171,6 +171,14 @@ func (convertToSubstrait) VisitUnknown() types.Type {
return nil
}
+func (convertToSubstrait) VisitGeometry(iceberg.GeometryType) types.Type {
+ return &types.BinaryType{}
+}
+
+func (convertToSubstrait) VisitGeography(iceberg.GeographyType) types.Type {
+ return &types.BinaryType{}
+}
+
func (convertToSubstrait) VisitVariant() types.Type {
// Variant has no Substrait equivalent today. We return BinaryType as a
// structural placeholder so that schema conversion and expression
binding
@@ -360,7 +368,8 @@ func (t *toSubstraitExpr) VisitIsNan(term
iceberg.BoundTerm) expr.Builder {
func (t *toSubstraitExpr) VisitNotNan(term iceberg.BoundTerm) expr.Builder {
return t.bldr.ScalarFunc(notID).Args(
- t.makeRefFunc(isNaNID, term).(expr.FuncArgBuilder))
+ t.makeRefFunc(isNaNID, term).(expr.FuncArgBuilder),
+ )
}
func (t *toSubstraitExpr) VisitIsNull(term iceberg.BoundTerm) expr.Builder {
@@ -406,5 +415,6 @@ func (t *toSubstraitExpr) VisitStartsWith(term
iceberg.BoundTerm, lit iceberg.Li
func (t *toSubstraitExpr) VisitNotStartsWith(term iceberg.BoundTerm, lit
iceberg.Literal) expr.Builder {
return t.bldr.ScalarFunc(notID).Args(
- t.makeLitFunc(startsWithID, term, lit).(expr.FuncArgBuilder))
+ t.makeLitFunc(startsWithID, term, lit).(expr.FuncArgBuilder),
+ )
}
diff --git a/table/update_schema_test.go b/table/update_schema_test.go
index 16245c51..a2bd1a1e 100644
--- a/table/update_schema_test.go
+++ b/table/update_schema_test.go
@@ -344,6 +344,54 @@ func TestAddColumn(t *testing.T) {
}},
}, newSchema.Fields())
})
+
+ t.Run("test update schema with add geometry and geography columns",
func(t *testing.T) {
+ metaV3, err := NewMetadata(originalSchema, nil,
UnsortedSortOrder, "", iceberg.Properties{
+ PropertyFormatVersion: "3",
+ })
+ assert.NoError(t, err)
+
+ table := New([]string{"id"}, metaV3, "", nil, nil)
+ txn := table.NewTransaction()
+
+ geog, err := iceberg.GeographyTypeOf("srid:4269", "karney")
+ assert.NoError(t, err)
+
+ upd := NewUpdateSchema(txn, true, true).
+ AddColumn([]string{"geom"}, iceberg.GeometryType{}, "",
false, nil).
+ AddColumn([]string{"geog"}, geog, "", false, nil)
+ err = upd.Commit()
+ assert.NoError(t, err)
+
+ newSchema := txn.meta.CurrentSchema()
+ assert.NotNil(t, newSchema)
+
+ geomField, ok := newSchema.FindFieldByName("geom")
+ assert.True(t, ok)
+ assert.Equal(t, 12, geomField.ID)
+ assert.Equal(t, iceberg.GeometryType{}, geomField.Type)
+
+ geogField, ok := newSchema.FindFieldByName("geog")
+ assert.True(t, ok)
+ assert.Equal(t, 13, geogField.ID)
+ assert.True(t, geogField.Type.Equals(geog))
+ })
+
+ t.Run("test update schema with add geometry and geography columns
errors in v2", func(t *testing.T) {
+ table := New([]string{"id"}, testMetadata, "", nil, nil)
+ txn := table.NewTransaction()
+
+ geog, err := iceberg.GeographyTypeOf("srid:4269", "karney")
+ assert.NoError(t, err)
+
+ upd := NewUpdateSchema(txn, true, true).
+ AddColumn([]string{"geom"}, iceberg.GeometryType{}, "",
false, nil).
+ AddColumn([]string{"geog"}, geog, "", false, nil)
+ err = upd.Commit()
+ assert.Error(t, err)
+ assert.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+ assert.Contains(t, err.Error(), "is not supported until v3")
+ })
}
func TestApplyChanges(t *testing.T) {
@@ -861,6 +909,31 @@ func TestErrorHandling(t *testing.T) {
assert.Contains(t, err.Error(), "cannot change column
nullability from optional to required")
})
+ t.Run("test update geography CRS and edge algorithm without
allowIncompatibleChanges", func(t *testing.T) {
+ currentGeog, err := iceberg.GeographyTypeOf("srid:4269",
"karney")
+ assert.NoError(t, err)
+ targetGeog, err := iceberg.GeographyTypeOf("srid:4326",
"spherical")
+ assert.NoError(t, err)
+
+ geoSchema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "id", Type:
iceberg.PrimitiveTypes.Int32, Required: true},
+ iceberg.NestedField{ID: 2, Name: "geog", Type:
currentGeog, Required: false},
+ )
+ geoMeta, err := NewMetadata(geoSchema, nil, UnsortedSortOrder,
"", iceberg.Properties{
+ PropertyFormatVersion: "3",
+ })
+ assert.NoError(t, err)
+
+ table := New([]string{"geo"}, geoMeta, "", nil, nil)
+ txn := table.NewTransaction()
+
+ _, err = NewUpdateSchema(txn, true,
false).UpdateColumn([]string{"geog"}, ColumnUpdate{
+ FieldType: iceberg.Optional[iceberg.Type]{Valid: true,
Val: targetGeog},
+ }).Apply()
+ assert.Error(t, err)
+ assert.Contains(t, err.Error(), "cannot promote
geography(srid:4269, karney) to geography(srid:4326)")
+ })
+
t.Run("test add required field without default value", func(t
*testing.T) {
table := New([]string{"id"}, testMetadata, "", nil, nil)
txn := table.NewTransaction()
diff --git a/table/update_spec_test.go b/table/update_spec_test.go
index 0359490b..48747c94 100644
--- a/table/update_spec_test.go
+++ b/table/update_spec_test.go
@@ -204,6 +204,53 @@ func TestUpdateSpecAddField(t *testing.T) {
assert.NotNil(t, newSpec)
assert.Equal(t, "street_void_1001",
newSpec.FieldsBySourceID(5)[0].Name)
})
+
+ t.Run("reject geometry source for identity partition transform", func(t
*testing.T) {
+ geoSchema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "id", Required: true,
Type: iceberg.PrimitiveTypes.Int64},
+ iceberg.NestedField{ID: 2, Name: "geom", Required:
false, Type: iceberg.GeometryType{}},
+ )
+ metadata, err := table.NewMetadata(geoSchema,
iceberg.UnpartitionedSpec, table.UnsortedSortOrder, "", iceberg.Properties{
+ table.PropertyFormatVersion: "3",
+ })
+ assert.NoError(t, err)
+
+ tbl := table.New([]string{"geo_geometry"}, metadata, "", nil,
nil)
+ specUpdate := table.NewUpdateSpec(tbl.NewTransaction(), true)
+
+ updates, reqs, err := specUpdate.
+ AddField("geom", iceberg.IdentityTransform{},
"geom_identity").
+ BuildUpdates()
+ assert.Error(t, err)
+ assert.Nil(t, updates)
+ assert.Nil(t, reqs)
+ })
+
+ t.Run("reject geography source for identity partition transform",
func(t *testing.T) {
+ geog, err := iceberg.GeographyTypeOf("srid:4269", "karney")
+ assert.NoError(t, err)
+
+ geoSchema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "id", Required: true,
Type: iceberg.PrimitiveTypes.Int64},
+ iceberg.NestedField{ID: 2, Name: "geog", Required:
false, Type: geog},
+ )
+ metadata, err := table.NewMetadata(geoSchema,
iceberg.UnpartitionedSpec, table.UnsortedSortOrder, "", iceberg.Properties{
+ table.PropertyFormatVersion: "3",
+ })
+ assert.NoError(t, err)
+
+ tbl := table.New([]string{"geo_geography"}, metadata, "", nil,
nil)
+ specUpdate := table.NewUpdateSpec(tbl.NewTransaction(), true)
+
+ updates, reqs, err := specUpdate.
+ AddField("geog", iceberg.IdentityTransform{},
"geog_identity").
+ BuildUpdates()
+ assert.Error(t, err)
+ assert.ErrorContains(t, err, "cannot transform")
+ assert.ErrorContains(t, err, "geog")
+ assert.Nil(t, updates)
+ assert.Nil(t, reqs)
+ })
}
func TestUpdateSpecAddIdentityField(t *testing.T) {
diff --git a/transforms.go b/transforms.go
index c3179892..1f9fa73d 100644
--- a/transforms.go
+++ b/transforms.go
@@ -113,6 +113,10 @@ func (t IdentityTransform) MarshalText() ([]byte, error) {
func (IdentityTransform) String() string { return "identity" }
func (IdentityTransform) CanTransform(t Type) bool {
+ switch t.(type) {
+ case GeometryType, GeographyType:
+ return false
+ }
_, ok := t.(PrimitiveType)
return ok
diff --git a/transforms_test.go b/transforms_test.go
index 0244f177..3c9c44d8 100644
--- a/transforms_test.go
+++ b/transforms_test.go
@@ -265,6 +265,8 @@ func TestCanTransform(t *testing.T) {
notAllowed: []iceberg.Type{
&iceberg.StructType{}, &iceberg.ListType{},
&iceberg.MapType{},
iceberg.VariantType{},
+ iceberg.GeometryType{},
+ iceberg.GeographyType{},
},
},
{
@@ -277,6 +279,8 @@ func TestCanTransform(t *testing.T) {
iceberg.PrimitiveTypes.String,
iceberg.PrimitiveTypes.Binary, iceberg.PrimitiveTypes.UUID,
iceberg.DecimalTypeOf(2, 1),
iceberg.FixedTypeOf(2), &iceberg.StructType{}, &iceberg.ListType{},
&iceberg.MapType{},
iceberg.VariantType{},
+ iceberg.GeographyType{},
+ iceberg.GeometryType{},
},
notAllowed: []iceberg.Type{},
},
@@ -293,6 +297,8 @@ func TestCanTransform(t *testing.T) {
iceberg.PrimitiveTypes.Bool,
iceberg.PrimitiveTypes.Float32, iceberg.PrimitiveTypes.Float64,
&iceberg.StructType{}, &iceberg.ListType{},
&iceberg.MapType{},
iceberg.VariantType{},
+ iceberg.GeometryType{},
+ iceberg.GeographyType{},
},
},
{
@@ -308,6 +314,8 @@ func TestCanTransform(t *testing.T) {
iceberg.PrimitiveTypes.TimestampNs,
iceberg.PrimitiveTypes.TimestampTzNs,
&iceberg.StructType{}, &iceberg.ListType{},
&iceberg.MapType{},
iceberg.VariantType{},
+ iceberg.GeometryType{},
+ iceberg.GeographyType{},
},
},
{
@@ -322,6 +330,8 @@ func TestCanTransform(t *testing.T) {
iceberg.PrimitiveTypes.String,
iceberg.PrimitiveTypes.Binary, iceberg.PrimitiveTypes.UUID,
iceberg.DecimalTypeOf(2, 1),
iceberg.FixedTypeOf(2), &iceberg.StructType{}, &iceberg.ListType{},
&iceberg.MapType{},
iceberg.VariantType{},
+ iceberg.GeographyType{},
+ iceberg.GeometryType{},
},
},
{
@@ -336,6 +346,8 @@ func TestCanTransform(t *testing.T) {
iceberg.PrimitiveTypes.String,
iceberg.PrimitiveTypes.Binary, iceberg.PrimitiveTypes.UUID,
iceberg.DecimalTypeOf(2, 1),
iceberg.FixedTypeOf(2), &iceberg.StructType{}, &iceberg.ListType{},
&iceberg.MapType{},
iceberg.VariantType{},
+ iceberg.GeographyType{},
+ iceberg.GeometryType{},
},
},
{
@@ -350,6 +362,8 @@ func TestCanTransform(t *testing.T) {
iceberg.PrimitiveTypes.String,
iceberg.PrimitiveTypes.Binary, iceberg.PrimitiveTypes.UUID,
iceberg.DecimalTypeOf(2, 1),
iceberg.FixedTypeOf(2), &iceberg.StructType{}, &iceberg.ListType{},
&iceberg.MapType{},
iceberg.VariantType{},
+ iceberg.GeographyType{},
+ iceberg.GeometryType{},
},
},
{
@@ -365,6 +379,8 @@ func TestCanTransform(t *testing.T) {
iceberg.PrimitiveTypes.Date,
iceberg.DecimalTypeOf(2, 1), iceberg.FixedTypeOf(2),
&iceberg.StructType{}, &iceberg.ListType{},
&iceberg.MapType{},
iceberg.VariantType{},
+ iceberg.GeographyType{},
+ iceberg.GeometryType{},
},
},
}
diff --git a/types.go b/types.go
index de77985f..c87f2c1f 100644
--- a/types.go
+++ b/types.go
@@ -27,11 +27,14 @@ import (
"time"
"github.com/apache/arrow-go/v18/arrow/decimal"
+ "github.com/geoarrow/geoarrow-go"
)
var (
regexFromBrackets = regexp.MustCompile(`^\w+\[(\d+)\]$`)
decimalRegex =
regexp.MustCompile(`decimal\(\s*(\d+)\s*,\s*(\d+)\s*\)`)
+ geometryRegex =
regexp.MustCompile(`(?i)^geometry\s*(?:\(\s*([^),]+?)\s*\))?$`)
+ geographyRegex =
regexp.MustCompile(`(?i)^geography\s*(?:\(\s*([^\s,)]+)\s*(?:,\s*(\w+)\s*)?\))?$`)
)
type Properties map[string]string
@@ -155,6 +158,10 @@ func (t *typeIFace) UnmarshalJSON(b []byte) error {
t.Type = UnknownType{}
case "variant":
t.Type = VariantType{}
+ case "geometry":
+ t.Type = GeometryType{}
+ case "geography":
+ t.Type = GeographyType{}
default:
switch {
case strings.HasPrefix(typename, "fixed"):
@@ -174,6 +181,44 @@ func (t *typeIFace) UnmarshalJSON(b []byte) error {
prec, _ := strconv.Atoi(matches[1])
scale, _ := strconv.Atoi(matches[2])
t.Type = DecimalType{precision: prec, scale:
scale}
+ // note that geo type names are case insensitive but
other type names are case sensitive.
+ // matches java behavior - this behavior is intentional
+ case strings.HasPrefix(strings.ToLower(typename),
"geometry"):
+ matches :=
geometryRegex.FindStringSubmatch(typename)
+ if len(matches) != 2 {
+ return fmt.Errorf("%w: %s",
ErrInvalidTypeString, typename)
+ }
+
+ if matches[1] != "" {
+ geom, err :=
GeometryTypeOf(strings.TrimSpace(matches[1]))
+ if err != nil {
+ return err
+ }
+ t.Type = geom
+ } else {
+ t.Type = GeometryType{}
+ }
+ case strings.HasPrefix(strings.ToLower(typename),
"geography"):
+ matches :=
geographyRegex.FindStringSubmatch(typename)
+ if len(matches) != 3 {
+ return fmt.Errorf("%w: %s",
ErrInvalidTypeString, typename)
+ }
+
+ crs := defaultGeoCRS
+ if matches[1] != "" {
+ crs = strings.TrimSpace(matches[1])
+ }
+
+ algorithm := defaultGeographyAlgorithm
+ if matches[2] != "" {
+ algorithm =
strings.TrimSpace(matches[2])
+ }
+
+ geog, err := GeographyTypeOf(crs, algorithm)
+ if err != nil {
+ return err
+ }
+ t.Type = geog
default:
return fmt.Errorf("%w: unrecognized field
type", ErrInvalidSchema)
}
@@ -792,6 +837,144 @@ func (VariantType) Equals(other Type) bool {
func (VariantType) Type() string { return "variant" }
func (VariantType) String() string { return "variant" }
+// defaultGeoCRS is the coordinate reference system (CRS) reported by geometry
+// and geography types whose crs field is empty.
+const defaultGeoCRS = "OGC:CRS84"
+
+// GeometryType is the Iceberg geometry type, parameterized by a coordinate
+// reference system (CRS).
+type GeometryType struct {
+ // crs holds a non-default CRS; the empty string stands for defaultGeoCRS.
+ crs string
+}
+
+func GeometryTypeOf(crs string) (GeometryType, error) {
+ if crs == "" {
+ return GeometryType{}, fmt.Errorf("%w: invalid CRS: (empty
string)", ErrInvalidTypeString)
+ }
+ crs = strings.TrimSpace(crs)
+ if crs == defaultGeoCRS {
+ return GeometryType{}, nil
+ }
+
+ return GeometryType{crs: crs}, nil
+}
+
+// CRS reports the coordinate reference system of g. A GeometryType without an
+// explicit CRS reports defaultGeoCRS.
+func (g GeometryType) CRS() string {
+	if g.crs != "" {
+		return g.crs
+	}
+
+	return defaultGeoCRS
+}
+
+// Equals reports whether other is a GeometryType whose stored CRS is identical
+// to the one stored in g.
+func (g GeometryType) Equals(other Type) bool {
+	if that, isGeometry := other.(GeometryType); isGeometry {
+		return that.crs == g.crs
+	}
+
+	return false
+}
+
+// primitive is an empty marker method. NOTE(review): presumably this is what
+// lets GeometryType satisfy PrimitiveType — confirm against the interface.
+func (GeometryType) primitive() {}
+// Type returns the type string for g: "geometry" when no explicit CRS is
+// stored, otherwise "geometry(<crs>)".
+func (g GeometryType) Type() string {
+	if g.crs != "" {
+		return fmt.Sprintf("geometry(%s)", g.crs)
+	}
+
+	return "geometry"
+}
+
+// String returns the same text as Type.
+func (g GeometryType) String() string {
+ return g.Type()
+}
+
+// defaultGeographyAlgorithm is the edge-interpolation algorithm reported by
+// geography types whose algorithm field is empty.
+const defaultGeographyAlgorithm = string(geoarrow.EdgeSpherical)
+
+func toGeoArrowEdgeInterpolation(s string) (geoarrow.EdgeInterpolation, error)
{
+ algo :=
geoarrow.EdgeInterpolation(strings.ToLower(strings.TrimSpace(s)))
+ switch algo {
+ case geoarrow.EdgeSpherical, geoarrow.EdgeVincenty,
+ geoarrow.EdgeThomas, geoarrow.EdgeAndoyer, geoarrow.EdgeKarney:
+ return algo, nil
+ default:
+ return "", fmt.Errorf("%w: invalid edge interpolation
algorithm", ErrInvalidTypeString)
+ }
+}
+
+// GeographyType is the Iceberg geography type, parameterized by a coordinate
+// reference system (CRS) and an edge-interpolation algorithm.
+type GeographyType struct {
+ // crs holds a non-default CRS; the empty string stands for defaultGeoCRS.
+ crs string
+ // algorithm holds a non-default edge-interpolation algorithm; the empty
+ // string stands for defaultGeographyAlgorithm.
+ algorithm string
+}
+
+func GeographyTypeOf(crs string, algorithm string) (GeographyType, error) {
+ if crs == "" {
+ return GeographyType{}, fmt.Errorf("%w: invalid CRS: (empty
string)", ErrInvalidTypeString)
+ }
+ if algorithm == "" {
+ return GeographyType{}, fmt.Errorf("%w: invalid algorithm:
(empty string)", ErrInvalidTypeString)
+ }
+ crs = strings.TrimSpace(crs)
+ normalizedCRS := crs
+ if normalizedCRS == defaultGeoCRS {
+ normalizedCRS = ""
+ }
+
+ validatedAlg, err := toGeoArrowEdgeInterpolation(algorithm) // validate
algorithm
+ if err != nil {
+ return GeographyType{}, fmt.Errorf("invalid algorithm: %w", err)
+ }
+ normalizedAlgorithm := string(validatedAlg)
+ if validatedAlg ==
geoarrow.EdgeInterpolation(defaultGeographyAlgorithm) {
+ normalizedAlgorithm = ""
+ }
+
+ return GeographyType{crs: normalizedCRS, algorithm:
normalizedAlgorithm}, nil
+}
+
+// CRS reports the coordinate reference system of g. A GeographyType without an
+// explicit CRS reports defaultGeoCRS.
+func (g GeographyType) CRS() string {
+	if g.crs != "" {
+		return g.crs
+	}
+
+	return defaultGeoCRS
+}
+
+// Algorithm reports the edge-interpolation algorithm of g. A GeographyType
+// without an explicit algorithm reports defaultGeographyAlgorithm.
+func (g GeographyType) Algorithm() string {
+	if g.algorithm != "" {
+		return g.algorithm
+	}
+
+	return defaultGeographyAlgorithm
+}
+
+// Equals reports whether other is a GeographyType whose stored CRS and
+// algorithm are both identical to those stored in g.
+func (g GeographyType) Equals(other Type) bool {
+	that, isGeography := other.(GeographyType)
+	if !isGeography {
+		return false
+	}
+	if that.crs != g.crs {
+		return false
+	}
+
+	return that.algorithm == g.algorithm
+}
+
+// primitive is an empty marker method. NOTE(review): presumably this is what
+// lets GeographyType satisfy PrimitiveType — confirm against the interface.
+func (GeographyType) primitive() {}
+// Type returns the type string for g:
+//
+//   - "geography" when neither a CRS nor an algorithm is stored,
+//   - "geography(<crs>)" when only a CRS is stored,
+//   - "geography(<crs>, <algorithm>)" when an algorithm is stored; the default
+//     CRS is spelled out in that case if no explicit CRS is stored.
+func (g GeographyType) Type() string {
+	switch {
+	case g.algorithm == "" && g.crs == "":
+		return "geography"
+	case g.algorithm == "":
+		return fmt.Sprintf("geography(%s)", g.crs)
+	case g.crs == "":
+		return fmt.Sprintf("geography(%s, %s)", defaultGeoCRS, g.algorithm)
+	default:
+		return fmt.Sprintf("geography(%s, %s)", g.crs, g.algorithm)
+	}
+}
+
+// String returns the same text as Type.
+func (g GeographyType) String() string {
+ return g.Type()
+}
+
var PrimitiveTypes = struct {
Bool PrimitiveType
Int32 PrimitiveType
diff --git a/types_test.go b/types_test.go
index 704de5ca..1610b1f8 100644
--- a/types_test.go
+++ b/types_test.go
@@ -572,3 +572,248 @@ func TestPropUInt(t *testing.T) {
assert.Equal(t, uint(77), iceberg.PropUInt(props, "garbage", 77),
"falls back on parse error")
assert.Equal(t, uint(5), iceberg.PropUInt(props, "missing", 5), "falls
back on missing key")
}
+
+func TestGeometryType(t *testing.T) {
+ t.Run("default CRS", func(t *testing.T) {
+ geom := iceberg.GeometryType{}
+ assert.Equal(t, "OGC:CRS84", geom.CRS())
+ assert.Equal(t, "geometry", geom.String())
+ assert.True(t, geom.Equals(iceberg.GeometryType{}))
+ })
+
+ t.Run("default CRS explicit", func(t *testing.T) {
+ geom, err := iceberg.GeometryTypeOf("OGC:CRS84")
+ require.NoError(t, err)
+ assert.True(t, geom.Equals(iceberg.GeometryType{}))
+ })
+
+ t.Run("custom CRS", func(t *testing.T) {
+ geom, err := iceberg.GeometryTypeOf("srid:3857")
+ require.NoError(t, err)
+ assert.Equal(t, "srid:3857", geom.CRS())
+ assert.Equal(t, "geometry(srid:3857)", geom.String())
+ })
+
+ t.Run("CRS normalization", func(t *testing.T) {
+ geom1, err := iceberg.GeometryTypeOf("OGC:CRS84")
+ require.NoError(t, err)
+ geom2 := iceberg.GeometryType{}
+ assert.True(t, geom1.Equals(geom2))
+ assert.Equal(t, "geometry", geom1.String())
+ })
+
+ t.Run("empty CRS error", func(t *testing.T) {
+ _, err := iceberg.GeometryTypeOf("")
+ assert.ErrorContains(t, err, "invalid CRS: (empty string)")
+ })
+
+ t.Run("whitespace default CRS", func(t *testing.T) {
+ geom, err := iceberg.GeometryTypeOf(" OGC:CRS84 ")
+ assert.NoError(t, err)
+ assert.Equal(t, "OGC:CRS84", geom.CRS())
+ assert.Equal(t, "geometry", geom.String())
+ })
+
+ t.Run("whitespace custom CRS", func(t *testing.T) {
+ geom, err := iceberg.GeometryTypeOf(" srid:3857 ")
+ assert.NoError(t, err)
+ assert.Equal(t, "srid:3857", geom.CRS())
+ assert.Equal(t, "geometry(srid:3857)", geom.String())
+ })
+
+ t.Run("JSON parsing - default", func(t *testing.T) {
+ data := `{"id": 1, "name": "location", "type": "geometry",
"required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geom, ok := n.Type.(iceberg.GeometryType)
+ require.True(t, ok)
+ assert.Equal(t, "OGC:CRS84", geom.CRS())
+
+ out, err := json.Marshal(n)
+ require.NoError(t, err)
+ assert.JSONEq(t, data, string(out))
+ })
+
+ t.Run("JSON parsing - custom CRS", func(t *testing.T) {
+ data := `{"id": 1, "name": "location", "type":
"geometry(srid:4326)", "required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geom, ok := n.Type.(iceberg.GeometryType)
+ require.True(t, ok)
+ assert.Equal(t, "srid:4326", geom.CRS())
+
+ out, err := json.Marshal(n)
+ require.NoError(t, err)
+ assert.JSONEq(t, data, string(out))
+ })
+
+ t.Run("JSON parsing - case insensitive", func(t *testing.T) {
+ data := `{"id": 1, "name": "location", "type":
"Geometry(SRID:4326)", "required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geom, ok := n.Type.(iceberg.GeometryType)
+ require.True(t, ok)
+ assert.Equal(t, "SRID:4326", geom.CRS())
+ })
+
+ t.Run("JSON parsing - whitespace tolerance", func(t *testing.T) {
+ data := `{"id": 1, "name": "location", "type": "geometry(
srid:4326 )", "required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geom, ok := n.Type.(iceberg.GeometryType)
+ require.True(t, ok)
+ assert.Equal(t, "srid:4326", geom.CRS())
+ })
+
+ t.Run("rejects extra argument", func(t *testing.T) {
+ data := `{"id": 1, "name": "loc", "type": "geometry(srid:4326,
extra)", "required": false}`
+ var n iceberg.NestedField
+ err := json.Unmarshal([]byte(data), &n)
+ assert.ErrorIs(t, err, iceberg.ErrInvalidTypeString)
+ })
+}
+
+func TestGeographyType(t *testing.T) {
+ t.Run("default CRS and algorithm", func(t *testing.T) {
+ geog := iceberg.GeographyType{}
+ assert.Equal(t, "OGC:CRS84", geog.CRS())
+ assert.Equal(t, "spherical", geog.Algorithm())
+ assert.Equal(t, "geography", geog.String())
+ assert.True(t, geog.Equals(iceberg.GeographyType{}))
+ })
+
+ t.Run("custom CRS only", func(t *testing.T) {
+ geog, err := iceberg.GeographyTypeOf("srid:4269", "spherical")
+ require.NoError(t, err)
+ assert.Equal(t, "srid:4269", geog.CRS())
+ assert.Equal(t, "spherical", geog.Algorithm())
+ assert.Equal(t, "geography(srid:4269)", geog.String())
+ })
+
+ t.Run("default CRS and algorithm explicit", func(t *testing.T) {
+ geog, err := iceberg.GeographyTypeOf("OGC:CRS84", "spherical")
+ require.NoError(t, err)
+ assert.True(t, geog.Equals(iceberg.GeographyType{}))
+ })
+
+ t.Run("default CRS with algorithm", func(t *testing.T) {
+ geog, err := iceberg.GeographyTypeOf("OGC:CRS84", "karney")
+ require.NoError(t, err)
+ assert.Equal(t, "OGC:CRS84", geog.CRS())
+ assert.Equal(t, "karney", geog.Algorithm())
+ assert.Equal(t, "geography(OGC:CRS84, karney)", geog.String())
+ })
+
+ t.Run("custom CRS with algorithm", func(t *testing.T) {
+ geog, err := iceberg.GeographyTypeOf("srid:4269", "karney")
+ require.NoError(t, err)
+ assert.Equal(t, "srid:4269", geog.CRS())
+ assert.Equal(t, "karney", geog.Algorithm())
+ assert.Equal(t, "geography(srid:4269, karney)", geog.String())
+ })
+
+ t.Run("empty CRS error", func(t *testing.T) {
+ _, err := iceberg.GeographyTypeOf("", "")
+ assert.ErrorContains(t, err, "invalid CRS: (empty string)")
+ })
+
+ t.Run("whitespace default algorithm", func(t *testing.T) {
+ geog, err := iceberg.GeographyTypeOf("OGC:CRS84", "
spherical ")
+ assert.NoError(t, err)
+ assert.Equal(t, "OGC:CRS84", geog.CRS())
+ assert.Equal(t, "spherical", geog.Algorithm())
+ assert.Equal(t, "geography", geog.String())
+ })
+
+ t.Run("whitespace default CRS and default algorithm", func(t
*testing.T) {
+ geog, err := iceberg.GeographyTypeOf(" OGC:CRS84 ", "
spherical ")
+ assert.NoError(t, err)
+ assert.Equal(t, "OGC:CRS84", geog.CRS())
+ assert.Equal(t, "spherical", geog.Algorithm())
+ assert.Equal(t, "geography", geog.String())
+ })
+
+ t.Run("whitespace custom CRS and custom algorithm", func(t *testing.T) {
+ geog, err := iceberg.GeographyTypeOf(" srid:3857 ", "
karney ")
+ assert.NoError(t, err)
+ assert.Equal(t, "srid:3857", geog.CRS())
+ assert.Equal(t, "karney", geog.Algorithm())
+ assert.Equal(t, "geography(srid:3857, karney)", geog.String())
+ })
+
+ t.Run("JSON parsing - default", func(t *testing.T) {
+ data := `{"id": 1, "name": "area", "type": "geography",
"required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geog, ok := n.Type.(iceberg.GeographyType)
+ require.True(t, ok)
+ assert.Equal(t, "OGC:CRS84", geog.CRS())
+ assert.Equal(t, "spherical", geog.Algorithm())
+
+ out, err := json.Marshal(n)
+ require.NoError(t, err)
+ assert.JSONEq(t, data, string(out))
+ })
+
+ t.Run("JSON parsing - custom CRS", func(t *testing.T) {
+ data := `{"id": 1, "name": "area", "type":
"geography(srid:4269)", "required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geog, ok := n.Type.(iceberg.GeographyType)
+ require.True(t, ok)
+ assert.Equal(t, "srid:4269", geog.CRS())
+ assert.Equal(t, "spherical", geog.Algorithm())
+
+ out, err := json.Marshal(n)
+ require.NoError(t, err)
+ assert.JSONEq(t, data, string(out))
+ })
+
+ t.Run("JSON parsing - custom CRS with algorithm", func(t *testing.T) {
+ data := `{"id": 1, "name": "area", "type":
"geography(srid:4269, karney)", "required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geog, ok := n.Type.(iceberg.GeographyType)
+ require.True(t, ok)
+ assert.Equal(t, "srid:4269", geog.CRS())
+ assert.Equal(t, "karney", geog.Algorithm())
+
+ out, err := json.Marshal(n)
+ require.NoError(t, err)
+ assert.JSONEq(t, data, string(out))
+ })
+
+ t.Run("JSON parsing - case insensitive", func(t *testing.T) {
+ data := `{"id": 1, "name": "area", "type":
"Geography(SRID:4269, KARNEY)", "required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geog, ok := n.Type.(iceberg.GeographyType)
+ require.True(t, ok)
+ assert.Equal(t, "SRID:4269", geog.CRS())
+ assert.Equal(t, "karney", geog.Algorithm())
+ })
+
+ t.Run("JSON parsing - whitespace tolerance", func(t *testing.T) {
+ data := `{"id": 1, "name": "area", "type": "geography(
srid:4269 , karney )", "required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geog, ok := n.Type.(iceberg.GeographyType)
+ require.True(t, ok)
+ assert.Equal(t, "srid:4269", geog.CRS())
+ assert.Equal(t, "karney", geog.Algorithm())
+ })
+
+ t.Run("JSON parsing - invalid algorithm", func(t *testing.T) {
+ data := `{"id": 1, "name": "area", "type":
"geography(srid:4269, invalid)", "required": false}`
+ var n iceberg.NestedField
+ err := json.Unmarshal([]byte(data), &n)
+ assert.ErrorContains(t, err, "invalid edge interpolation
algorithm")
+ })
+
+ t.Run("rejects missing comma between CRS and algorithm", func(t
*testing.T) {
+ data := `{"id": 1, "name": "area", "type": "geography(srid:4269
karney)", "required": false}`
+ var n iceberg.NestedField
+ err := json.Unmarshal([]byte(data), &n)
+ assert.ErrorIs(t, err, iceberg.ErrInvalidTypeString)
+ })
+}
diff --git a/visitors.go b/visitors.go
index 7bdcee8f..84f93855 100644
--- a/visitors.go
+++ b/visitors.go
@@ -320,7 +320,7 @@ func doCmp(st StructLike, term BoundTerm, lit Literal) int {
return typedCmp[Time](st, term, lit)
case TimestampType, TimestampTzType:
return typedCmp[Timestamp](st, term, lit)
- case BinaryType, FixedType:
+ case BinaryType, FixedType, GeographyType, GeometryType:
return typedCmp[[]byte](st, term, lit)
case StringType:
return typedCmp[string](st, term, lit)