This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new f5a9d5e feat(table): Simple location provider (#319)
f5a9d5e is described below
commit f5a9d5ed7abd6ec2704399e2a801a90aa25f5864
Author: Matt Topol <[email protected]>
AuthorDate: Fri Mar 7 07:50:41 2025 -0500
feat(table): Simple location provider (#319)
Add the location provider interface and simple implementation
---
table/locations.go | 118 ++++++++++++++++++++++++++++++++++++++++++++++++
table/locations_test.go | 76 +++++++++++++++++++++++++++++++
table/properties.go | 27 +++++++++++
table/table.go | 13 ++++--
4 files changed, 229 insertions(+), 5 deletions(-)
diff --git a/table/locations.go b/table/locations.go
new file mode 100644
index 0000000..a95ef78
--- /dev/null
+++ b/table/locations.go
@@ -0,0 +1,118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package table
+
+import (
+ "fmt"
+ "net/url"
+
+ "github.com/apache/iceberg-go"
+ "github.com/google/uuid"
+)
+
+type LocationProvider interface {
+ NewTableMetadataFileLocation(newVersion int) (string, error)
+ NewMetadataLocation(metadataFileName string) string
+}
+
+type simpleLocationProvider struct {
+ tableLoc *url.URL
+ tableProps iceberg.Properties
+ dataPath *url.URL
+ metadataPath *url.URL
+}
+
+func (slp *simpleLocationProvider) NewTableMetadataFileLocation(newVersion int) (string, error) {
+ if newVersion < 0 {
+ return "", fmt.Errorf("%w: table metadata version %d must be a non-negative integer",
+ iceberg.ErrInvalidArgument, newVersion)
+ }
+
+ newUUID, err := uuid.NewRandom()
+ if err != nil {
+ return "", err
+ }
+
+ fname := fmt.Sprintf("%05d-%s.metadata.json", newVersion, newUUID)
+
+ return slp.NewMetadataLocation(fname), nil
+}
+
+func (slp *simpleLocationProvider) NewMetadataLocation(metadataFileName string) string {
+ return slp.metadataPath.JoinPath(metadataFileName).String()
+}
+
+func newSimpleLocationProvider(tableLoc *url.URL, tableProps iceberg.Properties) (*simpleLocationProvider, error) {
+ out := &simpleLocationProvider{
+ tableLoc: tableLoc,
+ tableProps: tableProps,
+ }
+
+ var err error
+ if propPath, ok := tableProps[WriteDataPathKey]; ok {
+ out.dataPath, err = url.Parse(propPath)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ out.dataPath = out.tableLoc.JoinPath("data")
+ }
+
+ if propPath, ok := tableProps[WriteMetadataPathKey]; ok {
+ out.metadataPath, err = url.Parse(propPath)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ out.metadataPath = out.tableLoc.JoinPath("metadata")
+ }
+
+ return out, nil
+}
+
+type objectStoreLocationProvider struct {
+ *simpleLocationProvider
+
+ includePartitionPaths bool
+}
+
+func newObjectStoreLocationProvider(tableLoc *url.URL, tableProps iceberg.Properties) (*objectStoreLocationProvider, error) {
+ slp, err := newSimpleLocationProvider(tableLoc, tableProps)
+ if err != nil {
+ return nil, err
+ }
+
+ return &objectStoreLocationProvider{
+ simpleLocationProvider: slp,
+ includePartitionPaths: tableProps.GetBool(WriteObjectStorePartitionedPathsKey,
+ WriteObjectStorePartitionedPathsDefault),
+ }, nil
+}
+
+func LoadLocationProvider(tableLocation string, tableProps iceberg.Properties) (LocationProvider, error) {
+ u, err := url.Parse(tableLocation)
+ if err != nil {
+ return nil, err
+ }
+
+ if tableProps.GetBool(ObjectStoreEnabledKey, ObjectStoreEnabledDefault) {
+ return newObjectStoreLocationProvider(u, tableProps)
+ }
+
+ return newSimpleLocationProvider(u, tableProps)
+}
diff --git a/table/locations_test.go b/table/locations_test.go
new file mode 100644
index 0000000..2ef2e53
--- /dev/null
+++ b/table/locations_test.go
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package table_test
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/apache/iceberg-go"
+ "github.com/apache/iceberg-go/table"
+ "github.com/google/uuid"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestLocationProviderMetadataDefaultLocation(t *testing.T) {
+ provider, err := table.LoadLocationProvider("table_location", nil)
+ require.NoError(t, err)
+ assert.Equal(t, "table_location/metadata/manifest.avro", provider.NewMetadataLocation("manifest.avro"))
+}
+
+func TestLocationProviderMetadataLocationCustomPath(t *testing.T) {
+ provider, err := table.LoadLocationProvider("table_location",
+ iceberg.Properties{table.WriteMetadataPathKey: "s3://table-location/custom/path"})
+ require.NoError(t, err)
+
+ assert.Equal(t, "s3://table-location/custom/path/metadata.json", provider.NewMetadataLocation("metadata.json"))
+}
+
+func TestLocationProviderMetadataLocationTrailingSlash(t *testing.T) {
+ provider, err := table.LoadLocationProvider("table_location",
+ iceberg.Properties{table.WriteMetadataPathKey: "s3://table-location/custom/path/"})
+ require.NoError(t, err)
+
+ assert.Equal(t, "s3://table-location/custom/path/metadata.json", provider.NewMetadataLocation("metadata.json"))
+}
+
+func TestLocationProviderMetadataFileLocation(t *testing.T) {
+ uuid.SetRand(strings.NewReader("0123456789abcdefghijkl"))
+ defer uuid.SetRand(nil)
+
+ provider, err := table.LoadLocationProvider("table_location", nil)
+ require.NoError(t, err)
+
+ loc, err := provider.NewTableMetadataFileLocation(1)
+ require.NoError(t, err)
+ assert.Equal(t, "table_location/metadata/00001-30313233-3435-4637-b839-616263646566.metadata.json", loc)
+}
+
+func TestLocationProviderMetadataFileLocationCustomPath(t *testing.T) {
+ uuid.SetRand(strings.NewReader("0123456789abcdefghijkl"))
+ defer uuid.SetRand(nil)
+
+ provider, err := table.LoadLocationProvider("table_location",
+ iceberg.Properties{table.WriteMetadataPathKey: "s3://table-location/custom/path/"})
+ require.NoError(t, err)
+
+ loc, err := provider.NewTableMetadataFileLocation(1)
+ require.NoError(t, err)
+ assert.Equal(t, "s3://table-location/custom/path/00001-30313233-3435-4637-b839-616263646566.metadata.json", loc)
+}
diff --git a/table/properties.go b/table/properties.go
new file mode 100644
index 0000000..6c1d688
--- /dev/null
+++ b/table/properties.go
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package table
+
+const (
+ WriteDataPathKey = "write.data.path"
+ WriteMetadataPathKey = "write.metadata.path"
+ WriteObjectStorePartitionedPathsKey = "write.object-storage.partitioned-paths"
+ WriteObjectStorePartitionedPathsDefault = true
+ ObjectStoreEnabledKey = "write.object-storage.enabled"
+ ObjectStoreEnabledDefault = false
+)
diff --git a/table/table.go b/table/table.go
index f8eb3b2..b6b7786 100644
--- a/table/table.go
+++ b/table/table.go
@@ -47,11 +47,10 @@ func (t Table) Equals(other Table) bool {
t.metadata.Equals(other.metadata)
}
-func (t Table) Identifier() Identifier { return t.identifier }
-func (t Table) Metadata() Metadata { return t.metadata }
-func (t Table) MetadataLocation() string { return t.metadataLocation }
-func (t Table) FS() io.IO { return t.fs }
-
+func (t Table) Identifier() Identifier { return t.identifier }
+func (t Table) Metadata() Metadata { return t.metadata }
+func (t Table) MetadataLocation() string { return t.metadataLocation }
+func (t Table) FS() io.IO { return t.fs }
func (t Table) Schema() *iceberg.Schema { return t.metadata.CurrentSchema() }
func (t Table) Spec() iceberg.PartitionSpec { return t.metadata.PartitionSpec() }
func (t Table) SortOrder() SortOrder { return t.metadata.SortOrder() }
@@ -69,6 +68,10 @@ func (t Table) Schemas() map[int]*iceberg.Schema {
return m
}
+func (t Table) LocationProvider() (LocationProvider, error) {
+ return LoadLocationProvider(t.metadataLocation, t.metadata.Properties())
+}
+
type ScanOption func(*Scan)
func noopOption(*Scan) {}