This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 564c5e67 fix(table): prevent index out of range error in
buildManifestEvaluator (#692)
564c5e67 is described below
commit 564c5e67be88b9b78994a624acc89fe7569a7fa5
Author: Eren Dursun <[email protected]>
AuthorDate: Wed Jan 28 22:34:21 2026 +0100
fix(table): prevent index out of range error in buildManifestEvaluator
(#692)
fixes #691
Previously, the buildManifestEvaluator method directly accessed the
PartitionSpecs array by index without validating that the requested
specID actually exists in the partition specs. This could cause an index
out of bounds error.
This change adds a proper lookup using slices.IndexFunc to find the
partition spec by its ID and returns an error if the spec is not found,
improving error handling and robustness.
---
table/metadata.go | 16 +++++-
table/metadata_builder_internal_test.go | 3 +-
table/scanner.go | 35 +++++++-----
table/scanner_internal_test.go | 96 +++++++++++++++++++++++++++++++++
table/update_spec.go | 6 +--
5 files changed, 138 insertions(+), 18 deletions(-)
diff --git a/table/metadata.go b/table/metadata.go
index 794c58f8..92b976ca 100644
--- a/table/metadata.go
+++ b/table/metadata.go
@@ -92,6 +92,9 @@ type Metadata interface {
PartitionSpecs() []iceberg.PartitionSpec
// PartitionSpec returns the current partition spec that the table is
using.
PartitionSpec() iceberg.PartitionSpec
+ // PartitionSpecByID returns the partition spec with the given ID.
Returns
+ // nil if the ID is not found in the list of partition specs.
+ PartitionSpecByID(int) *iceberg.PartitionSpec
// DefaultPartitionSpec is the ID of the current spec that writers
should
// use by default.
DefaultPartitionSpec() int
@@ -925,7 +928,7 @@ func (b *MetadataBuilder) GetSpecByID(id int)
(*iceberg.PartitionSpec, error) {
}
}
- return nil, fmt.Errorf("partition spec with id %d not found", id)
+ return nil, fmt.Errorf("%w: id %d", ErrPartitionSpecNotFound, id)
}
func (b *MetadataBuilder) GetSortOrderByID(id int) (*SortOrder, error) {
@@ -1155,6 +1158,7 @@ func maxBy[S ~[]E, E any](elems S, extract func(e E) int)
int {
var (
ErrInvalidMetadataFormatVersion = errors.New("invalid or missing
format-version in table metadata")
ErrInvalidMetadata = errors.New("invalid metadata")
+ ErrPartitionSpecNotFound = errors.New("partition spec not found")
)
// ParseMetadata parses json metadata provided by the passed in reader,
@@ -1323,6 +1327,16 @@ func (c *commonMetadata) PartitionSpec()
iceberg.PartitionSpec {
return *iceberg.UnpartitionedSpec
}
+func (c *commonMetadata) PartitionSpecByID(id int) *iceberg.PartitionSpec {
+ for _, s := range c.Specs {
+ if s.ID() == id {
+ return &s
+ }
+ }
+
+ return nil
+}
+
func (c *commonMetadata) LastPartitionSpecID() *int { return c.LastPartitionID
}
func (c *commonMetadata) Snapshots() []Snapshot { return c.SnapshotList }
func (c *commonMetadata) SnapshotByID(id int64) *Snapshot {
diff --git a/table/metadata_builder_internal_test.go
b/table/metadata_builder_internal_test.go
index 7840497a..6c9fed32 100644
--- a/table/metadata_builder_internal_test.go
+++ b/table/metadata_builder_internal_test.go
@@ -308,7 +308,8 @@ func TestAddRemovePartitionSpec(t *testing.T) {
require.Len(t, newBuilder.updates, 1)
require.Len(t, newBuild.PartitionSpecs(), 1)
_, err = newBuilder.GetSpecByID(1)
- require.ErrorContains(t, err, "partition spec with id 1 not found")
+ require.ErrorIs(t, err, ErrPartitionSpecNotFound)
+ require.ErrorContains(t, err, "id 1")
}
func TestSetDefaultPartitionSpec(t *testing.T) {
diff --git a/table/scanner.go b/table/scanner.go
index ee12bf9e..1bc9d005 100644
--- a/table/scanner.go
+++ b/table/scanner.go
@@ -242,33 +242,42 @@ func (scan *Scan) Projection() (*iceberg.Schema, error) {
}
func (scan *Scan) buildPartitionProjection(specID int)
(iceberg.BooleanExpression, error) {
- project := newInclusiveProjection(scan.metadata.CurrentSchema(),
- scan.metadata.PartitionSpecs()[specID], true)
+ spec := scan.metadata.PartitionSpecByID(specID)
+ if spec == nil {
+ return nil, fmt.Errorf("%w: id %d", ErrPartitionSpecNotFound,
specID)
+ }
+ project := newInclusiveProjection(scan.metadata.CurrentSchema(), *spec,
true)
return project(scan.rowFilter)
}
func (scan *Scan) buildManifestEvaluator(specID int)
(func(iceberg.ManifestFile) (bool, error), error) {
- spec := scan.metadata.PartitionSpecs()[specID]
+ spec := scan.metadata.PartitionSpecByID(specID)
+ if spec == nil {
+ return nil, fmt.Errorf("%w: id %d", ErrPartitionSpecNotFound,
specID)
+ }
- return newManifestEvaluator(spec, scan.metadata.CurrentSchema(),
+ return newManifestEvaluator(*spec, scan.metadata.CurrentSchema(),
scan.partitionFilters.Get(specID), scan.caseSensitive)
}
-func (scan *Scan) buildPartitionEvaluator(specID int) func(iceberg.DataFile)
(bool, error) {
- spec := scan.metadata.PartitionSpecs()[specID]
+func (scan *Scan) buildPartitionEvaluator(specID int) (func(iceberg.DataFile)
(bool, error), error) {
+ spec := scan.metadata.PartitionSpecByID(specID)
+ if spec == nil {
+ return nil, fmt.Errorf("%w: id %d", ErrPartitionSpecNotFound,
specID)
+ }
partType := spec.PartitionType(scan.metadata.CurrentSchema())
partSchema := iceberg.NewSchema(0, partType.FieldList...)
partExpr := scan.partitionFilters.Get(specID)
- return func(d iceberg.DataFile) (bool, error) {
- fn, err := iceberg.ExpressionEvaluator(partSchema, partExpr,
scan.caseSensitive)
- if err != nil {
- return false, err
- }
+ fn, err := iceberg.ExpressionEvaluator(partSchema, partExpr,
scan.caseSensitive)
+ if err != nil {
+ return nil, err
+ }
+ return func(d iceberg.DataFile) (bool, error) {
return fn(getPartitionRecord(d, partType))
- }
+ }, nil
}
func (scan *Scan) checkSequenceNumber(minSeqNum int64, manifest
iceberg.ManifestFile) bool {
@@ -370,7 +379,7 @@ func (scan *Scan) collectManifestEntries(
g, _ := errgroup.WithContext(ctx)
g.SetLimit(concurrencyLimit)
- partitionEvaluators := newKeyDefaultMap(scan.buildPartitionEvaluator)
+ partitionEvaluators :=
newKeyDefaultMapWrapErr(scan.buildPartitionEvaluator)
for _, mf := range manifestList {
if !scan.checkSequenceNumber(minSeqNum, mf) {
diff --git a/table/scanner_internal_test.go b/table/scanner_internal_test.go
index af08b687..07ca9700 100644
--- a/table/scanner_internal_test.go
+++ b/table/scanner_internal_test.go
@@ -25,6 +25,7 @@ import (
"github.com/apache/iceberg-go"
"github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
)
func newDataManifest(minSeqNum int64) iceberg.ManifestFile {
@@ -135,3 +136,98 @@ func TestKeyDefaultMapRaceCondition(t *testing.T) {
assert.Equal(t, int64(1), callCount,
"factory should be called exactly once per key, but was called
%d times", callCount)
}
+
+func TestBuildPartitionProjectionWithInvalidSpecID(t *testing.T) {
+ schema := iceberg.NewSchema(
+ 1,
+ iceberg.NestedField{
+ ID: 1, Name: "id",
+ Type: iceberg.PrimitiveTypes.Int64, Required: true,
+ },
+ )
+
+ metadata, err := NewMetadata(
+ schema,
+ iceberg.UnpartitionedSpec,
+ UnsortedSortOrder,
+ "s3://test-bucket/test_table",
+ iceberg.Properties{},
+ )
+ require.NoError(t, err)
+
+ scan := &Scan{
+ metadata: metadata,
+ rowFilter: iceberg.AlwaysTrue{},
+ caseSensitive: true,
+ }
+
+ expr, err := scan.buildPartitionProjection(999)
+ require.Error(t, err)
+ assert.Nil(t, expr)
+ assert.ErrorIs(t, err, ErrPartitionSpecNotFound)
+ assert.ErrorContains(t, err, "id 999")
+}
+
+func TestBuildManifestEvaluatorWithInvalidSpecID(t *testing.T) {
+ schema := iceberg.NewSchema(
+ 1,
+ iceberg.NestedField{
+ ID: 1, Name: "id",
+ Type: iceberg.PrimitiveTypes.Int64, Required: true,
+ },
+ )
+
+ metadata, err := NewMetadata(
+ schema,
+ iceberg.UnpartitionedSpec,
+ UnsortedSortOrder,
+ "s3://test-bucket/test_table",
+ iceberg.Properties{},
+ )
+ require.NoError(t, err)
+
+ scan := &Scan{
+ metadata: metadata,
+ rowFilter: iceberg.AlwaysTrue{},
+ caseSensitive: true,
+ }
+
+ scan.partitionFilters =
newKeyDefaultMapWrapErr(scan.buildPartitionProjection)
+
+ evaluator, err := scan.buildManifestEvaluator(999)
+ require.Error(t, err)
+ assert.Nil(t, evaluator)
+ assert.ErrorIs(t, err, ErrPartitionSpecNotFound)
+ assert.ErrorContains(t, err, "id 999")
+}
+
+func TestBuildPartitionEvaluatorWithInvalidSpecID(t *testing.T) {
+ schema := iceberg.NewSchema(
+ 1,
+ iceberg.NestedField{
+ ID: 1, Name: "id",
+ Type: iceberg.PrimitiveTypes.Int64, Required: true,
+ },
+ )
+
+ metadata, err := NewMetadata(
+ schema,
+ iceberg.UnpartitionedSpec,
+ UnsortedSortOrder,
+ "s3://test-bucket/test_table",
+ iceberg.Properties{},
+ )
+ require.NoError(t, err)
+
+ scan := &Scan{
+ metadata: metadata,
+ rowFilter: iceberg.AlwaysTrue{},
+ caseSensitive: true,
+ }
+
+ evaluator, err := scan.buildPartitionEvaluator(999)
+ require.Error(t, err)
+ assert.Nil(t, evaluator)
+ assert.ErrorIs(t, err, ErrPartitionSpecNotFound)
+ assert.ErrorContains(t, err, "id 999")
+}
diff --git a/table/update_spec.go b/table/update_spec.go
index 9447761d..a5493247 100644
--- a/table/update_spec.go
+++ b/table/update_spec.go
@@ -405,7 +405,7 @@ func (us *UpdateSpec) addNewField(schema *iceberg.Schema,
sourceId int, fieldId
}
func (us *UpdateSpec) isNewPartitionSpec(newSpecId int) bool {
- return !slices.ContainsFunc(us.txn.tbl.Metadata().PartitionSpecs(),
func(s iceberg.PartitionSpec) bool {
- return s.ID() == newSpecId
- })
+ spec := us.txn.tbl.Metadata().PartitionSpecByID(newSpecId)
+
+ return spec == nil
}