This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 55bdfbf8 feat(table): export StructLike and GetPartitionRecord (#791)
55bdfbf8 is described below
commit 55bdfbf836359142f7c010b0b80f0b7e06af2dca
Author: Tobias Pütz <[email protected]>
AuthorDate: Tue Mar 17 18:18:27 2026 +0100
feat(table): export StructLike and GetPartitionRecord (#791)
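The unexported partitionRecord type and getPartitionRecord function are the
only way to go from a DataFile to something PartitionToPath accepts. Keeping
the function and the structLike interface unexported forces external consumers
to reimplement the same conversion, so this change exports them as
GetPartitionRecord and StructLike.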
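Also deduplicates the manual record construction in the pos delete
writer by reusing newPartitionRecord. Note that the removed loop returned an
error when a partition value was missing for a field, whereas
newPartitionRecord indexes the partition map directly.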
---
exprs.go | 16 ++++++++--------
partitions.go | 2 +-
table/partitioned_fanout_writer_test.go | 2 +-
table/pos_delete_partitioned_fanout_writer.go | 9 +--------
table/scanner.go | 12 ++++++++----
table/snapshots.go | 4 ++--
utils.go | 8 ++++----
visitors.go | 10 +++++-----
8 files changed, 30 insertions(+), 33 deletions(-)
diff --git a/exprs.go b/exprs.go
index 68ced28c..ed868d4f 100644
--- a/exprs.go
+++ b/exprs.go
@@ -338,8 +338,8 @@ type BoundTerm interface {
Ref() BoundReference
Type() Type
- evalToLiteral(structLike) Optional[Literal]
- evalIsNull(structLike) bool
+ evalToLiteral(StructLike) Optional[Literal]
+ evalIsNull(StructLike) bool
}
// unbound is a generic interface representing something that is not yet bound
@@ -485,7 +485,7 @@ func (b *boundRef[T]) Ref() BoundReference { return b }
func (b *boundRef[T]) Field() NestedField { return b.field }
func (b *boundRef[T]) Type() Type { return b.field.Type }
-func (b *boundRef[T]) eval(st structLike) Optional[T] {
+func (b *boundRef[T]) eval(st StructLike) Optional[T] {
switch v := b.acc.Get(st).(type) {
case nil:
return Optional[T]{}
@@ -506,7 +506,7 @@ func (b *boundRef[T]) eval(st structLike) Optional[T] {
}
}
-func (b *boundRef[T]) evalToLiteral(st structLike) Optional[Literal] {
+func (b *boundRef[T]) evalToLiteral(st StructLike) Optional[Literal] {
v := b.eval(st)
if !v.Valid {
return Optional[Literal]{}
@@ -520,7 +520,7 @@ func (b *boundRef[T]) evalToLiteral(st structLike)
Optional[Literal] {
return Optional[Literal]{Val: lit, Valid: true}
}
-func (b *boundRef[T]) evalIsNull(st structLike) bool {
+func (b *boundRef[T]) evalIsNull(st StructLike) bool {
v := b.eval(st)
return !v.Valid
@@ -605,7 +605,7 @@ type BoundUnaryPredicate interface {
type bound[T LiteralType] interface {
BoundTerm
- eval(structLike) Optional[T]
+ eval(StructLike) Optional[T]
}
func newBoundUnaryPred[T LiteralType](op Operation, term BoundTerm)
BoundUnaryPredicate {
@@ -1048,10 +1048,10 @@ func (b *BoundTransform) Equals(other BoundTerm) bool {
return b.transform.Equals(rhs.transform) && b.term.Equals(rhs.term)
}
-func (b *BoundTransform) evalToLiteral(st structLike) Optional[Literal] {
+func (b *BoundTransform) evalToLiteral(st StructLike) Optional[Literal] {
return b.transform.Apply(b.term.evalToLiteral(st))
}
-func (b *BoundTransform) evalIsNull(st structLike) bool {
+func (b *BoundTransform) evalIsNull(st StructLike) bool {
return !b.evalToLiteral(st).Valid
}
diff --git a/partitions.go b/partitions.go
index bd556cdd..0b92dae6 100644
--- a/partitions.go
+++ b/partitions.go
@@ -479,7 +479,7 @@ func (ps *PartitionSpec) PartitionType(schema *Schema)
*StructType {
//
// This does not apply the transforms to the data, it is assumed the provided data
// has already been transformed appropriately.
-func (ps *PartitionSpec) PartitionToPath(data structLike, sc *Schema) string {
+func (ps *PartitionSpec) PartitionToPath(data StructLike, sc *Schema) string {
partType := ps.PartitionType(sc)
if len(partType.FieldList) == 0 {
diff --git a/table/partitioned_fanout_writer_test.go b/table/partitioned_fanout_writer_test.go
index 9c95885c..57495094 100644
--- a/table/partitioned_fanout_writer_test.go
+++ b/table/partitioned_fanout_writer_test.go
@@ -151,7 +151,7 @@ func (s *FanoutWriterTestSuite)
testTransformPartition(transform iceberg.Transfo
fileCount++
totalRecords += dataFile.Count()
- partitionRec := getPartitionRecord(dataFile,
spec.PartitionType(icebergSchema))
+ partitionRec := GetPartitionRecord(dataFile,
spec.PartitionType(icebergSchema))
partitionPath := spec.PartitionToPath(partitionRec,
icebergSchema)
partitionPaths[partitionPath] += dataFile.Count()
}
diff --git a/table/pos_delete_partitioned_fanout_writer.go b/table/pos_delete_partitioned_fanout_writer.go
index f03c7019..fe54dba9 100644
--- a/table/pos_delete_partitioned_fanout_writer.go
+++ b/table/pos_delete_partitioned_fanout_writer.go
@@ -135,14 +135,7 @@ func (p *positionDeletePartitionedFanoutWriter)
partitionPath(partitionContext p
return "", fmt.Errorf("unexpected missing partition spec in metadata for spec id %d", partitionContext.specID)
}
- data := make(partitionRecord, spec.NumFields())
- for i, field := range spec.Fields() {
- val, ok := partitionContext.partitionData[field.FieldID]
- if !ok {
- return "", fmt.Errorf("unexpected missing partition value for field id %d in spec id %d", field.FieldID, partitionContext.specID)
- }
- data[i] = val
- }
+ data := newPartitionRecord(partitionContext.partitionData, spec.PartitionType(p.schema))
return spec.PartitionToPath(data, p.schema), nil
}
diff --git a/table/scanner.go b/table/scanner.go
index 0d1a0dbd..fde1fc52 100644
--- a/table/scanner.go
+++ b/table/scanner.go
@@ -119,9 +119,7 @@ func (m *manifestEntries) addPositionalDeleteEntry(e
iceberg.ManifestEntry) {
m.positionalDeleteEntries = append(m.positionalDeleteEntries, e)
}
-func getPartitionRecord(dataFile iceberg.DataFile, partitionType
*iceberg.StructType) partitionRecord {
- partitionData := dataFile.Partition()
-
+func newPartitionRecord(partitionData map[int]any, partitionType *iceberg.StructType) partitionRecord {
out := make(partitionRecord, len(partitionType.FieldList))
for i, f := range partitionType.FieldList {
out[i] = partitionData[f.ID]
@@ -130,6 +128,12 @@ func getPartitionRecord(dataFile iceberg.DataFile,
partitionType *iceberg.Struct
return out
}
+// GetPartitionRecord converts a DataFile's partition map into a positional
+// record ordered by the fields of the given partition struct type.
+func GetPartitionRecord(dataFile iceberg.DataFile, partitionType *iceberg.StructType) iceberg.StructLike {
+ return newPartitionRecord(dataFile.Partition(), partitionType)
+}
+
func openManifest(io io.IO, manifest iceberg.ManifestFile,
partitionFilter, metricsEval func(iceberg.DataFile) (bool, error),
) ([]iceberg.ManifestEntry, error) {
@@ -284,7 +288,7 @@ func (scan *Scan) buildPartitionEvaluator(specID int)
(func(iceberg.DataFile) (b
}
return func(d iceberg.DataFile) (bool, error) {
- return fn(getPartitionRecord(d, partType))
+ return fn(GetPartitionRecord(d, partType))
}, nil
}
diff --git a/table/snapshots.go b/table/snapshots.go
index fbdd47de..1ae60e52 100644
--- a/table/snapshots.go
+++ b/table/snapshots.go
@@ -413,7 +413,7 @@ func (s *SnapshotSummaryCollector) addFile(df
iceberg.DataFile, sc *iceberg.Sche
if len(df.Partition()) > 0 {
partitionPath := spec.PartitionToPath(
- getPartitionRecord(df, spec.PartitionType(sc)), sc)
+ GetPartitionRecord(df, spec.PartitionType(sc)), sc)
return s.updatePartitionMetrics(partitionPath, df, true)
}
@@ -428,7 +428,7 @@ func (s *SnapshotSummaryCollector) removeFile(df
iceberg.DataFile, sc *iceberg.S
if len(df.Partition()) > 0 {
partitionPath := spec.PartitionToPath(
- getPartitionRecord(df, spec.PartitionType(sc)), sc)
+ GetPartitionRecord(df, spec.PartitionType(sc)), sc)
return s.updatePartitionMetrics(partitionPath, df, false)
}
diff --git a/utils.go b/utils.go
index a1dec28b..36a798db 100644
--- a/utils.go
+++ b/utils.go
@@ -43,8 +43,8 @@ type Optional[T any] struct {
Valid bool
}
-// represents a single row in a record
-type structLike interface {
+// StructLike represents a single row in a record.
+type StructLike interface {
// Size returns the number of columns in this row
Size() int
// Get returns the value in the requested column,
@@ -64,11 +64,11 @@ func (a *accessor) String() string {
return fmt.Sprintf("Accessor(position=%d, inner=%s)", a.pos, a.inner)
}
-func (a *accessor) Get(s structLike) any {
+func (a *accessor) Get(s StructLike) any {
val, inner := s.Get(a.pos), a
for val != nil && inner.inner != nil {
inner = inner.inner
- val = val.(structLike).Get(inner.pos)
+ val = val.(StructLike).Get(inner.pos)
}
return val
diff --git a/visitors.go b/visitors.go
index 3debc2ea..9564a4ea 100644
--- a/visitors.go
+++ b/visitors.go
@@ -186,7 +186,7 @@ func (*bindVisitor) VisitBound(pred BoundPredicate)
BooleanExpression {
// ExpressionEvaluator returns a function which can be used to evaluate a given expression
// as long as a structlike value is passed which operates like and matches the passed in
// schema.
-func ExpressionEvaluator(s *Schema, unbound BooleanExpression, caseSensitive
bool) (func(structLike) (bool, error), error) {
+func ExpressionEvaluator(s *Schema, unbound BooleanExpression, caseSensitive bool) (func(StructLike) (bool, error), error) {
bound, err := BindExpr(s, unbound, caseSensitive)
if err != nil {
return nil, err
@@ -197,10 +197,10 @@ func ExpressionEvaluator(s *Schema, unbound
BooleanExpression, caseSensitive boo
type exprEvaluator struct {
bound BooleanExpression
- st structLike
+ st StructLike
}
-func (e *exprEvaluator) Eval(st structLike) (bool, error) {
+func (e *exprEvaluator) Eval(st StructLike) (bool, error) {
e.st = st
return VisitExpr(e.bound, e)
@@ -283,7 +283,7 @@ func nullsFirstCmp[T LiteralType](cmp Comparator[T], v1, v2
Optional[T]) int {
return cmp(v1.Val, v2.Val)
}
-func typedCmp[T LiteralType](st structLike, term BoundTerm, lit Literal) int {
+func typedCmp[T LiteralType](st StructLike, term BoundTerm, lit Literal) int {
v := term.(bound[T]).eval(st)
var l Optional[T]
@@ -296,7 +296,7 @@ func typedCmp[T LiteralType](st structLike, term BoundTerm,
lit Literal) int {
return nullsFirstCmp(rhs.Comparator(), v, l)
}
-func doCmp(st structLike, term BoundTerm, lit Literal) int {
+func doCmp(st StructLike, term BoundTerm, lit Literal) int {
// we already properly casted and converted everything during binding
// so we can type assert based on the term type
switch term.Type().(type) {