Re: [PR] feat(table): clustered partitioned write path [iceberg-go]

via GitHub Thu, 30 Apr 2026 01:30:33 -0700


twuebi commented on code in PR #948:
URL: https://github.com/apache/iceberg-go/pull/948#discussion_r3166614733



##########
table/clustered_writer.go:
##########
@@ -0,0 +1,246 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package table
+
+import (
+       "cmp"
+       "context"
+       "errors"
+       "fmt"
+       "iter"
+       "slices"
+
+       "github.com/apache/arrow-go/v18/arrow"
+       "github.com/apache/iceberg-go"
+)
+
+// clusteredPartitionedWrite writes records to partitioned data files,
+// keeping at most one partition writer open at a time. When the
+// partition changes, the current writer is closed before opening a
+// new one. This is the memory-efficient write path for pre-clustered
+// input (e.g. compaction reads where each source file belongs to a
+// single partition).
+//
+// The input must be clustered by partition across batches: once a
+// partition's writer has been closed, encountering further records
+// for that partition returns an error. Within a single batch the
+// writer reclusters rows by partition. Use the fanout writer if the
+// input is not clustered across batches.
+//
+// Breaking out of the returned iterator early cancels the producer
+// so it stops opening new writers; in-flight writers finish cleanly.
+func clusteredPartitionedWrite(
+       ctx context.Context,
+       spec iceberg.PartitionSpec,
+       schema *iceberg.Schema,
+       factory *writerFactory,
+       records iter.Seq2[arrow.RecordBatch, error],
+) iter.Seq2[iceberg.DataFile, error] {
+       ctx, cancel := context.WithCancel(ctx)
+
+       outputCh := make(chan iceberg.DataFile, 1)
+       errCh := make(chan error, 1)

Review Comment:
   Increased the channel buffer size to 16.



##########
table/clustered_writer.go:
##########
@@ -0,0 +1,246 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package table
+
+import (
+       "cmp"
+       "context"
+       "errors"
+       "fmt"
+       "iter"
+       "slices"
+
+       "github.com/apache/arrow-go/v18/arrow"
+       "github.com/apache/iceberg-go"
+)
+
+// clusteredPartitionedWrite writes records to partitioned data files,
+// keeping at most one partition writer open at a time. When the
+// partition changes, the current writer is closed before opening a
+// new one. This is the memory-efficient write path for pre-clustered
+// input (e.g. compaction reads where each source file belongs to a
+// single partition).
+//
+// The input must be clustered by partition across batches: once a
+// partition's writer has been closed, encountering further records
+// for that partition returns an error. Within a single batch the
+// writer reclusters rows by partition. Use the fanout writer if the
+// input is not clustered across batches.
+//
+// Breaking out of the returned iterator early cancels the producer
+// so it stops opening new writers; in-flight writers finish cleanly.
+func clusteredPartitionedWrite(
+       ctx context.Context,
+       spec iceberg.PartitionSpec,
+       schema *iceberg.Schema,
+       factory *writerFactory,
+       records iter.Seq2[arrow.RecordBatch, error],
+) iter.Seq2[iceberg.DataFile, error] {
+       ctx, cancel := context.WithCancel(ctx)
+
+       outputCh := make(chan iceberg.DataFile, 1)
+       errCh := make(chan error, 1)
+
+       go func() {
+               defer close(outputCh)
+               defer close(errCh)
+               defer factory.stopCount()
+
+               var (
+                       currentRec          partitionRecord
+                       currentWriter       *RollingDataWriter
+                       completedPartitions = &closedPartitionSet{}
+               )
+
+               closeCurrentWriter := func() error {
+                       if currentWriter == nil {
+                               return nil
+                       }
+                       w := currentWriter
+                       currentWriter = nil
+                       completedPartitions.add(currentRec)
+                       close(w.recordCh)
+                       w.wg.Wait()
+
+                       // stream's deferred close(errorCh) runs before its
+                       // deferred wg.Done, so by the time Wait returns the
+                       // channel is closed; this read never blocks and yields
+                       // either the buffered error or nil.
+                       return <-w.errorCh
+               }
+
+               sendErr := func(err error) {
+                       select {
+                       case errCh <- err:
+                       default:
+                       }
+               }
+
+               fail := func(err error) {
+                       sendErr(errors.Join(err, closeCurrentWriter()))
+               }
+
+               // Recover any panic so the consumer is not left blocking on
+               // errCh forever. Declared last so it runs first on goroutine
+               // exit, before the close(errCh) and close(outputCh) defers.
+               defer func() {
+                       if r := recover(); r != nil {
+                               fail(fmt.Errorf("clustered write panic: %v", r))
+                       }
+               }()
+
+               takeFn := partitionBatchByKey(ctx)
+
+               for rec, err := range records {
+                       if ctxErr := ctx.Err(); ctxErr != nil {
+                               fail(context.Cause(ctx))
+
+                               return
+                       }
+                       if err != nil {
+                               fail(err)
+
+                               return
+                       }

Review Comment:
   Swapped the order of the two checks above.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] feat(table): clustered partitioned write path [iceberg-go]

Reply via email to