This is an automated email from the ASF dual-hosted git repository.

ahmedabu98 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new efe4e941939 extend to yaml (#38371)
efe4e941939 is described below

commit efe4e941939a77275146ecceb01cd74b28555286
Author: Ahmed Abualsaud <[email protected]>
AuthorDate: Tue May 5 14:56:14 2026 -0400

    extend to yaml (#38371)
---
 sdks/python/apache_beam/yaml/yaml_io.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/sdks/python/apache_beam/yaml/yaml_io.py b/sdks/python/apache_beam/yaml/yaml_io.py
index 336e32adc25..92f8ec47cca 100644
--- a/sdks/python/apache_beam/yaml/yaml_io.py
+++ b/sdks/python/apache_beam/yaml/yaml_io.py
@@ -563,6 +563,7 @@ def write_to_iceberg(
     drop: Optional[Iterable[str]] = None,
     only: Optional[str] = None,
     distribution_mode: Optional[str] = None,
+    autosharding: Optional[bool] = None,
 ):
   # TODO(robertwb): It'd be nice to derive this list of parameters, along with
   # their types and docs, programmatically from the iceberg (or managed)
@@ -616,6 +617,11 @@ def write_to_iceberg(
       distributions:
       - none: don't shuffle rows (default)
       - hash: shuffle rows by partition key before writing data
+    autosharding: Enables dynamic sharding to automatically adjust the number
+      of parallel writers based on data volume. It handles data skew by
+      further sub-dividing partitions into multiple shards to prevent
+      bottlenecks during high-throughput writes. Only available with 'hash'
+      distribution mode.
   """
   return beam.managed.Write(
       "iceberg",
@@ -630,7 +636,8 @@ def write_to_iceberg(
           keep=keep,
           drop=drop,
           only=only,
-          distribution_mode=distribution_mode))
+          distribution_mode=distribution_mode,
+          autosharding=autosharding))
 
 
 def io_providers():

Reply via email to