jqin61 commented on code in PR #555:
URL: https://github.com/apache/iceberg-python/pull/555#discussion_r1544754857


##########
pyiceberg/table/__init__.py:
##########
@@ -2526,25 +2537,44 @@ def _dataframe_to_data_files(
     """
     from pyiceberg.io.pyarrow import bin_pack_arrow_table, write_file
 
-    if len([spec for spec in table_metadata.partition_specs if spec.spec_id != 0]) > 0:
-        raise ValueError("Cannot write to partitioned tables")
-
     counter = itertools.count(0)
     write_uuid = write_uuid or uuid.uuid4()
-
     target_file_size = PropertyUtil.property_as_int(
         properties=table_metadata.properties,
         property_name=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES,
         default=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT,
     )
+    if target_file_size is None:
+        raise ValueError(
+            "Fail to get neither TableProperties.WRITE_TARGET_FILE_SIZE_BYTES 
nor WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT for writing target data file."
+        )
 
-    # This is an iter, so we don't have to materialize everything every time
-    # This will be more relevant when we start doing partitioned writes
-    yield from write_file(
-        io=io,
-        table_metadata=table_metadata,
-        tasks=iter([WriteTask(write_uuid, next(counter), batches) for batches in bin_pack_arrow_table(df, target_file_size)]),  # type: ignore
-    )
+    if any(len(spec.fields) > 0 for spec in table_metadata.partition_specs):

Review Comment:
   Yes, "recent" makes more sense.
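
   For context, a minimal sketch (not the PR's actual implementation) of checking only the table's current/default partition spec rather than every historical spec; it relies on the `partition_specs` and `default_spec_id` fields of the table metadata, and the helper name is hypothetical:

       from pyiceberg.partitioning import PartitionSpec
       from pyiceberg.table.metadata import TableMetadata


       def current_spec_is_partitioned(table_metadata: TableMetadata) -> bool:
           # Resolve the current (default) spec by id, then check whether it
           # carries any partition fields.
           current: PartitionSpec = next(
               spec
               for spec in table_metadata.partition_specs
               if spec.spec_id == table_metadata.default_spec_id
           )
           return len(current.fields) > 0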



##########
pyiceberg/table/__init__.py:
##########
@@ -2526,25 +2537,44 @@ def _dataframe_to_data_files(
     """
     from pyiceberg.io.pyarrow import bin_pack_arrow_table, write_file
 
-    if len([spec for spec in table_metadata.partition_specs if spec.spec_id != 0]) > 0:
-        raise ValueError("Cannot write to partitioned tables")
-
     counter = itertools.count(0)
     write_uuid = write_uuid or uuid.uuid4()
-
     target_file_size = PropertyUtil.property_as_int(
         properties=table_metadata.properties,
         property_name=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES,
         default=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT,
     )
+    if target_file_size is None:
+        raise ValueError(
+            "Fail to get neither TableProperties.WRITE_TARGET_FILE_SIZE_BYTES 
nor WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT for writing target data file."
+        )
 
-    # This is an iter, so we don't have to materialize everything every time
-    # This will be more relevant when we start doing partitioned writes
-    yield from write_file(
-        io=io,
-        table_metadata=table_metadata,
-        tasks=iter([WriteTask(write_uuid, next(counter), batches) for batches in bin_pack_arrow_table(df, target_file_size)]),  # type: ignore
-    )
+    if any(len(spec.fields) > 0 for spec in table_metadata.partition_specs):

Review Comment:
   Yes, "recent" makes more sense. Fixed in the incoming commit.
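
   For illustration only, a rough sketch (under assumptions, not PyIceberg's `bin_pack_arrow_table`) of how Arrow record batches can be grouped into bins that approach a target file size before each bin becomes a WriteTask; the name `bin_pack_batches` is hypothetical:

       from typing import Iterator, List

       import pyarrow as pa


       def bin_pack_batches(df: pa.Table, target_file_size: int) -> Iterator[List[pa.RecordBatch]]:
           # Accumulate record batches until the next batch would push the bin
           # past the target size, then start a new bin.
           bin_: List[pa.RecordBatch] = []
           bin_nbytes = 0
           for batch in df.to_batches():
               if bin_ and bin_nbytes + batch.nbytes > target_file_size:
                   yield bin_
                   bin_, bin_nbytes = [], 0
               bin_.append(batch)
               bin_nbytes += batch.nbytes
           if bin_:
               yield bin_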



