rdblue commented on code in PR #41: URL: https://github.com/apache/iceberg-python/pull/41#discussion_r1429245410
########## pyiceberg/table/__init__.py: ########## @@ -1904,3 +2001,144 @@ def _generate_snapshot_id() -> int: snapshot_id = snapshot_id if snapshot_id >= 0 else snapshot_id * -1 return snapshot_id + + +@dataclass(frozen=True) +class WriteTask: + df: pa.Table + # Later to be extended with partition information + + +def _generate_datafile_filename(extension: str) -> str: + # Mimics the behavior in the Java API: + # https://github.com/apache/iceberg/blob/a582968975dd30ff4917fbbe999f1be903efac02/core/src/main/java/org/apache/iceberg/io/OutputFileFactory.java#L92-L101 + return f"00000-0-{uuid.uuid4()}-0.{extension}" + + +def _new_manifest_path(location: str, num: int = 0) -> str: + return f'{location}/metadata/{uuid.uuid4()}-m{num}.avro' + + +def _generate_manifest_list_filename(snapshot_id: int, attempt: int = 0) -> str: + # Mimics the behavior in Java: + # https://github.com/apache/iceberg/blob/c862b9177af8e2d83122220764a056f3b96fd00c/core/src/main/java/org/apache/iceberg/SnapshotProducer.java#L491 + return f"snap-{snapshot_id}-{attempt}-{uuid.uuid4()}.avro" Review Comment: I think that we want the UUIDs to be the ID of the commit so that we can associate files that are in the same commit, rather than generating a UUID for every file. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org