Fokko commented on code in PR #471:
URL: https://github.com/apache/iceberg-python/pull/471#discussion_r1507463357
##########
pyiceberg/table/__init__.py:
##########
@@ -219,68 +220,41 @@ def property_as_int(properties: Dict[str, str],
property_name: str, default: Opt
class Transaction:
_table: Table
+ _table_metadata: TableMetadata
+ _autocommit: bool
_updates: Tuple[TableUpdate, ...]
_requirements: Tuple[TableRequirement, ...]
- def __init__(
- self,
- table: Table,
- actions: Optional[Tuple[TableUpdate, ...]] = None,
- requirements: Optional[Tuple[TableRequirement, ...]] = None,
- ):
+ def __init__(self, table: Table, autocommit: bool = False):
self._table = table
- self._updates = actions or ()
- self._requirements = requirements or ()
+ self._table_metadata = table.metadata
+ self._autocommit = autocommit
+ self._updates = ()
+ self._requirements = ()
def __enter__(self) -> Transaction:
"""Start a transaction to update the table."""
return self
def __exit__(self, _: Any, value: Any, traceback: Any) -> None:
"""Close and commit the transaction."""
- fresh_table = self.commit_transaction()
- # Update the new data in place
- self._table.metadata = fresh_table.metadata
- self._table.metadata_location = fresh_table.metadata_location
-
- def _append_updates(self, *new_updates: TableUpdate) -> Transaction:
- """Append updates to the set of staged updates.
-
- Args:
- *new_updates: Any new updates.
+ self.commit_transaction()
- Raises:
- ValueError: When the type of update is not unique.
+ def _apply(self, updates: Tuple[TableUpdate, ...], requirements:
Tuple[TableRequirement, ...] = ()) -> Transaction:
+ """Check if the requirements are met, and applies the updates to the
metadata."""
+ for requirement in requirements:
+ requirement.validate(self._table_metadata)
- Returns:
- Transaction object with the new updates appended.
- """
- for new_update in new_updates:
- # explicitly get type of new_update as new_update is an
instantiated class
- type_new_update = type(new_update)
- if any(isinstance(update, type_new_update) for update in
self._updates):
- raise ValueError(f"Updates in a single commit need to be
unique, duplicate: {type_new_update}")
- self._updates = self._updates + new_updates
- return self
+ self._updates += updates
+ self._requirements += requirements
- def _append_requirements(self, *new_requirements: TableRequirement) ->
Transaction:
- """Append requirements to the set of staged requirements.
+ self._table_metadata = update_table_metadata(self._table_metadata,
updates)
Review Comment:
The problem is that in the `Transaction`, we call the `_commit` method on
the table, and we don't carry through the metadata there. For the REST catalog,
this is not relevant, for the catalogs where you have to construct the metadata
yourself, it is. I like these additional checks, but I'm unsure if we want to
change the API to pass this through.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]