Fokko commented on code in PR #1878:
URL: https://github.com/apache/iceberg-python/pull/1878#discussion_r2030233703
##########
pyiceberg/table/upsert_util.py:
##########
@@ -82,14 +82,43 @@ def get_rows_to_update(source_table: pa.Table,
target_table: pa.Table, join_cols
],
)
- return (
- source_table
- # We already know that the schema is compatible, this is to fix large_
types
- .cast(target_table.schema)
- .join(target_table, keys=list(join_cols_set), join_type="inner",
left_suffix="-lhs", right_suffix="-rhs")
- .filter(diff_expr)
- .drop_columns([f"{col}-rhs" for col in non_key_cols])
- .rename_columns({f"{col}-lhs" if col not in join_cols else col: col
for col in source_table.column_names})
- # Finally cast to the original schema since it doesn't carry
nullability:
- # https://github.com/apache/arrow/issues/45557
- ).cast(target_table.schema)
+ try:
+ return (
+ source_table
+ # We already know that the schema is compatible, this is to fix
large_ types
+ .cast(target_table.schema)
+ .join(target_table, keys=list(join_cols_set), join_type="inner",
left_suffix="-lhs", right_suffix="-rhs")
+ .filter(diff_expr)
+ .drop_columns([f"{col}-rhs" for col in non_key_cols])
+ .rename_columns({f"{col}-lhs" if col not in join_cols else col:
col for col in source_table.column_names})
+ # Finally cast to the original schema since it doesn't carry
nullability:
+ # https://github.com/apache/arrow/issues/45557
+ ).cast(target_table.schema)
+ except pa.ArrowInvalid:
+ # When we are not able to compare (e.g. due to unsupported types),
+ # fall back to selecting only rows in the source table that do NOT
already exist in the target.
+ # See: https://github.com/apache/arrow/issues/35785
+
+ MARKER_COLUMN_NAME = "__from_target"
+
+ assert MARKER_COLUMN_NAME not in join_cols_set
Review Comment:
We try to avoid `assert` outside of tests, since assertions are stripped
when Python runs with `-O` and the check would silently disappear. Could
you raise a `ValueError` instead?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]