geruh commented on code in PR #2864:
URL: https://github.com/apache/iceberg-python/pull/2864#discussion_r2680641676
##########
pyiceberg/table/__init__.py:
##########
@@ -1798,6 +1811,74 @@ def __init__(
self.delete_files = delete_files or set()
self.residual = residual
+ @staticmethod
+ def from_rest_response(
+ rest_task: RESTFileScanTask,
+ delete_files: list[RESTDeleteFile],
+ ) -> FileScanTask:
+ """Convert a RESTFileScanTask to a FileScanTask.
+
+ Args:
+ rest_task: The REST file scan task.
+ delete_files: The list of delete files from the ScanTasks response.
+
+ Returns:
+ A FileScanTask with the converted data and delete files.
+
+ Raises:
+ NotImplementedError: If equality delete files are encountered.
+ """
+ from pyiceberg.catalog.rest.scan_planning import RESTEqualityDeleteFile
+
+ data_file = _rest_file_to_data_file(rest_task.data_file)
+
+ resolved_deletes: set[DataFile] = set()
+ if rest_task.delete_file_references:
+ for idx in rest_task.delete_file_references:
+ delete_file = delete_files[idx]
+ if isinstance(delete_file, RESTEqualityDeleteFile):
+ raise NotImplementedError(f"PyIceberg does not yet support
equality deletes: {delete_file.file_path}")
+ resolved_deletes.add(_rest_file_to_data_file(delete_file))
+
+ return FileScanTask(
+ data_file=data_file,
+ delete_files=resolved_deletes,
+ residual=rest_task.residual_filter if rest_task.residual_filter
else ALWAYS_TRUE,
Review Comment:
Thanks for the review, @singhpk234! The residual filters from REST are not bound
in the normal sense. Currently, the residual is only used for the optimization
check in `count()`.
The actual row filtering still uses the full `row_filter`, not the residual.
This works correctly but is slightly inefficient.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]