wgtmac commented on code in PR #489:
URL: https://github.com/apache/iceberg-cpp/pull/489#discussion_r2664070373


##########
src/iceberg/table_scan.h:
##########
@@ -50,175 +63,293 @@ class ICEBERG_EXPORT FileScanTask : public ScanTask {
   ///
   /// \param data_file The data file to read.
   /// \param delete_files Delete files that apply to this data file.
-  /// \param residual_filter Optional residual filter to apply after reading.
+  /// \param filter Optional residual filter to apply after reading.
   explicit FileScanTask(std::shared_ptr<DataFile> data_file,
                         std::vector<std::shared_ptr<DataFile>> delete_files = 
{},
-                        std::shared_ptr<Expression> residual_filter = nullptr);
+                        std::shared_ptr<Expression> filter = nullptr);
 
   /// \brief The data file that should be read by this scan task.
-  const std::shared_ptr<DataFile>& data_file() const;
+  const std::shared_ptr<DataFile>& data_file() const { return data_file_; }
 
   /// \brief Delete files that apply to this data file.
-  const std::vector<std::shared_ptr<DataFile>>& delete_files() const;
+  const std::vector<std::shared_ptr<DataFile>>& delete_files() const {
+    return delete_files_;
+  }
 
   /// \brief Residual filter to apply after reading.
-  const std::shared_ptr<Expression>& residual_filter() const;
-
-  /// \brief Check if any deletes need to be applied.
-  bool has_deletes() const;
-
-  /// \brief Check if a residual filter needs to be applied.
-  bool has_residual_filter() const;
+  const std::shared_ptr<Expression>& residual_filter() const { return 
residual_filter_; }
 
+  Kind kind() const override { return Kind::kFileScanTask; }
   int64_t size_bytes() const override;
   int32_t files_count() const override;
   int64_t estimated_row_count() const override;
 
-  /**
-   * \brief Returns a C-ABI compatible ArrowArrayStream to read the data for 
this task.
-   *
-   * \param io The FileIO instance for accessing the file data.
-   * \param projected_schema The projected schema for reading the data.
-   * \param filter Optional filter expression to apply during reading.
-   * \return A Result containing an ArrowArrayStream, or an error on failure.
-   */
+  /// TODO(gangwu): move it to iceberg/data/task_scanner.h
+  ///
+  /// \brief Returns a C-ABI compatible ArrowArrayStream to read the data for 
this task.
+  ///
+  /// \param io The FileIO instance for accessing the file data.
+  /// \param projected_schema The projected schema for reading the data.
+  /// \return A Result containing an ArrowArrayStream, or an error on failure.
   Result<ArrowArrayStream> ToArrow(const std::shared_ptr<FileIO>& io,
-                                   const std::shared_ptr<Schema>& 
projected_schema,
-                                   const std::shared_ptr<Expression>& filter) 
const;
+                                   std::shared_ptr<Schema> projected_schema) 
const;
 
  private:
-  /// \brief Data file metadata.
   std::shared_ptr<DataFile> data_file_;
-  /// \brief Delete files that apply to this data file.
   std::vector<std::shared_ptr<DataFile>> delete_files_;
-  /// \brief Residual filter to apply after reading.
   std::shared_ptr<Expression> residual_filter_;
 };
 
-/// \brief Scan context holding snapshot and scan-specific metadata.
+namespace internal {
+
+// Internal table scan context used by different scan implementations.
 struct TableScanContext {
-  /// \brief Table metadata.
-  std::shared_ptr<TableMetadata> table_metadata;
-  /// \brief Snapshot to scan.
-  std::shared_ptr<Snapshot> snapshot;
-  /// \brief Projected schema.
-  std::shared_ptr<Schema> projected_schema;
-  /// \brief Filter expression to apply.
+  std::optional<int64_t> snapshot_id;
   std::shared_ptr<Expression> filter;
-  /// \brief Whether the scan is case-sensitive.
-  bool case_sensitive = false;
-  /// \brief Additional options for the scan.
+  bool ignore_residuals{false};
+  bool case_sensitive{true};
+  bool return_column_stats{false};
+  std::unordered_set<int32_t> columns_to_keep_stats;
+  std::vector<std::string> selected_columns;
+  std::shared_ptr<Schema> projected_schema;
   std::unordered_map<std::string, std::string> options;
-  /// \brief Optional limit on the number of rows to scan.
-  std::optional<int64_t> limit;
+  bool from_snapshot_id_inclusive{false};
+  std::optional<int64_t> from_snapshot_id;
+  std::optional<int64_t> to_snapshot_id;

Review Comment:
   I think that may be a little bit complicated and `from/to` do not need to 
appear at the same time.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to