yjshen commented on a change in pull request #811:
URL: https://github.com/apache/arrow-datafusion/pull/811#discussion_r688298118
##########
File path: datafusion/src/datasource/mod.rs
##########
@@ -36,3 +47,231 @@ pub(crate) enum Source<R = Box<dyn std::io::Read + Send +
Sync + 'static>> {
/// Read data from a reader
Reader(std::sync::Mutex<Option<R>>),
}
+
+#[derive(Debug, Clone)]
+/// A single file that should be read, along with its schema, statistics
+/// and partition column values that need to be appended to each row.
+pub struct PartitionedFile {
+ /// Path for the file (e.g. URL, filesystem path, etc)
+ pub file_path: String,
+ /// Schema of the file
+ pub schema: Schema,
+ /// Statistics of the file
+ pub statistics: Statistics,
+ /// Values of partition columns to be appended to each row
+ pub partition_value: Option<Vec<ScalarValue>>,
+ /// Schema of partition columns
+ pub partition_schema: Option<Schema>,
+ // We may include row group range here for a more fine-grained parallel
execution
+}
+
+impl From<String> for PartitionedFile {
+ fn from(file_path: String) -> Self {
+ Self {
+ file_path,
+ schema: Schema::empty(),
+ statistics: Default::default(),
+ partition_value: None,
+ partition_schema: None,
+ }
+ }
+}
+
+impl std::fmt::Display for PartitionedFile {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ write!(f, "{}", self.file_path)
+ }
+}
+
+#[derive(Debug, Clone)]
+/// A collection of files that should be read in a single task
+pub struct FilePartition {
Review comment:
`FilePartition` is used as the unit of execution in Scan: a task is
responsible for reading one `FilePartition` at a time, much like
`ParquetPartition`, but I intended it to be more general so that it can be
used for all kinds of scans.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]