jecsand838 commented on code in PR #8006:
URL: https://github.com/apache/arrow-rs/pull/8006#discussion_r2238326885


##########
arrow-avro/src/reader/mod.rs:
##########
@@ -216,73 +440,227 @@ impl ReaderBuilder {
     /// - `batch_size` = 1024
     /// - `strict_mode` = false
     /// - `utf8_view` = false
-    /// - `schema` = None
+    /// - `reader_schema` = None
+    /// - `writer_schema_store` = None
+    /// - `active_fp` = None
+    /// - `static_store_mode` = false
     pub fn new() -> Self {
         Self::default()
     }
 
-    fn make_record_decoder(&self, schema: &AvroSchema<'_>) -> Result<RecordDecoder, ArrowError> {
-        let root_field = AvroFieldBuilder::new(schema)
-            .with_utf8view(self.utf8_view)
-            .with_strict_mode(self.strict_mode)
-            .build()?;
-        RecordDecoder::try_new_with_options(root_field.data_type(), self.utf8_view)
+    /// Sets the maximum number of rows to include in each `RecordBatch`.
+    ///
+    /// Defaults to `1024`.
+    pub fn with_batch_size(mut self, n: usize) -> Self {
+        self.batch_size = n;
+        self
     }
 
-    fn build_impl<R: BufRead>(self, reader: &mut R) -> Result<(Header, Decoder), ArrowError> {
-        let header = read_header(reader)?;
-        let record_decoder = if let Some(schema) = &self.schema {
-            self.make_record_decoder(schema)?
-        } else {
-            let avro_schema: Option<AvroSchema<'_>> = header
-                .schema()
-                .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
-            let avro_schema = avro_schema.ok_or_else(|| {
-                ArrowError::ParseError("No Avro schema present in file header".to_string())
-            })?;
-            self.make_record_decoder(&avro_schema)?
-        };
-        let decoder = Decoder::new(record_decoder, self.batch_size);
-        Ok((header, decoder))
+    /// Configures the reader to decode string data into `StringViewArray`.
+    ///
+    /// When enabled, string data is decoded into `StringViewArray` instead of `StringArray`.
+    /// This can improve performance for strings that are frequently accessed.
+    ///
+    /// Defaults to `false`.
+    pub fn with_utf8_view(mut self, enabled: bool) -> Self {
+        self.utf8_view = enabled;
+        self
+    }
+
+    /// Get whether StringViewArray is enabled for string data
+    pub fn use_utf8view(&self) -> bool {
+        self.utf8_view
+    }
+
+    /// Enables or disables strict schema resolution mode.
+    ///
+    /// When enabled (`true`), an error is returned if a field in the writer's schema
+    /// cannot be resolved to a field in the reader's schema. When disabled (`false`),
+    /// any unresolvable fields are simply skipped during decoding.
+    ///
+    /// Defaults to `false`.
+    pub fn with_strict_mode(mut self, enabled: bool) -> Self {
+        self.strict_mode = enabled;
+        self
     }
 
-    /// Sets the row-based batch size
-    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
-        self.batch_size = batch_size;
+    /// Sets the reader's Avro schema, which the decoded data will be projected into.
+    ///
+    /// If a reader schema is provided, the decoder will perform schema resolution,
+    /// converting data from the writer's schema (read from the file or schema store)
+    /// to the specified reader schema. If not set, the writer's schema is used.
+    ///
+    /// Defaults to `None`.
+    pub fn with_reader_schema(mut self, s: AvroSchema<'static>) -> Self {
+        self.reader_schema = Some(s);
         self
     }
 
-    /// Set whether to use StringViewArray for string data
+    /// Sets the `SchemaStore` used for resolving writer schemas.
     ///
-    /// When enabled, string data from Avro files will be loaded into
-    /// Arrow's StringViewArray instead of the standard StringArray.
-    pub fn with_utf8_view(mut self, utf8_view: bool) -> Self {

Review Comment:
   I tried to clean up the diff with my latest pushes. Let me know if it is
   now easier to follow.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to