mbrobbel commented on code in PR #8293:
URL: https://github.com/apache/arrow-rs/pull/8293#discussion_r2349987168
##########
arrow-avro/src/reader/record.rs:
##########
@@ -976,6 +1172,127 @@ fn sign_cast_to<const N: usize>(raw: &[u8]) ->
Result<[u8; N], ArrowError> {
Ok(out)
}
+#[derive(Debug)]
+struct Projector {
+ writer_to_reader: Arc<[Option<usize>]>,
+ skip_decoders: Vec<Option<Skipper>>,
+ field_defaults: Vec<Option<AvroLiteral>>,
+ default_injections: Arc<[(usize, AvroLiteral)]>,
+}
+
+#[derive(Debug)]
+struct ProjectorBuilder<'a> {
+ rec: &'a ResolvedRecord,
+ reader_fields: Option<Arc<[AvroField]>>,
+}
+
+impl<'a> ProjectorBuilder<'a> {
+ #[inline]
+ fn try_new(rec: &'a ResolvedRecord) -> Self {
+ Self {
+ rec,
+ reader_fields: None,
+ }
+ }
+
+ #[inline]
+ fn with_reader_fields(mut self, reader_fields: &Arc<[AvroField]>) -> Self {
+ self.reader_fields = Some(reader_fields.clone());
+ self
+ }
+
+ #[inline]
+ fn build(self) -> Result<Projector, ArrowError> {
+ let reader_fields = self.reader_fields.ok_or_else(|| {
+ ArrowError::InvalidArgumentError(
+ "ProjectorBuilder requires reader_fields to be
provided".to_string(),
+ )
+ })?;
+ let mut field_defaults: Vec<Option<AvroLiteral>> =
Vec::with_capacity(reader_fields.len());
+ for avro_field in reader_fields.iter() {
+ if let Some(ResolutionInfo::DefaultValue(lit)) =
+ avro_field.data_type().resolution.as_ref()
+ {
+ field_defaults.push(Some(lit.clone()));
+ } else {
+ field_defaults.push(None);
+ }
+ }
+ let mut default_injections: Vec<(usize, AvroLiteral)> =
+ Vec::with_capacity(self.rec.default_fields.len());
+ for &idx in self.rec.default_fields.iter() {
+ let lit = field_defaults
+ .get(idx)
+ .and_then(|lit| lit.clone())
+ .unwrap_or(AvroLiteral::Null);
+ default_injections.push((idx, lit));
+ }
+ let mut skip_decoders: Vec<Option<Skipper>> =
+ Vec::with_capacity(self.rec.skip_fields.len());
+ for datatype in self.rec.skip_fields.iter() {
+ let skipper = match datatype {
+ Some(datatype) => Some(Skipper::from_avro(datatype)?),
+ None => None,
+ };
+ skip_decoders.push(skipper);
+ }
+ Ok(Projector {
+ writer_to_reader: self.rec.writer_to_reader.clone(),
+ skip_decoders,
+ field_defaults,
+ default_injections: default_injections.into(),
+ })
+ }
+}
+
+impl Projector {
+ #[inline]
+ fn project_default(&self, decoder: &mut Decoder, index: usize) ->
Result<(), ArrowError> {
+ // SAFETY: `index` is obtained by listing the reader's record fields
(i.e., from
+ // `decoders.iter_mut().enumerate()`), and `field_defaults` was built
in
+ // `ProjectorBuilder::build` to have exactly one element per reader
field.
+ // Therefore, `index < self.field_defaults.len()` always holds here, so
+ // `self.field_defaults[index]` cannot panic. We only take an
immutable reference
+ // via `.as_ref()`, and `self` is borrowed immutably.
+ if let Some(default_literal) = self.field_defaults[index].as_ref() {
+ decoder.append_default(default_literal)
+ } else {
+ decoder.append_null();
+ Ok(())
+ }
+ }
+
+ #[inline]
+ fn project_record(
+ &mut self,
+ buf: &mut AvroCursor<'_>,
+ encodings: &mut [Decoder],
+ ) -> Result<(), ArrowError> {
+ let n_writer = self.writer_to_reader.len();
+ let n_injections = self.default_injections.len();
+ for index in 0..(n_writer + n_injections) {
+ if index < n_writer {
+ match (
+ self.writer_to_reader[index],
+ self.skip_decoders[index].as_mut(),
+ ) {
Review Comment:
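Maybe you can use `Iterator::zip` to walk `writer_to_reader` and
`skip_decoders` together instead of indexing both by `index`? See
https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.zip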
##########
arrow-avro/src/reader/record.rs:
##########
@@ -976,6 +1172,127 @@ fn sign_cast_to<const N: usize>(raw: &[u8]) ->
Result<[u8; N], ArrowError> {
Ok(out)
}
+#[derive(Debug)]
+struct Projector {
+ writer_to_reader: Arc<[Option<usize>]>,
+ skip_decoders: Vec<Option<Skipper>>,
+ field_defaults: Vec<Option<AvroLiteral>>,
+ default_injections: Arc<[(usize, AvroLiteral)]>,
+}
+
+#[derive(Debug)]
+struct ProjectorBuilder<'a> {
+ rec: &'a ResolvedRecord,
+ reader_fields: Option<Arc<[AvroField]>>,
+}
+
+impl<'a> ProjectorBuilder<'a> {
+ #[inline]
+ fn try_new(rec: &'a ResolvedRecord) -> Self {
+ Self {
+ rec,
+ reader_fields: None,
+ }
+ }
+
+ #[inline]
+ fn with_reader_fields(mut self, reader_fields: &Arc<[AvroField]>) -> Self {
+ self.reader_fields = Some(reader_fields.clone());
+ self
+ }
+
+ #[inline]
+ fn build(self) -> Result<Projector, ArrowError> {
+ let reader_fields = self.reader_fields.ok_or_else(|| {
+ ArrowError::InvalidArgumentError(
+ "ProjectorBuilder requires reader_fields to be
provided".to_string(),
+ )
+ })?;
Review Comment:
Maybe `reader_fields` shouldn't be an `Option`, but simply a required argument
of `try_new`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]