This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 7bb96c5ec9 Improve `arrow-ipc` documentation (#6983)
7bb96c5ec9 is described below

commit 7bb96c5ec9f268412d863c22a3f09b28bc1ba7d6
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Jan 22 18:17:44 2025 -0500

    Improve `arrow-ipc` documentation (#6983)
    
    * Improve `arrow-ipc` documentation
    
    * Improve, reduce emphasis on Read/Write/Seek
    
    * Apply suggestions from code review
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
    
    ---------
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
---
 arrow-ipc/src/lib.rs    | 19 +++++++++++
 arrow-ipc/src/reader.rs | 87 +++++++++++++++++++++++++++++++++++++++++++++----
 arrow-ipc/src/writer.rs | 59 +++++++++++++++++++++++++++++----
 3 files changed, 153 insertions(+), 12 deletions(-)

diff --git a/arrow-ipc/src/lib.rs b/arrow-ipc/src/lib.rs
index a76083b939..4638abdb4e 100644
--- a/arrow-ipc/src/lib.rs
+++ b/arrow-ipc/src/lib.rs
@@ -17,7 +17,26 @@
 
 //! Support for the [Arrow IPC Format]
 //!
+//! The Arrow IPC format defines how to read and write [`RecordBatch`]es 
to/from
+//! a file or stream of bytes. This format can be used to serialize and 
deserialize
+//! data to files and over the network.
+//!
+//! There are two variants of the IPC format:
+//! 1. [IPC Streaming Format]: Supports streaming data sources, implemented by
+//!    [StreamReader] and [StreamWriter]
+//!
+//! 2. [IPC File Format]: Supports random access, implemented by [FileReader] 
and
+//!    [FileWriter].
+//!
+//! See the [`reader`] and [`writer`] modules for more information.
+//!
 //! [Arrow IPC Format]: 
https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc
+//! [IPC Streaming Format]: 
https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format
+//! [StreamReader]: reader::StreamReader
+//! [StreamWriter]: writer::StreamWriter
+//! [IPC File Format]: 
https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format
+//! [FileReader]: reader::FileReader
+//! [FileWriter]: writer::FileWriter
 
 #![warn(missing_docs)]
 pub mod convert;
diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs
index 4dcd56156e..b72785651b 100644
--- a/arrow-ipc/src/reader.rs
+++ b/arrow-ipc/src/reader.rs
@@ -17,8 +17,12 @@
 
 //! Arrow IPC File and Stream Readers
 //!
-//! The `FileReader` and `StreamReader` have similar interfaces,
-//! however the `FileReader` expects a reader that supports `Seek`ing
+//! # Notes
+//!
+//! The [`FileReader`] and [`StreamReader`] have similar interfaces,
+//! however the [`FileReader`] expects a reader that supports [`Seek`]ing
+//!
+//! [`Seek`]: std::io::Seek
 
 mod stream;
 
@@ -997,10 +1001,49 @@ impl FileReaderBuilder {
     }
 }
 
-/// Arrow File reader
+/// Arrow File Reader
+///
+/// Reads Arrow [`RecordBatch`]es from bytes in the [IPC File Format],
+/// providing random access to the record batches.
+///
+/// # See Also
+///
+/// * [`Self::set_index`] for random access
+/// * [`StreamReader`] for reading streaming data
+///
+/// # Example: Reading from a `File`
+/// ```
+/// # use std::io::Cursor;
+/// use arrow_array::record_batch;
+/// # use arrow_ipc::reader::FileReader;
+/// # use arrow_ipc::writer::FileWriter;
+/// # let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
+/// # let mut file = vec![]; // mimic a file for the example
+/// # {
+/// #  let mut writer = FileWriter::try_new(&mut file, 
&batch.schema()).unwrap();
+/// #  writer.write(&batch).unwrap();
+/// #  writer.write(&batch).unwrap();
+/// #  writer.finish().unwrap();
+/// # }
+/// # let mut file = Cursor::new(&file);
+/// let projection = None; // read all columns
+/// let mut reader = FileReader::try_new(&mut file, projection).unwrap();
+/// // Position the reader to the second batch
+/// reader.set_index(1).unwrap();
+/// // read batches from the reader using the Iterator trait
+/// let mut num_rows = 0;
+/// for batch in reader {
+///    let batch = batch.unwrap();
+///    num_rows += batch.num_rows();
+/// }
+/// assert_eq!(num_rows, 3);
+/// ```
+/// # Example: Reading from `mmap`ed file
 ///
-/// For an example creating Arrays with memory mapped (`mmap`) files see the 
[`zero_copy_ipc`] example.
+/// For an example creating Arrays without copying using memory mapped 
(`mmap`)
+/// files see the [`zero_copy_ipc`] example.
 ///
+/// [IPC File Format]: 
https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format
 /// [`zero_copy_ipc`]: 
https://github.com/apache/arrow-rs/blob/main/arrow/examples/zero_copy_ipc.rs
 pub struct FileReader<R> {
     /// File reader that supports reading and seeking
@@ -1078,7 +1121,7 @@ impl<R: Read + Seek> FileReader<R> {
         self.decoder.schema.clone()
     }
 
-    /// Read a specific record batch
+    /// Seek to a specific [`RecordBatch`]
     ///
     /// Sets the current block to the index, allowing random reads
     pub fn set_index(&mut self, index: usize) -> Result<(), ArrowError> {
@@ -1136,7 +1179,39 @@ impl<R: Read + Seek> RecordBatchReader for FileReader<R> 
{
     }
 }
 
-/// Arrow Stream reader
+/// Arrow Stream Reader
+///
+/// Reads Arrow [`RecordBatch`]es from bytes in the [IPC Streaming Format].
+///
+/// # See Also
+///
+/// * [`FileReader`] for random access.
+///
+/// # Example
+/// ```
+/// # use arrow_array::record_batch;
+/// # use arrow_ipc::reader::StreamReader;
+/// # use arrow_ipc::writer::StreamWriter;
+/// # let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
+/// # let mut stream = vec![]; // mimic a stream for the example
+/// # {
+/// #  let mut writer = StreamWriter::try_new(&mut stream, 
&batch.schema()).unwrap();
+/// #  writer.write(&batch).unwrap();
+/// #  writer.finish().unwrap();
+/// # }
+/// # let stream = stream.as_slice();
+/// let projection = None; // read all columns
+/// let mut reader = StreamReader::try_new(stream, projection).unwrap();
+/// // read batches from the reader using the Iterator trait
+/// let mut num_rows = 0;
+/// for batch in reader {
+///    let batch = batch.unwrap();
+///    num_rows += batch.num_rows();
+/// }
+/// assert_eq!(num_rows, 3);
+/// ```
+///
+/// [IPC Streaming Format]: 
https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format
 pub struct StreamReader<R> {
     /// Stream reader
     reader: R,
diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index ee5b9a54cc..1581df56de 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -17,8 +17,12 @@
 
 //! Arrow IPC File and Stream Writers
 //!
-//! The `FileWriter` and `StreamWriter` have similar interfaces,
-//! however the `FileWriter` expects a reader that supports `Seek`ing
+//! # Notes
+//!
+//! [`FileWriter`] and [`StreamWriter`] have similar interfaces,
+//! however the [`FileWriter`] expects a writer that supports [`Seek`]ing
+//!
+//! [`Seek`]: std::io::Seek
 
 use std::cmp::min;
 use std::collections::HashMap;
@@ -188,7 +192,7 @@ impl Default for IpcWriteOptions {
 /// Handles low level details of encoding [`Array`] and [`Schema`] into the
 /// [Arrow IPC Format].
 ///
-/// # Example:
+/// # Example
 /// ```
 /// # fn run() {
 /// # use std::sync::Arc;
@@ -905,7 +909,28 @@ impl DictionaryTracker {
     }
 }
 
-/// Writer for an IPC file
+/// Arrow File Writer
+///
+/// Writes Arrow [`RecordBatch`]es in the [IPC File Format].
+///
+/// # See Also
+///
+/// * [`StreamWriter`] for writing IPC Streams
+///
+/// # Example
+/// ```
+/// # use arrow_array::record_batch;
+/// # use arrow_ipc::writer::FileWriter;
+/// # let mut file = vec![]; // mimic a file for the example
+/// let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
+/// // create a new writer, the schema must be known in advance
+/// let mut writer = FileWriter::try_new(&mut file, 
&batch.schema()).unwrap();
+/// // write each batch to the underlying writer
+/// writer.write(&batch).unwrap();
+/// // When all batches are written, call finish to flush all buffers
+/// writer.finish().unwrap();
+/// ```
+/// [IPC File Format]: 
https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format
 pub struct FileWriter<W> {
     /// The object to write to
     writer: W,
@@ -1108,7 +1133,7 @@ impl<W: Write> FileWriter<W> {
         Ok(())
     }
 
-    /// Unwraps the the underlying writer.
+    /// Unwraps the underlying writer.
     ///
     /// The writer is flushed and the FileWriter is finished before returning.
     ///
@@ -1135,7 +1160,29 @@ impl<W: Write> RecordBatchWriter for FileWriter<W> {
     }
 }
 
-/// Writer for an IPC stream
+/// Arrow Stream Writer
+///
+/// Writes Arrow [`RecordBatch`]es to bytes using the [IPC Streaming Format].
+///
+/// # See Also
+///
+/// * [`FileWriter`] for writing IPC Files
+///
+/// # Example
+/// ```
+/// # use arrow_array::record_batch;
+/// # use arrow_ipc::writer::StreamWriter;
+/// # let mut stream = vec![]; // mimic a stream for the example
+/// let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
+/// // create a new writer, the schema must be known in advance
+/// let mut writer = StreamWriter::try_new(&mut stream, 
&batch.schema()).unwrap();
+/// // write each batch to the underlying stream
+/// writer.write(&batch).unwrap();
+/// // When all batches are written, call finish to flush all buffers
+/// writer.finish().unwrap();
+/// ```
+///
+/// [IPC Streaming Format]: 
https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format
 pub struct StreamWriter<W> {
     /// The object to write to
     writer: W,

Reply via email to