diff --git a/arrow-ipc/src/lib.rs b/arrow-ipc/src/lib.rs index a76083b9395..4638abdb4ed 100644 --- a/arrow-ipc/src/lib.rs +++ b/arrow-ipc/src/lib.rs @@ -17,7 +17,26 @@ //! Support for the [Arrow IPC Format] //! +//! The Arrow IPC format defines how to read and write [`RecordBatch`]es to/from +//! a file or stream of bytes. This format can be used to serialize and deserialize +//! data to files and over the network. +//! +//! There are two variants of the IPC format: +//! 1. [IPC Streaming Format]: Supports streaming data sources, implemented by +//! [StreamReader] and [StreamWriter] +//! +//! 2. [IPC File Format]: Supports random access, implemented by [FileReader] and +//! [FileWriter]. +//! +//! See the [`reader`] and [`writer`] modules for more information. +//! //! [Arrow IPC Format]: https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc +//! [IPC Streaming Format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format +//! [StreamReader]: reader::StreamReader +//! [StreamWriter]: writer::StreamWriter +//! [IPC File Format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format +//! [FileReader]: reader::FileReader +//! [FileWriter]: writer::FileWriter #![warn(missing_docs)] pub mod convert; diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs index 4dcd56156ee..b72785651b1 100644 --- a/arrow-ipc/src/reader.rs +++ b/arrow-ipc/src/reader.rs @@ -17,8 +17,12 @@ //! Arrow IPC File and Stream Readers //! -//! The `FileReader` and `StreamReader` have similar interfaces, -//! however the `FileReader` expects a reader that supports `Seek`ing +//! # Notes +//! +//! The [`FileReader`] and [`StreamReader`] have similar interfaces, +//! however the [`FileReader`] expects a reader that supports [`Seek`]ing +//! +//! 
[`Seek`]: std::io::Seek mod stream; @@ -997,10 +1001,49 @@ impl FileReaderBuilder { } } -/// Arrow File reader +/// Arrow File Reader +/// +/// Reads Arrow [`RecordBatch`]es from bytes in the [IPC File Format], +/// providing random access to the record batches. +/// +/// # See Also +/// +/// * [`Self::set_index`] for random access +/// * [`StreamReader`] for reading streaming data +/// +/// # Example: Reading from a `File` +/// ``` +/// # use std::io::Cursor; +/// # use arrow_array::record_batch; +/// # use arrow_ipc::reader::FileReader; +/// # use arrow_ipc::writer::FileWriter; +/// # let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap(); +/// # let mut file = vec![]; // mimic a file for the example +/// # { +/// # let mut writer = FileWriter::try_new(&mut file, &batch.schema()).unwrap(); +/// # writer.write(&batch).unwrap(); +/// # writer.write(&batch).unwrap(); +/// # writer.finish().unwrap(); +/// # } +/// # let mut file = Cursor::new(&file); +/// let projection = None; // read all columns +/// let mut reader = FileReader::try_new(&mut file, projection).unwrap(); +/// // Position the reader to the second batch +/// reader.set_index(1).unwrap(); +/// // read batches from the reader using the Iterator trait +/// let mut num_rows = 0; +/// for batch in reader { +/// let batch = batch.unwrap(); +/// num_rows += batch.num_rows(); +/// } +/// assert_eq!(num_rows, 3); +/// ``` +/// # Example: Reading from `mmap`ed file /// -/// For an example creating Arrays with memory mapped (`mmap`) files see the [`zero_copy_ipc`] example. +/// For an example creating Arrays without copying using memory mapped (`mmap`) +/// files see the [`zero_copy_ipc`] example. 
/// +/// [IPC File Format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format /// [`zero_copy_ipc`]: https://github.com/apache/arrow-rs/blob/main/arrow/examples/zero_copy_ipc.rs pub struct FileReader { /// File reader that supports reading and seeking @@ -1078,7 +1121,7 @@ impl FileReader { self.decoder.schema.clone() } - /// Read a specific record batch + /// Seek to a specific [`RecordBatch`] /// /// Sets the current block to the index, allowing random reads pub fn set_index(&mut self, index: usize) -> Result<(), ArrowError> { @@ -1136,7 +1179,39 @@ impl RecordBatchReader for FileReader { } } -/// Arrow Stream reader +/// Arrow Stream Reader +/// +/// Reads Arrow [`RecordBatch`]es from bytes in the [IPC Streaming Format]. +/// +/// # See Also +/// +/// * [`FileReader`] for random access. +/// +/// # Example +/// ``` +/// # use arrow_array::record_batch; +/// # use arrow_ipc::reader::StreamReader; +/// # use arrow_ipc::writer::StreamWriter; +/// # let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap(); +/// # let mut stream = vec![]; // mimic a stream for the example +/// # { +/// # let mut writer = StreamWriter::try_new(&mut stream, &batch.schema()).unwrap(); +/// # writer.write(&batch).unwrap(); +/// # writer.finish().unwrap(); +/// # } +/// # let stream = stream.as_slice(); +/// let projection = None; // read all columns +/// let mut reader = StreamReader::try_new(stream, projection).unwrap(); +/// // read batches from the reader using the Iterator trait +/// let mut num_rows = 0; +/// for batch in reader { +/// let batch = batch.unwrap(); +/// num_rows += batch.num_rows(); +/// } +/// assert_eq!(num_rows, 3); +/// ``` +/// +/// [IPC Streaming Format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format pub struct StreamReader { /// Stream reader reader: R, diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs index ee5b9a54cc9..1581df56dee 100644 --- a/arrow-ipc/src/writer.rs +++ 
b/arrow-ipc/src/writer.rs @@ -17,8 +17,12 @@ //! Arrow IPC File and Stream Writers //! -//! The `FileWriter` and `StreamWriter` have similar interfaces, -//! however the `FileWriter` expects a reader that supports `Seek`ing +//! # Notes +//! +//! [`FileWriter`] and [`StreamWriter`] have similar interfaces, +//! however the [`FileWriter`] expects a writer that supports [`Seek`]ing +//! +//! [`Seek`]: std::io::Seek use std::cmp::min; use std::collections::HashMap; @@ -188,7 +192,7 @@ impl Default for IpcWriteOptions { /// Handles low level details of encoding [`Array`] and [`Schema`] into the /// [Arrow IPC Format]. /// -/// # Example: +/// # Example /// ``` /// # fn run() { /// # use std::sync::Arc; @@ -905,7 +909,28 @@ impl DictionaryTracker { } } -/// Writer for an IPC file +/// Arrow File Writer +/// +/// Writes Arrow [`RecordBatch`]es in the [IPC File Format]. +/// +/// # See Also +/// +/// * [`StreamWriter`] for writing IPC Streams +/// +/// # Example +/// ``` +/// # use arrow_array::record_batch; +/// # use arrow_ipc::writer::FileWriter; +/// # let mut file = vec![]; // mimic a file for the example +/// let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap(); +/// // create a new writer, the schema must be known in advance +/// let mut writer = FileWriter::try_new(&mut file, &batch.schema()).unwrap(); +/// // write each batch to the underlying writer +/// writer.write(&batch).unwrap(); +/// // When all batches are written, call finish to flush all buffers +/// writer.finish().unwrap(); +/// ``` +/// [IPC File Format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format pub struct FileWriter { /// The object to write to writer: W, @@ -1108,7 +1133,7 @@ impl FileWriter { Ok(()) } - /// Unwraps the the underlying writer. + /// Unwraps the underlying writer. /// /// The writer is flushed and the FileWriter is finished before returning. 
/// @@ -1135,7 +1160,29 @@ impl RecordBatchWriter for FileWriter { } } -/// Writer for an IPC stream +/// Arrow Stream Writer +/// +/// Writes Arrow [`RecordBatch`]es to bytes using the [IPC Streaming Format]. +/// +/// # See Also +/// +/// * [`FileWriter`] for writing IPC Files +/// +/// # Example +/// ``` +/// # use arrow_array::record_batch; +/// # use arrow_ipc::writer::StreamWriter; +/// # let mut stream = vec![]; // mimic a stream for the example +/// let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap(); +/// // create a new writer, the schema must be known in advance +/// let mut writer = StreamWriter::try_new(&mut stream, &batch.schema()).unwrap(); +/// // write each batch to the underlying stream +/// writer.write(&batch).unwrap(); +/// // When all batches are written, call finish to flush all buffers +/// writer.finish().unwrap(); +/// ``` +/// +/// [IPC Streaming Format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format pub struct StreamWriter { /// The object to write to writer: W,