Commit

clippy
avantgardnerio committed Dec 4, 2024
1 parent d0b26a0 commit 6ca22df
Showing 18 changed files with 50 additions and 48 deletions.
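Most of the hunks below change only the leading whitespace inside doc comments, so the removed (`-`) and added (`+`) lines render identically in this plain-text view. A minimal sketch of the doc-comment list pattern that rustdoc/clippy doc lints distinguish (the specific lint, e.g. `doc_lazy_continuation`, is an assumption; the direction of the whitespace change is not visible in this rendering):

```rust
/// Lazy continuation: the wrapped line is flush with the `///` margin, so doc
/// lints may not treat it as part of the bullet above it.
///
/// * Strings with length > 12: the first four bytes are stored inline in the
/// view and the entire string is stored in one of the buffers.
pub struct LazyContinuation;

/// Indented continuation: the extra spaces keep the wrapped text attached to
/// the bullet item it belongs to.
///
/// * Strings with length > 12: the first four bytes are stored inline in the
///   view and the entire string is stored in one of the buffers.
pub struct IndentedContinuation;
```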
2 changes: 1 addition & 1 deletion arrow-array/src/array/byte_view_array.rs
@@ -66,7 +66,7 @@ use super::ByteArrayType;
/// * Strings with length <= 12 are stored directly in the view.
///
/// * Strings with length > 12: The first four bytes are stored inline in the
-/// view and the entire string is stored in one of the buffers.
+/// view and the entire string is stored in one of the buffers.
///
/// Unlike [`GenericByteArray`], there are no constraints on the offsets other
/// than they must point into a valid buffer. However, they can be out of order,
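To make the view layout described in this doc comment concrete, a small sketch (assuming the `StringViewArray` `From<Vec<&str>>` constructor and `data_buffers` accessor from `arrow-array`):

```rust
use arrow_array::StringViewArray;

fn main() {
    // "short" (5 bytes, <= 12) is stored entirely inline in its view; the
    // longer value keeps its first four bytes inline and the full string
    // in one of the array's data buffers.
    let array = StringViewArray::from(vec!["short", "a string longer than twelve bytes"]);
    assert_eq!(array.value(0), "short");
    assert_eq!(array.value(1), "a string longer than twelve bytes");
    // Only the long value needs backing storage outside the views.
    assert!(!array.data_buffers().is_empty());
}
```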
2 changes: 1 addition & 1 deletion arrow-cast/src/cast/mod.rs
@@ -577,7 +577,7 @@ fn timestamp_to_date32<T: ArrowTimestampType>(
/// * Numeric to boolean: 0 returns `false`, any other value returns `true`
/// * List to List: the underlying data type is cast
/// * List to FixedSizeList: the underlying data type is cast. If safe is true and a list element
-/// has the wrong length it will be replaced with NULL, otherwise an error will be returned
+/// has the wrong length it will be replaced with NULL, otherwise an error will be returned
/// * Primitive to List: a list array with 1 value per slot is created
/// * Date32 and Date64: precision lost when going to higher interval
/// * Time32 and Time64: precision lost when going to higher interval
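As a sketch of the numeric-to-boolean rule listed above (using `arrow_cast::cast::cast`; zero maps to `false`, any other value to `true`):

```rust
use arrow_array::{Array, BooleanArray, Int32Array};
use arrow_cast::cast::cast;
use arrow_schema::DataType;

fn main() {
    let ints = Int32Array::from(vec![0, 1, 42]);
    // Cast Int32 -> Boolean: 0 becomes false, non-zero becomes true.
    let bools = cast(&ints, &DataType::Boolean).unwrap();
    let bools = bools.as_any().downcast_ref::<BooleanArray>().unwrap();
    assert!(!bools.value(0));
    assert!(bools.value(1));
    assert!(bools.value(2));
}
```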
4 changes: 2 additions & 2 deletions arrow-cast/src/parse.rs
@@ -289,8 +289,8 @@ fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
/// * `23:59:59`
/// * `6:00 pm`
//
-/// Internally, this function uses the `chrono` library for the
-/// time parsing
+/// Internally, this function uses the `chrono` library for the
+/// time parsing
///
/// ## Timezone / Offset Handling
///
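The surrounding doc describes arrow-cast's chrono-backed parsing. As an illustration from the same module, a sketch using the sibling timestamp parser `string_to_timestamp_nanos` (the time-of-day parser documented in this hunk is not shown here):

```rust
use arrow_cast::parse::string_to_timestamp_nanos;

fn main() {
    // An explicit offset is honored...
    let with_offset = string_to_timestamp_nanos("2024-12-04T12:00:00+02:00").unwrap();
    // ...while a naive timestamp (no offset) is also accepted.
    let without_offset = string_to_timestamp_nanos("2024-12-04 12:00:00").unwrap();
    println!("{with_offset} vs {without_offset}");
}
```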
4 changes: 2 additions & 2 deletions arrow-data/src/data.rs
@@ -1179,8 +1179,8 @@ impl ArrayData {
///
/// Does not (yet) check
/// 1. Union type_ids are valid see [#85](https://github.com/apache/arrow-rs/issues/85)
-/// Validates the the null count is correct and that any
-/// nullability requirements of its children are correct
+/// Validates the the null count is correct and that any
+/// nullability requirements of its children are correct
pub fn validate_nulls(&self) -> Result<(), ArrowError> {
if let Some(nulls) = &self.nulls {
let actual = nulls.len() - nulls.inner().count_set_bits();
4 changes: 2 additions & 2 deletions arrow-data/src/equal/mod.rs
@@ -144,8 +144,8 @@ fn equal_range(
/// * their null counts are equal
/// * their null bitmaps are equal
/// * each of their items are equal
-/// two items are equal when their in-memory representation is physically equal (i.e. same bit content).
-/// The physical comparison depend on the data type.
+/// two items are equal when their in-memory representation is physically equal (i.e. same bit content).
+/// The physical comparison depend on the data type.
/// # Panics
/// This function may panic whenever any of the [ArrayData] does not follow the Arrow specification.
/// (e.g. wrong number of buffers, buffer `len` does not correspond to the declared `len`)
4 changes: 2 additions & 2 deletions arrow-flight/src/decode.rs
@@ -225,8 +225,8 @@ impl futures::Stream for FlightRecordBatchStream {
/// Example usecases
///
/// 1. Using this low level stream it is possible to receive a steam
-/// of RecordBatches in FlightData that have different schemas by
-/// handling multiple schema messages separately.
+/// of RecordBatches in FlightData that have different schemas by
+/// handling multiple schema messages separately.
pub struct FlightDataDecoder {
/// Underlying data stream
response: BoxStream<'static, Result<FlightData>>,
8 changes: 4 additions & 4 deletions arrow-flight/src/encode.rs
@@ -39,10 +39,10 @@ use futures::{ready, stream::BoxStream, Stream, StreamExt};
///
/// # Caveats
/// 1. When [`DictionaryHandling`] is [`DictionaryHandling::Hydrate`], [`DictionaryArray`](arrow_array::array::DictionaryArray)s
-/// are converted to their underlying types prior to transport.
-/// When [`DictionaryHandling`] is [`DictionaryHandling::Resend`], Dictionary [`FlightData`] is sent with every
-/// [`RecordBatch`] that contains a [`DictionaryArray`](arrow_array::array::DictionaryArray).
-/// See <https://github.com/apache/arrow-rs/issues/3389>.
+/// are converted to their underlying types prior to transport.
+/// When [`DictionaryHandling`] is [`DictionaryHandling::Resend`], Dictionary [`FlightData`] is sent with every
+/// [`RecordBatch`] that contains a [`DictionaryArray`](arrow_array::array::DictionaryArray).
+/// See <https://github.com/apache/arrow-rs/issues/3389>.
///
/// # Example
/// ```no_run
8 changes: 4 additions & 4 deletions arrow-flight/src/lib.rs
@@ -26,14 +26,14 @@
//! This crate contains:
//!
//! 1. Low level [prost] generated structs
-//! for Flight gRPC protobuf messages, such as [`FlightData`], [`FlightInfo`],
-//! [`Location`] and [`Ticket`].
+//! for Flight gRPC protobuf messages, such as [`FlightData`], [`FlightInfo`],
+//! [`Location`] and [`Ticket`].
//!
//! 2. Low level [tonic] generated [`flight_service_client`] and
-//! [`flight_service_server`].
+//! [`flight_service_server`].
//!
//! 3. Experimental support for [Flight SQL] in [`sql`]. Requires the
-//! `flight-sql-experimental` feature of this crate to be activated.
+//! `flight-sql-experimental` feature of this crate to be activated.
//!
//! [Flight SQL]: https://arrow.apache.org/docs/format/FlightSql.html
#![allow(rustdoc::invalid_html_tags)]
2 changes: 1 addition & 1 deletion arrow-ord/src/sort.rs
@@ -370,7 +370,7 @@ fn sort_fixed_size_list(
}

#[inline(never)]
-fn sort_impl<T: ?Sized + Copy>(
+fn sort_impl<T: Copy>(
options: SortOptions,
valids: &mut [(u32, T)],
nulls: &[u32],
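The change above drops `?Sized` from the generic bound. Since `Copy: Clone` and `Clone: Sized`, a `T: Copy` bound already forces `T: Sized`, so the relaxation was inert; presumably this is what clippy's `needless_maybe_sized` lint flags (the lint name is an assumption). A minimal sketch:

```rust
// Both signatures accept exactly the same set of types: `Copy` already
// implies `Sized` (via `Clone: Sized`), so the `?Sized` relaxation has no effect.
fn with_relaxed_bound<T: ?Sized + Copy>(v: T) -> T {
    v
}

fn with_plain_bound<T: Copy>(v: T) -> T {
    v
}

fn main() {
    assert_eq!(with_relaxed_bound(5), with_plain_bound(5));
}
```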
3 changes: 1 addition & 2 deletions arrow-schema/src/fields.rs
@@ -389,14 +389,13 @@ impl UnionFields {
let mut set = 0_u128;
type_ids
.into_iter()
-.map(|idx| {
+.inspect(|&idx| {
let mask = 1_u128 << idx;
if (set & mask) != 0 {
panic!("duplicate type id: {}", idx);
} else {
set |= mask;
}
-idx
})
.zip(fields)
.collect()
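The `.map` closure above only performed a side effect and returned its input unchanged, which is what `Iterator::inspect` expresses directly, so the element no longer has to be passed back out of the closure. A small equivalence sketch:

```rust
fn main() {
    let mut count_map = 0;
    // A `map` that mutates state and returns its input unchanged...
    let via_map: Vec<i32> = vec![1, 2, 3]
        .into_iter()
        .map(|x| {
            count_map += 1;
            x
        })
        .collect();

    let mut count_inspect = 0;
    // ...behaves the same as `inspect`, which runs the side effect and
    // passes each element through untouched.
    let via_inspect: Vec<i32> = vec![1, 2, 3]
        .into_iter()
        .inspect(|_| count_inspect += 1)
        .collect();

    assert_eq!(via_map, via_inspect);
    assert_eq!(count_map, count_inspect);
}
```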
14 changes: 7 additions & 7 deletions arrow-string/src/substring.rs
@@ -34,11 +34,11 @@ use std::sync::Arc;
/// # Arguments
///
/// * `start` - The start index of all substrings.
-/// If `start >= 0`, then count from the start of the string,
-/// otherwise count from the end of the string.
+/// If `start >= 0`, then count from the start of the string,
+/// otherwise count from the end of the string.
///
/// * `length`(option) - The length of all substrings.
-/// If `length` is [None], then the substring is from `start` to the end of the string.
+/// If `length` is [None], then the substring is from `start` to the end of the string.
///
/// Attention: Both `start` and `length` are counted by byte, not by char.
///
@@ -56,7 +56,7 @@ use std::sync::Arc;
/// - The function errors when the passed array is not a [`GenericStringArray`], [`GenericBinaryArray`], [`FixedSizeBinaryArray`]
/// or [`DictionaryArray`] with supported array type as its value type.
/// - The function errors if the offset of a substring in the input array is at invalid char boundary (only for \[Large\]String array).
-/// It is recommended to use [`substring_by_char`] if the input array may contain non-ASCII chars.
+/// It is recommended to use [`substring_by_char`] if the input array may contain non-ASCII chars.
///
/// ## Example of trying to get an invalid utf-8 format substring
/// ```
@@ -159,11 +159,11 @@ pub fn substring(
/// * `array` - The input string array
///
/// * `start` - The start index of all substrings.
-/// If `start >= 0`, then count from the start of the string,
-/// otherwise count from the end of the string.
+/// If `start >= 0`, then count from the start of the string,
+/// otherwise count from the end of the string.
///
/// * `length`(option) - The length of all substrings.
-/// If `length` is `None`, then the substring is from `start` to the end of the string.
+/// If `length` is `None`, then the substring is from `start` to the end of the string.
///
/// Attention: Both `start` and `length` are counted by char.
///
3 changes: 3 additions & 0 deletions parquet/src/arrow/array_reader/mod.rs
@@ -61,12 +61,15 @@ pub use struct_array::StructArrayReader;

/// Array reader reads parquet data into arrow array.
pub trait ArrayReader: Send {
+#[allow(dead_code)]
fn as_any(&self) -> &dyn Any;

/// Returns the arrow type of this array reader.
+#[allow(dead_code)]
fn get_data_type(&self) -> &ArrowType;

/// Reads at most `batch_size` records into an arrow array and return it.
+#[allow(dead_code)]
fn next_batch(&mut self, batch_size: usize) -> Result<ArrayRef> {
self.read_records(batch_size)?;
self.consume_batch()
10 changes: 5 additions & 5 deletions parquet/src/arrow/arrow_reader/mod.rs
@@ -353,11 +353,11 @@ impl ArrowReaderOptions {
/// This structure allows
///
/// 1. Loading metadata for a file once and then using that same metadata to
-/// construct multiple separate readers, for example, to distribute readers
-/// across multiple threads
+/// construct multiple separate readers, for example, to distribute readers
+/// across multiple threads
///
/// 2. Using a cached copy of the [`ParquetMetadata`] rather than reading it
-/// from the file each time a reader is constructed.
+/// from the file each time a reader is constructed.
///
/// [`ParquetMetadata`]: crate::file::metadata::ParquetMetaData
#[derive(Debug, Clone)]
@@ -553,10 +553,10 @@ impl<T: ChunkReader + 'static> ParquetRecordBatchReaderBuilder<T> {
/// This interface allows:
///
/// 1. Loading metadata once and using it to create multiple builders with
-/// potentially different settings or run on different threads
+/// potentially different settings or run on different threads
///
/// 2. Using a cached copy of the metadata rather than re-reading it from the
-/// file each time a reader is constructed.
+/// file each time a reader is constructed.
///
/// See the docs on [`ArrowReaderMetadata`] for more details
///
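A sketch of the load-once / build-many pattern these docs describe (the file path and error handling are illustrative; assumes `ArrowReaderMetadata::load` and `ParquetRecordBatchReaderBuilder::new_with_metadata`):

```rust
use std::fs::File;

use parquet::arrow::arrow_reader::{
    ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder,
};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let file = File::open("data.parquet")?; // hypothetical file
    // Parse the footer / metadata once...
    let metadata = ArrowReaderMetadata::load(&file, ArrowReaderOptions::new())?;
    // ...then build several readers from the same metadata, e.g. one per thread.
    let _reader_a =
        ParquetRecordBatchReaderBuilder::new_with_metadata(file.try_clone()?, metadata.clone())
            .build()?;
    let _reader_b = ParquetRecordBatchReaderBuilder::new_with_metadata(file, metadata).build()?;
    Ok(())
}
```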
14 changes: 7 additions & 7 deletions parquet/src/arrow/arrow_reader/statistics.rs
@@ -1297,14 +1297,14 @@ impl<'a> StatisticsConverter<'a> {
/// # Parameters:
///
/// * `column_page_index`: The parquet column page indices, read from
-/// `ParquetMetaData` column_index
+/// `ParquetMetaData` column_index
///
/// * `column_offset_index`: The parquet column offset indices, read from
-/// `ParquetMetaData` offset_index
+/// `ParquetMetaData` offset_index
///
/// * `row_group_indices`: The indices of the row groups, that are used to
-/// extract the column page index and offset index on a per row group
-/// per column basis.
+/// extract the column page index and offset index on a per row group
+/// per column basis.
///
/// # Return Value
///
@@ -1423,13 +1423,13 @@ impl<'a> StatisticsConverter<'a> {
/// # Parameters:
///
/// * `column_offset_index`: The parquet column offset indices, read from
-/// `ParquetMetaData` offset_index
+/// `ParquetMetaData` offset_index
///
/// * `row_group_metadatas`: The metadata slice of the row groups, read
-/// from `ParquetMetaData` row_groups
+/// from `ParquetMetaData` row_groups
///
/// * `row_group_indices`: The indices of the row groups, that are used to
-/// extract the column offset index on a per row group per column basis.
+/// extract the column offset index on a per row group per column basis.
///
/// See docs on [`Self::data_page_mins`] for details.
pub fn data_page_row_counts<I>(
6 changes: 3 additions & 3 deletions parquet/src/file/metadata/mod.rs
@@ -190,11 +190,11 @@ impl ParquetMetaData {
/// 1. Includes size of self
///
/// 2. Includes heap memory for sub fields such as [`FileMetaData`] and
-/// [`RowGroupMetaData`].
+/// [`RowGroupMetaData`].
///
/// 3. Includes memory from shared pointers (e.g. [`SchemaDescPtr`]). This
-/// means `memory_size` will over estimate the memory size if such pointers
-/// are shared.
+/// means `memory_size` will over estimate the memory size if such pointers
+/// are shared.
///
/// 4. Does not include any allocator overheads
pub fn memory_size(&self) -> usize {
4 changes: 2 additions & 2 deletions parquet/src/file/properties.rs
@@ -566,8 +566,8 @@ impl WriterPropertiesBuilder {
/// * If the bloom filter is enabled previously then it is a no-op.
///
/// * If the bloom filter is not enabled, default values for ndv and fpp
-/// value are used used. See [`set_bloom_filter_ndv`] and
-/// [`set_bloom_filter_fpp`] to further adjust the ndv and fpp.
+/// value are used used. See [`set_bloom_filter_ndv`] and
+/// [`set_bloom_filter_fpp`] to further adjust the ndv and fpp.
///
/// [`set_bloom_filter_ndv`]: Self::set_bloom_filter_ndv
/// [`set_bloom_filter_fpp`]: Self::set_bloom_filter_fpp
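A sketch of the builder calls this doc refers to: enable the bloom filter, then optionally tune fpp and ndv (the values here are arbitrary):

```rust
use parquet::file::properties::WriterProperties;

fn main() {
    // Enabling the filter first applies the default ndv/fpp; the two setters
    // below then tighten them for all columns.
    let props = WriterProperties::builder()
        .set_bloom_filter_enabled(true)
        .set_bloom_filter_fpp(0.01)
        .set_bloom_filter_ndv(100_000)
        .build();
    let _ = props; // pass to an ArrowWriter / SerializedFileWriter as usual
}
```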
4 changes: 2 additions & 2 deletions parquet/src/file/writer.rs
@@ -449,10 +449,10 @@ impl<W: Write + Send> SerializedFileWriter<W> {
///
/// All columns should be written sequentially; the main workflow is:
/// - Request the next column using `next_column` method - this will return `None` if no
-/// more columns are available to write.
+/// more columns are available to write.
/// - Once done writing a column, close column writer with `close`
/// - Once all columns have been written, close row group writer with `close` method -
-/// it will return row group metadata and is no-op on already closed row group.
+/// it will return row group metadata and is no-op on already closed row group.
pub struct SerializedRowGroupWriter<'a, W: Write> {
descr: SchemaDescPtr,
props: WriterPropertiesPtr,
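A sketch of the `next_column` / `close` workflow described in this doc comment, writing a single Int32 column to an in-memory buffer (the schema string and values are illustrative):

```rust
use std::sync::Arc;

use parquet::data_type::Int32Type;
use parquet::file::{properties::WriterProperties, writer::SerializedFileWriter};
use parquet::schema::parser::parse_message_type;

fn main() -> parquet::errors::Result<()> {
    let schema = Arc::new(parse_message_type("message schema { REQUIRED INT32 id; }")?);
    let props = Arc::new(WriterProperties::builder().build());
    let buffer: Vec<u8> = Vec::new();
    let mut writer = SerializedFileWriter::new(buffer, schema, props)?;

    // Request each column in order, close it, then close the row group.
    let mut row_group = writer.next_row_group()?;
    while let Some(mut column) = row_group.next_column()? {
        column
            .typed::<Int32Type>()
            .write_batch(&[1, 2, 3], None, None)?;
        column.close()?;
    }
    row_group.close()?;
    writer.close()?;
    Ok(())
}
```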
2 changes: 1 addition & 1 deletion parquet/src/format.rs

Some generated files are not rendered by default.
