chore: update rustc and fix future (#11696)
ritchie46 authored Oct 12, 2023
1 parent 12a9635 commit d12dadb
Showing 12 changed files with 28 additions and 49 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -222,7 +222,7 @@ point to the `main` branch of this repo.
 polars = { git = "https://github.com/pola-rs/polars", rev = "<optional git tag>" }
 ```
 
-Required Rust version `>=1.65`.
+Required Rust version `>=1.71`.
 
 ## Contributing
4 changes: 1 addition & 3 deletions crates/nano-arrow/src/array/dictionary/value_map.rs
@@ -43,9 +43,7 @@ pub struct Hashed<K> {
 
 #[inline]
 fn ahash_hash<T: Hash + ?Sized>(value: &T) -> u64 {
-    let mut hasher = BuildHasherDefault::<ahash::AHasher>::default().build_hasher();
-    value.hash(&mut hasher);
-    hasher.finish()
+    BuildHasherDefault::<ahash::AHasher>::default().hash_one(value)
 }
 
 impl<K> Hash for Hashed<K> {
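The same mechanical rewrite repeats across most files in this commit: the three-step build_hasher/hash/finish dance becomes a single call to `BuildHasher::hash_one`, which was stabilized in Rust 1.71, which is in turn why the README minimum moves to 1.71. A minimal standalone sketch of the before/after, assuming ahash 0.8:

```rust
use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher};

// Before: build a hasher, feed the value through it, then finish it.
fn hash_old<T: Hash + ?Sized>(value: &T) -> u64 {
    let mut hasher = BuildHasherDefault::<ahash::AHasher>::default().build_hasher();
    value.hash(&mut hasher);
    hasher.finish()
}

// After: hash_one performs exactly those three steps in one call.
fn hash_new<T: Hash + ?Sized>(value: &T) -> u64 {
    BuildHasherDefault::<ahash::AHasher>::default().hash_one(value)
}

fn main() {
    // The default hash_one implementation is build_hasher + hash + finish,
    // so both paths agree.
    assert_eq!(hash_old("polars"), hash_new("polars"));
}
```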
22 changes: 6 additions & 16 deletions crates/polars-core/src/hashing/vector_hasher.rs
@@ -42,12 +42,8 @@ pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
 pub(crate) fn get_null_hash_value(random_state: RandomState) -> u64 {
     // we just start with a large prime number and hash that twice
     // to get a constant hash value for null/None
-    let mut hasher = random_state.build_hasher();
-    3188347919usize.hash(&mut hasher);
-    let first = hasher.finish();
-    let mut hasher = random_state.build_hasher();
-    first.hash(&mut hasher);
-    hasher.finish()
+    let first = random_state.hash_one(3188347919usize);
+    random_state.hash_one(first)
 }
 
 fn insert_null_hash(chunks: &[ArrayRef], random_state: RandomState, buf: &mut Vec<u64>) {
@@ -392,23 +388,17 @@ where
         buf.clear();
         buf.reserve(self.len());
 
-        self.downcast_iter().for_each(|arr| {
-            buf.extend(arr.into_iter().map(|opt_v| {
-                let mut hasher = random_state.build_hasher();
-                opt_v.hash(&mut hasher);
-                hasher.finish()
-            }))
-        });
+        self.downcast_iter()
+            .for_each(|arr| buf.extend(arr.into_iter().map(|opt_v| random_state.hash_one(opt_v))));
 
         Ok(())
     }
 
     fn vec_hash_combine(&self, random_state: RandomState, hashes: &mut [u64]) -> PolarsResult<()> {
         self.apply_to_slice(
             |opt_v, h| {
-                let mut hasher = random_state.build_hasher();
-                opt_v.hash(&mut hasher);
-                _boost_hash_combine(hasher.finish(), *h)
+                let hashed = random_state.hash_one(opt_v);
+                _boost_hash_combine(hashed, *h)
             },
             hashes,
         );
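As a standalone illustration of the null-sentinel trick above, chaining hash_one twice pins every null to one stable, seed-dependent value; a sketch assuming ahash 0.8's seeded RandomState:

```rust
use std::hash::BuildHasher;

use ahash::RandomState;

// Mirrors get_null_hash_value: hash a fixed large prime twice so that
// null/None always maps to the same per-seed hash.
fn null_hash(random_state: &RandomState) -> u64 {
    let first = random_state.hash_one(3188347919usize);
    random_state.hash_one(first)
}

fn main() {
    let rs = RandomState::with_seeds(1, 2, 3, 4);
    // Deterministic for a fixed set of seeds.
    assert_eq!(null_hash(&rs), null_hash(&rs));
}
```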
12 changes: 8 additions & 4 deletions crates/polars-io/src/cloud/options.rs
@@ -30,8 +30,8 @@ use smartstring::alias::String as SmartString;
 use url::Url;
 
 #[cfg(feature = "aws")]
-static BUCKET_REGION: Lazy<tokio::sync::Mutex<FastFixedCache<SmartString, SmartString>>> =
-    Lazy::new(|| tokio::sync::Mutex::new(FastFixedCache::default()));
+static BUCKET_REGION: Lazy<std::sync::Mutex<FastFixedCache<SmartString, SmartString>>> =
+    Lazy::new(|| std::sync::Mutex::new(FastFixedCache::new(32)));
 
 /// The type of the config keys must satisfy the following requirements:
 /// 1. must be easily collected into a HashMap, the type required by the object_crate API.
@@ -146,10 +146,13 @@ impl CloudOptions {
                 .get_config_value(&AmazonS3ConfigKey::Region)
                 .is_none()
             {
-                let mut bucket_region = BUCKET_REGION.lock().await;
                 let bucket = crate::cloud::CloudLocation::new(url)?.bucket;
+                let region = {
+                    let bucket_region = BUCKET_REGION.lock().unwrap();
+                    bucket_region.get(bucket.as_str()).cloned()
+                };
 
-                match bucket_region.get(bucket.as_str()) {
+                match region {
                     Some(region) => {
                         builder = builder.with_config(AmazonS3ConfigKey::Region, region.as_str())
                     },
@@ -165,6 +168,7 @@ impl CloudOptions {
                         if let Some(region) = result.headers().get("x-amz-bucket-region") {
                             let region =
                                 std::str::from_utf8(region.as_bytes()).map_err(to_compute_err)?;
+                            let mut bucket_region = BUCKET_REGION.lock().unwrap();
                             bucket_region.insert(bucket.into(), region.into());
                             builder = builder.with_config(AmazonS3ConfigKey::Region, region)
                         }
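The mutex swap is presumably the "fix future" half of the commit title: a tokio::sync::Mutex guard could be held across an await, while a std::sync::Mutex guard must be dropped before any await point or the future stops being Send. A rough sketch of the scoping pattern, with a plain HashMap standing in for the repo's FastFixedCache and a hypothetical fetch_region in place of the real HEAD request:

```rust
use std::collections::HashMap;
use std::sync::Mutex;

use once_cell::sync::Lazy;

// HashMap stands in for FastFixedCache here.
static REGIONS: Lazy<Mutex<HashMap<String, String>>> =
    Lazy::new(|| Mutex::new(HashMap::new()));

async fn bucket_region(bucket: &str) -> String {
    // Scope the lock so the guard drops before the .await below; a
    // std::sync::MutexGuard held across an await would make this future
    // non-Send.
    let cached = { REGIONS.lock().unwrap().get(bucket).cloned() };
    if let Some(region) = cached {
        return region;
    }
    let region = fetch_region(bucket).await;
    // Re-acquire briefly to fill the cache.
    REGIONS.lock().unwrap().insert(bucket.to_owned(), region.clone());
    region
}

// Hypothetical stand-in for the HEAD request that reads the
// x-amz-bucket-region response header.
async fn fetch_region(_bucket: &str) -> String {
    "eu-central-1".to_owned()
}
```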
8 changes: 2 additions & 6 deletions crates/polars-ops/src/frame/hashing.rs
@@ -1,4 +1,4 @@
-use std::hash::{BuildHasher, Hash, Hasher};
+use std::hash::Hash;
 
 use ahash::RandomState;
 use hashbrown::hash_map::RawEntryMut;
@@ -88,11 +88,7 @@ where
         .map(|iter| {
             // create hashes and keys
             iter.into_iter()
-                .map(|val| {
-                    let mut hasher = build_hasher.build_hasher();
-                    val.hash(&mut hasher);
-                    (hasher.finish(), val)
-                })
+                .map(|val| (build_hasher.hash_one(&val), val))
                .collect_trusted::<Vec<_>>()
         })
         .collect()
@@ -366,9 +366,7 @@ pub fn prepare_bytes<'a>(
         .map(|ca| {
             ca.into_iter()
                 .map(|opt_b| {
-                    let mut state = hb.build_hasher();
-                    opt_b.hash(&mut state);
-                    let hash = state.finish();
+                    let hash = hb.hash_one(opt_b);
                     BytesHash::new(opt_b, hash)
                 })
                 .collect::<Vec<_>>()
2 changes: 1 addition & 1 deletion crates/polars-ops/src/frame/join/mod.rs
@@ -12,7 +12,7 @@ mod merge_sorted;
 #[cfg(feature = "chunked_ids")]
 use std::borrow::Cow;
 use std::fmt::{Debug, Display, Formatter};
-use std::hash::{BuildHasher, Hash, Hasher};
+use std::hash::Hash;
 
 use ahash::RandomState;
 pub use args::*;
@@ -16,10 +16,10 @@
 //! assert_eq!(hllp.count(), 2);
 //! ```
-use std::hash::{BuildHasher, Hash, Hasher};
+use std::hash::Hash;
 use std::marker::PhantomData;
 
-use polars_core::export::ahash::{AHasher, RandomState};
+use polars_core::export::ahash::RandomState;
 
 /// The greater is P, the smaller the error.
 const HLL_P: usize = 14_usize;
@@ -85,9 +85,7 @@
     /// reasonable performance.
     #[inline]
     fn hash_value(&self, obj: &T) -> u64 {
-        let mut hasher: AHasher = SEED.build_hasher();
-        obj.hash(&mut hasher);
-        hasher.finish()
+        SEED.hash_one(obj)
     }
 
     /// Adds an element to the HyperLogLog.
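hash_value feeds the rest of the HyperLogLog: in the standard formulation with P = 14, the top 14 bits of the hash pick one of 2^14 registers and the leading-zero count of the remaining bits gives the rank. A generic sketch of that split (textbook HLL bit layout, not necessarily this file's exact code), assuming SEED is any fixed ahash RandomState:

```rust
use std::hash::BuildHasher;

use ahash::RandomState;

const HLL_P: usize = 14;

// Standard HyperLogLog register update derived from a single hash.
fn register_update(seed: &RandomState, obj: &str) -> (usize, u8) {
    let hash = seed.hash_one(obj);
    // The top P bits choose the register index...
    let index = (hash >> (64 - HLL_P)) as usize;
    // ...and the leading zeros of the remaining bits (plus one) give the
    // rank; the OR caps the rank once the payload bits are exhausted.
    let rank = ((hash << HLL_P) | (1u64 << (HLL_P - 1))).leading_zeros() as u8 + 1;
    (index, rank)
}
```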
5 changes: 1 addition & 4 deletions crates/polars-plan/src/logical_plan/optimizer/cse.rs
@@ -1,7 +1,6 @@
 //! Common Subplan Elimination
 use std::collections::{BTreeMap, BTreeSet};
-use std::hash::{BuildHasher, Hash, Hasher};
 
 use polars_core::prelude::*;
 
@@ -310,9 +309,7 @@ pub(crate) fn elim_cmn_subplans(
             (Some(h), _) => *h,
             (_, Some(h)) => *h,
             _ => {
-                let mut h = hb.build_hasher();
-                node1.hash(&mut h);
-                let hash = h.finish();
+                let hash = hb.hash_one(node1);
                 let mut cache_id = lp_cache.wrapping_add(hash as usize);
                 // this ensures we can still add branch ids without overflowing
                 // during the dot representation
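A toy version of the branch above, assuming hb is an ahash RandomState; wrapping_add means the later addition of branch ids wraps instead of panicking on overflow in debug builds:

```rust
use std::hash::BuildHasher;

use ahash::RandomState;

fn derive_cache_id(lp_cache: usize, node_id: u64, hb: &RandomState) -> usize {
    let hash = hb.hash_one(node_id);
    // Wrapping arithmetic leaves headroom so branch ids can still be
    // added on top for the dot representation.
    lp_cache.wrapping_add(hash as usize)
}
```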
6 changes: 2 additions & 4 deletions crates/polars-utils/src/functions.rs
@@ -1,4 +1,4 @@
-use std::hash::{BuildHasher, Hash, Hasher};
+use std::hash::{BuildHasher, Hash};
 
 // Faster than collecting from a flattened iterator.
 pub fn flatten<T: Clone, R: AsRef<[T]>>(bufs: &[R], len: Option<usize>) -> Vec<T> {
@@ -20,7 +20,5 @@ pub fn hash_to_partition(h: u64, n_partitions: usize) -> usize {
 
 #[inline]
 pub fn get_hash<T: Hash, B: BuildHasher>(value: T, hb: &B) -> u64 {
-    let mut hasher = hb.build_hasher();
-    value.hash(&mut hasher);
-    hasher.finish()
+    hb.hash_one(value)
 }
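For reference, a small usage sketch of the simplified helper, assuming ahash's seeded RandomState as the BuildHasher (the helper body is copied from the diff above):

```rust
use std::hash::{BuildHasher, Hash};

use ahash::RandomState;

// The helper as it reads after this commit.
#[inline]
pub fn get_hash<T: Hash, B: BuildHasher>(value: T, hb: &B) -> u64 {
    hb.hash_one(value)
}

fn main() {
    let hb = RandomState::with_seeds(0, 1, 2, 3);
    // Same builder + same value => same hash, e.g. for partitioning rows.
    assert_eq!(get_hash("polars", &hb), get_hash("polars", &hb));
}
```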
2 changes: 1 addition & 1 deletion py-polars/tests/unit/dataframe/test_df.py
@@ -1601,7 +1601,7 @@ def test_reproducible_hash_with_seeds() -> None:
     if platform.mac_ver()[-1] != "arm64":
         expected = pl.Series(
             "s",
-            [13477868900383131459, 988796329533502010, 16840582678788620208],
+            [13477868900383131459, 6344663067812082469, 16840582678788620208],
             dtype=pl.UInt64,
         )
         result = df.hash_rows(*seeds)
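Only the middle expected constant changes: seeded row hashes stay deterministic, but the concrete u64 values are an implementation detail of the hashing code path, which this commit moved to hash_one. A sketch of the property the test actually pins down, assuming ahash's seeded RandomState (the test already skips arm64 macs, where the values differ):

```rust
use std::hash::BuildHasher;

use ahash::RandomState;

fn main() {
    let rs = RandomState::with_seeds(1, 2, 3, 4);
    // Fixed seeds give the same value on every run for a given platform...
    assert_eq!(rs.hash_one(42u64), rs.hash_one(42u64));
    // ...but the concrete value may shift when the hashing code path
    // changes, which is why the expected constants were updated.
    println!("{:x}", rs.hash_one(42u64));
}
```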
2 changes: 1 addition & 1 deletion rust-toolchain.toml
@@ -1,2 +1,2 @@
 [toolchain]
-channel = "nightly-2023-10-02"
+channel = "nightly-2023-10-12"
