Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: update rustc and fix future #11696

Merged
merged 3 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ point to the `main` branch of this repo.
polars = { git = "https://github.com/pola-rs/polars", rev = "<optional git tag>" }
```

Required Rust version `>=1.65`.
Required Rust version `>=1.71`.

## Contributing

Expand Down
4 changes: 1 addition & 3 deletions crates/nano-arrow/src/array/dictionary/value_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,7 @@ pub struct Hashed<K> {

#[inline]
fn ahash_hash<T: Hash + ?Sized>(value: &T) -> u64 {
let mut hasher = BuildHasherDefault::<ahash::AHasher>::default().build_hasher();
value.hash(&mut hasher);
hasher.finish()
BuildHasherDefault::<ahash::AHasher>::default().hash_one(value)
}

impl<K> Hash for Hashed<K> {
Expand Down
22 changes: 6 additions & 16 deletions crates/polars-core/src/hashing/vector_hasher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,8 @@ pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
pub(crate) fn get_null_hash_value(random_state: RandomState) -> u64 {
// we just start with a large prime number and hash that twice
// to get a constant hash value for null/None
let mut hasher = random_state.build_hasher();
3188347919usize.hash(&mut hasher);
let first = hasher.finish();
let mut hasher = random_state.build_hasher();
first.hash(&mut hasher);
hasher.finish()
let first = random_state.hash_one(3188347919usize);
random_state.hash_one(first)
}

fn insert_null_hash(chunks: &[ArrayRef], random_state: RandomState, buf: &mut Vec<u64>) {
Expand Down Expand Up @@ -392,23 +388,17 @@ where
buf.clear();
buf.reserve(self.len());

self.downcast_iter().for_each(|arr| {
buf.extend(arr.into_iter().map(|opt_v| {
let mut hasher = random_state.build_hasher();
opt_v.hash(&mut hasher);
hasher.finish()
}))
});
self.downcast_iter()
.for_each(|arr| buf.extend(arr.into_iter().map(|opt_v| random_state.hash_one(opt_v))));

Ok(())
}

fn vec_hash_combine(&self, random_state: RandomState, hashes: &mut [u64]) -> PolarsResult<()> {
self.apply_to_slice(
|opt_v, h| {
let mut hasher = random_state.build_hasher();
opt_v.hash(&mut hasher);
_boost_hash_combine(hasher.finish(), *h)
let hashed = random_state.hash_one(opt_v);
_boost_hash_combine(hashed, *h)
},
hashes,
);
Expand Down
12 changes: 8 additions & 4 deletions crates/polars-io/src/cloud/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ use smartstring::alias::String as SmartString;
use url::Url;

#[cfg(feature = "aws")]
static BUCKET_REGION: Lazy<tokio::sync::Mutex<FastFixedCache<SmartString, SmartString>>> =
Lazy::new(|| tokio::sync::Mutex::new(FastFixedCache::default()));
static BUCKET_REGION: Lazy<std::sync::Mutex<FastFixedCache<SmartString, SmartString>>> =
Lazy::new(|| std::sync::Mutex::new(FastFixedCache::new(32)));

/// The type of the config keys must satisfy the following requirements:
/// 1. must be easily collected into a HashMap, the type required by the `object_store` crate API.
Expand Down Expand Up @@ -146,10 +146,13 @@ impl CloudOptions {
.get_config_value(&AmazonS3ConfigKey::Region)
.is_none()
{
let mut bucket_region = BUCKET_REGION.lock().await;
let bucket = crate::cloud::CloudLocation::new(url)?.bucket;
let region = {
let bucket_region = BUCKET_REGION.lock().unwrap();
bucket_region.get(bucket.as_str()).cloned()
};

match bucket_region.get(bucket.as_str()) {
match region {
Some(region) => {
builder = builder.with_config(AmazonS3ConfigKey::Region, region.as_str())
},
Expand All @@ -165,6 +168,7 @@ impl CloudOptions {
if let Some(region) = result.headers().get("x-amz-bucket-region") {
let region =
std::str::from_utf8(region.as_bytes()).map_err(to_compute_err)?;
let mut bucket_region = BUCKET_REGION.lock().unwrap();
bucket_region.insert(bucket.into(), region.into());
builder = builder.with_config(AmazonS3ConfigKey::Region, region)
}
Expand Down
8 changes: 2 additions & 6 deletions crates/polars-ops/src/frame/hashing.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::hash::{BuildHasher, Hash, Hasher};
use std::hash::Hash;

use ahash::RandomState;
use hashbrown::hash_map::RawEntryMut;
Expand Down Expand Up @@ -88,11 +88,7 @@ where
.map(|iter| {
// create hashes and keys
iter.into_iter()
.map(|val| {
let mut hasher = build_hasher.build_hasher();
val.hash(&mut hasher);
(hasher.finish(), val)
})
.map(|val| (build_hasher.hash_one(&val), val))
.collect_trusted::<Vec<_>>()
})
.collect()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,9 +366,7 @@ pub fn prepare_bytes<'a>(
.map(|ca| {
ca.into_iter()
.map(|opt_b| {
let mut state = hb.build_hasher();
opt_b.hash(&mut state);
let hash = state.finish();
let hash = hb.hash_one(opt_b);
BytesHash::new(opt_b, hash)
})
.collect::<Vec<_>>()
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/src/frame/join/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ mod merge_sorted;
#[cfg(feature = "chunked_ids")]
use std::borrow::Cow;
use std::fmt::{Debug, Display, Formatter};
use std::hash::{BuildHasher, Hash, Hasher};
use std::hash::Hash;

use ahash::RandomState;
pub use args::*;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
//! assert_eq!(hllp.count(), 2);
//! ```

use std::hash::{BuildHasher, Hash, Hasher};
use std::hash::Hash;
use std::marker::PhantomData;

use polars_core::export::ahash::{AHasher, RandomState};
use polars_core::export::ahash::RandomState;

/// The greater is P, the smaller the error.
const HLL_P: usize = 14_usize;
Expand Down Expand Up @@ -85,9 +85,7 @@ where
/// reasonable performance.
#[inline]
fn hash_value(&self, obj: &T) -> u64 {
let mut hasher: AHasher = SEED.build_hasher();
obj.hash(&mut hasher);
hasher.finish()
SEED.hash_one(obj)
}

/// Adds an element to the HyperLogLog.
Expand Down
5 changes: 1 addition & 4 deletions crates/polars-plan/src/logical_plan/optimizer/cse.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
//! Common Subplan Elimination

use std::collections::{BTreeMap, BTreeSet};
use std::hash::{BuildHasher, Hash, Hasher};

use polars_core::prelude::*;

Expand Down Expand Up @@ -310,9 +309,7 @@ pub(crate) fn elim_cmn_subplans(
(Some(h), _) => *h,
(_, Some(h)) => *h,
_ => {
let mut h = hb.build_hasher();
node1.hash(&mut h);
let hash = h.finish();
let hash = hb.hash_one(node1);
let mut cache_id = lp_cache.wrapping_add(hash as usize);
// this ensures we can still add branch ids without overflowing
// during the dot representation
Expand Down
6 changes: 2 additions & 4 deletions crates/polars-utils/src/functions.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::hash::{BuildHasher, Hash, Hasher};
use std::hash::{BuildHasher, Hash};

// Faster than collecting from a flattened iterator.
pub fn flatten<T: Clone, R: AsRef<[T]>>(bufs: &[R], len: Option<usize>) -> Vec<T> {
Expand All @@ -20,7 +20,5 @@ pub fn hash_to_partition(h: u64, n_partitions: usize) -> usize {

#[inline]
pub fn get_hash<T: Hash, B: BuildHasher>(value: T, hb: &B) -> u64 {
let mut hasher = hb.build_hasher();
value.hash(&mut hasher);
hasher.finish()
hb.hash_one(value)
}
2 changes: 1 addition & 1 deletion py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -1601,7 +1601,7 @@ def test_reproducible_hash_with_seeds() -> None:
if platform.mac_ver()[-1] != "arm64":
expected = pl.Series(
"s",
[13477868900383131459, 988796329533502010, 16840582678788620208],
[13477868900383131459, 6344663067812082469, 16840582678788620208],
dtype=pl.UInt64,
)
result = df.hash_rows(*seeds)
Expand Down
2 changes: 1 addition & 1 deletion rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[toolchain]
channel = "nightly-2023-10-02"
channel = "nightly-2023-10-12"