Skip to content

Commit

Permalink
Auto merge of rust-lang#94084 - Mark-Simulacrum:drop-sharded, r=cjgillot
Browse files Browse the repository at this point in the history
Avoid query cache sharding code in single-threaded mode

In non-parallel compilers, this just adds needless overhead at compilation time (since there is statically only one shard anyway). This amounts to a roughly 10-second reduction in bootstrap time, with overall neutral (some wins, some losses) performance results.

Parallel compiler performance should be largely unaffected by this PR; sharding is kept there.
  • Loading branch information
bors committed Feb 27, 2022
2 parents bab4c13 + 594ea74 commit 3b1fe7e
Show file tree
Hide file tree
Showing 8 changed files with 167 additions and 204 deletions.
2 changes: 1 addition & 1 deletion compiler/rustc_data_structures/src/sharded.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ impl<K: Eq + Hash + Copy + IntoPointer> ShardedHashMap<K, ()> {
}

#[inline]
fn make_hash<K: Hash + ?Sized>(val: &K) -> u64 {
pub fn make_hash<K: Hash + ?Sized>(val: &K) -> u64 {
let mut state = FxHasher::default();
val.hash(&mut state);
state.finish()
Expand Down
19 changes: 9 additions & 10 deletions compiler/rustc_middle/src/ty/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ macro_rules! define_callbacks {

#[derive(Default)]
pub struct QueryCaches<$tcx> {
$($(#[$attr])* pub $name: QueryCacheStore<query_storage::$name<$tcx>>,)*
$($(#[$attr])* pub $name: query_storage::$name<$tcx>,)*
}

impl<$tcx> TyCtxtEnsure<$tcx> {
Expand All @@ -222,12 +222,12 @@ macro_rules! define_callbacks {

let cached = try_get_cached(self.tcx, &self.tcx.query_caches.$name, &key, noop);

let lookup = match cached {
match cached {
Ok(()) => return,
Err(lookup) => lookup,
};
Err(()) => (),
}

self.tcx.queries.$name(self.tcx, DUMMY_SP, key, lookup, QueryMode::Ensure);
self.tcx.queries.$name(self.tcx, DUMMY_SP, key, QueryMode::Ensure);
})*
}

Expand All @@ -251,12 +251,12 @@ macro_rules! define_callbacks {

let cached = try_get_cached(self.tcx, &self.tcx.query_caches.$name, &key, copy);

let lookup = match cached {
match cached {
Ok(value) => return value,
Err(lookup) => lookup,
};
Err(()) => (),
}

self.tcx.queries.$name(self.tcx, self.span, key, lookup, QueryMode::Get).unwrap()
self.tcx.queries.$name(self.tcx, self.span, key, QueryMode::Get).unwrap()
})*
}

Expand Down Expand Up @@ -314,7 +314,6 @@ macro_rules! define_callbacks {
tcx: TyCtxt<$tcx>,
span: Span,
key: query_keys::$name<$tcx>,
lookup: QueryLookup,
mode: QueryMode,
) -> Option<query_stored::$name<$tcx>>;)*
}
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_query_impl/src/on_disk_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use rustc_middle::thir;
use rustc_middle::ty::codec::{RefDecodable, TyDecoder, TyEncoder};
use rustc_middle::ty::{self, Ty, TyCtxt};
use rustc_query_system::dep_graph::DepContext;
use rustc_query_system::query::{QueryContext, QuerySideEffects};
use rustc_query_system::query::{QueryCache, QueryContext, QuerySideEffects};
use rustc_serialize::{
opaque::{self, FileEncodeResult, FileEncoder, IntEncodedWithFixedSize},
Decodable, Decoder, Encodable, Encoder,
Expand Down Expand Up @@ -1034,7 +1034,7 @@ where
assert!(Q::query_state(tcx).all_inactive());
let cache = Q::query_cache(tcx);
let mut res = Ok(());
cache.iter_results(&mut |key, value, dep_node| {
cache.iter(&mut |key, value, dep_node| {
if res.is_err() {
return;
}
Expand Down
5 changes: 2 additions & 3 deletions compiler/rustc_query_impl/src/plumbing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ macro_rules! define_queries {
}

#[inline(always)]
fn query_cache<'a>(tcx: QueryCtxt<$tcx>) -> &'a QueryCacheStore<Self::Cache>
fn query_cache<'a>(tcx: QueryCtxt<$tcx>) -> &'a Self::Cache
where 'tcx:'a
{
&tcx.query_caches.$name
Expand Down Expand Up @@ -537,12 +537,11 @@ macro_rules! define_queries_struct {
tcx: TyCtxt<$tcx>,
span: Span,
key: query_keys::$name<$tcx>,
lookup: QueryLookup,
mode: QueryMode,
) -> Option<query_stored::$name<$tcx>> {
opt_remap_env_constness!([$($modifiers)*][key]);
let qcx = QueryCtxt { tcx, queries: self };
get_query::<queries::$name<$tcx>, _>(qcx, span, key, lookup, mode)
get_query::<queries::$name<$tcx>, _>(qcx, span, key, mode)
})*
}
};
Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_query_impl/src/profiling_support.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use rustc_data_structures::profiling::SelfProfiler;
use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, CRATE_DEF_INDEX, LOCAL_CRATE};
use rustc_hir::definitions::DefPathData;
use rustc_middle::ty::{TyCtxt, WithOptConstParam};
use rustc_query_system::query::{QueryCache, QueryCacheStore};
use rustc_query_system::query::QueryCache;
use std::fmt::Debug;
use std::io::Write;

Expand Down Expand Up @@ -229,7 +229,7 @@ where
fn alloc_self_profile_query_strings_for_query_cache<'tcx, C>(
tcx: TyCtxt<'tcx>,
query_name: &'static str,
query_cache: &QueryCacheStore<C>,
query_cache: &C,
string_cache: &mut QueryKeyStringCache,
) where
C: QueryCache,
Expand All @@ -251,7 +251,7 @@ fn alloc_self_profile_query_strings_for_query_cache<'tcx, C>(
// locked while doing so. Instead we copy out the
// `(query_key, dep_node_index)` pairs and release the lock again.
let mut query_keys_and_indices = Vec::new();
query_cache.iter_results(&mut |k, _, i| query_keys_and_indices.push((k.clone(), i)));
query_cache.iter(&mut |k, _, i| query_keys_and_indices.push((k.clone(), i)));

// Now actually allocate the strings. If allocating the strings
// generates new entries in the query cache, we'll miss them but
Expand All @@ -276,7 +276,7 @@ fn alloc_self_profile_query_strings_for_query_cache<'tcx, C>(
let event_id = event_id_builder.from_label(query_name).to_string_id();

let mut query_invocation_ids = Vec::new();
query_cache.iter_results(&mut |_, _, i| {
query_cache.iter(&mut |_, _, i| {
query_invocation_ids.push(i.into());
});

Expand Down
149 changes: 76 additions & 73 deletions compiler/rustc_query_system/src/query/caches.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
use crate::dep_graph::DepNodeIndex;
use crate::query::plumbing::{QueryCacheStore, QueryLookup};

use rustc_arena::TypedArena;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sharded;
#[cfg(parallel_compiler)]
use rustc_data_structures::sharded::Sharded;
#[cfg(not(parallel_compiler))]
use rustc_data_structures::sync::Lock;
use rustc_data_structures::sync::WorkerLocal;
use std::default::Default;
use std::fmt::Debug;
Expand All @@ -25,35 +28,23 @@ pub trait QueryStorage {

pub trait QueryCache: QueryStorage + Sized {
type Key: Hash + Eq + Clone + Debug;
type Sharded: Default;

/// Checks if the query is already computed and in the cache.
/// On a hit, `on_hit` is called with the stored value and its `DepNodeIndex`,
/// and its result is returned in `Ok`. On a miss, `Err(())` is returned; after
/// this change the shard index and lock guard are no longer handed back to the caller.
fn lookup<'s, R, OnHit>(
fn lookup<R, OnHit>(
&self,
state: &'s QueryCacheStore<Self>,
key: &Self::Key,
// `on_hit` can be called while holding a lock to the query state shard.
on_hit: OnHit,
) -> Result<R, QueryLookup>
) -> Result<R, ()>
where
OnHit: FnOnce(&Self::Stored, DepNodeIndex) -> R;

fn complete(
&self,
lock_sharded_storage: &mut Self::Sharded,
key: Self::Key,
value: Self::Value,
index: DepNodeIndex,
) -> Self::Stored;
fn complete(&self, key: Self::Key, value: Self::Value, index: DepNodeIndex) -> Self::Stored;

fn iter(
&self,
shards: &Sharded<Self::Sharded>,
f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex),
);
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex));
}

pub struct DefaultCacheSelector;
Expand All @@ -62,11 +53,16 @@ impl<K: Eq + Hash, V: Clone> CacheSelector<K, V> for DefaultCacheSelector {
type Cache = DefaultCache<K, V>;
}

pub struct DefaultCache<K, V>(PhantomData<(K, V)>);
pub struct DefaultCache<K, V> {
#[cfg(parallel_compiler)]
cache: Sharded<FxHashMap<K, (V, DepNodeIndex)>>,
#[cfg(not(parallel_compiler))]
cache: Lock<FxHashMap<K, (V, DepNodeIndex)>>,
}

impl<K, V> Default for DefaultCache<K, V> {
fn default() -> Self {
DefaultCache(PhantomData)
DefaultCache { cache: Default::default() }
}
}

Expand All @@ -87,49 +83,51 @@ where
V: Clone + Debug,
{
type Key = K;
type Sharded = FxHashMap<K, (V, DepNodeIndex)>;

#[inline(always)]
fn lookup<'s, R, OnHit>(
&self,
state: &'s QueryCacheStore<Self>,
key: &K,
on_hit: OnHit,
) -> Result<R, QueryLookup>
fn lookup<R, OnHit>(&self, key: &K, on_hit: OnHit) -> Result<R, ()>
where
OnHit: FnOnce(&V, DepNodeIndex) -> R,
{
let (lookup, lock) = state.get_lookup(key);
let result = lock.raw_entry().from_key_hashed_nocheck(lookup.key_hash, key);
let key_hash = sharded::make_hash(key);
#[cfg(parallel_compiler)]
let lock = self.cache.get_shard_by_hash(key_hash).lock();
#[cfg(not(parallel_compiler))]
let lock = self.cache.lock();
let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key);

if let Some((_, value)) = result {
let hit_result = on_hit(&value.0, value.1);
Ok(hit_result)
} else {
Err(lookup)
Err(())
}
}

#[inline]
fn complete(
&self,
lock_sharded_storage: &mut Self::Sharded,
key: K,
value: V,
index: DepNodeIndex,
) -> Self::Stored {
lock_sharded_storage.insert(key, (value.clone(), index));
fn complete(&self, key: K, value: V, index: DepNodeIndex) -> Self::Stored {
#[cfg(parallel_compiler)]
let mut lock = self.cache.get_shard_by_value(&key).lock();
#[cfg(not(parallel_compiler))]
let mut lock = self.cache.lock();
lock.insert(key, (value.clone(), index));
value
}

fn iter(
&self,
shards: &Sharded<Self::Sharded>,
f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex),
) {
let shards = shards.lock_shards();
for shard in shards.iter() {
for (k, v) in shard.iter() {
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
#[cfg(parallel_compiler)]
{
let shards = self.cache.lock_shards();
for shard in shards.iter() {
for (k, v) in shard.iter() {
f(k, &v.0, v.1);
}
}
}
#[cfg(not(parallel_compiler))]
{
let map = self.cache.lock();
for (k, v) in map.iter() {
f(k, &v.0, v.1);
}
}
Expand All @@ -144,12 +142,15 @@ impl<'tcx, K: Eq + Hash, V: 'tcx> CacheSelector<K, V> for ArenaCacheSelector<'tc

pub struct ArenaCache<'tcx, K, V> {
arena: WorkerLocal<TypedArena<(V, DepNodeIndex)>>,
phantom: PhantomData<(K, &'tcx V)>,
#[cfg(parallel_compiler)]
cache: Sharded<FxHashMap<K, &'tcx (V, DepNodeIndex)>>,
#[cfg(not(parallel_compiler))]
cache: Lock<FxHashMap<K, &'tcx (V, DepNodeIndex)>>,
}

impl<'tcx, K, V> Default for ArenaCache<'tcx, K, V> {
fn default() -> Self {
ArenaCache { arena: WorkerLocal::new(|_| TypedArena::default()), phantom: PhantomData }
ArenaCache { arena: WorkerLocal::new(|_| TypedArena::default()), cache: Default::default() }
}
}

Expand All @@ -171,51 +172,53 @@ where
V: Debug,
{
type Key = K;
type Sharded = FxHashMap<K, &'tcx (V, DepNodeIndex)>;

#[inline(always)]
fn lookup<'s, R, OnHit>(
&self,
state: &'s QueryCacheStore<Self>,
key: &K,
on_hit: OnHit,
) -> Result<R, QueryLookup>
fn lookup<R, OnHit>(&self, key: &K, on_hit: OnHit) -> Result<R, ()>
where
OnHit: FnOnce(&&'tcx V, DepNodeIndex) -> R,
{
let (lookup, lock) = state.get_lookup(key);
let result = lock.raw_entry().from_key_hashed_nocheck(lookup.key_hash, key);
let key_hash = sharded::make_hash(key);
#[cfg(parallel_compiler)]
let lock = self.cache.get_shard_by_hash(key_hash).lock();
#[cfg(not(parallel_compiler))]
let lock = self.cache.lock();
let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key);

if let Some((_, value)) = result {
let hit_result = on_hit(&&value.0, value.1);
Ok(hit_result)
} else {
Err(lookup)
Err(())
}
}

#[inline]
fn complete(
&self,
lock_sharded_storage: &mut Self::Sharded,
key: K,
value: V,
index: DepNodeIndex,
) -> Self::Stored {
fn complete(&self, key: K, value: V, index: DepNodeIndex) -> Self::Stored {
let value = self.arena.alloc((value, index));
let value = unsafe { &*(value as *const _) };
lock_sharded_storage.insert(key, value);
#[cfg(parallel_compiler)]
let mut lock = self.cache.get_shard_by_value(&key).lock();
#[cfg(not(parallel_compiler))]
let mut lock = self.cache.lock();
lock.insert(key, value);
&value.0
}

fn iter(
&self,
shards: &Sharded<Self::Sharded>,
f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex),
) {
let shards = shards.lock_shards();
for shard in shards.iter() {
for (k, v) in shard.iter() {
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
#[cfg(parallel_compiler)]
{
let shards = self.cache.lock_shards();
for shard in shards.iter() {
for (k, v) in shard.iter() {
f(k, &v.0, v.1);
}
}
}
#[cfg(not(parallel_compiler))]
{
let map = self.cache.lock();
for (k, v) in map.iter() {
f(k, &v.0, v.1);
}
}
Expand Down
Loading

0 comments on commit 3b1fe7e

Please sign in to comment.