Auto merge of rust-lang#94084 - Mark-Simulacrum:drop-sharded, r=cjgillot

Avoid query cache sharding code in single-threaded mode. In non-parallel compilers, the sharding code just adds needless overhead at compilation time (since there is statically only one shard anyway). This change amounts to a reduction of roughly 10 seconds in bootstrap time, with overall neutral (some wins, some losses) performance results. Parallel compiler performance should be largely unaffected by this PR; sharding is kept there.
rust-lang-ci · Feb 27, 2022 · 3b1fe7e · 3b1fe7e
2 parents bab4c13 + 594ea74
commit 3b1fe7e
Show file tree

Hide file tree

Showing 8 changed files with 167 additions and 204 deletions.
diff --git a/compiler/rustc_data_structures/src/sharded.rs b/compiler/rustc_data_structures/src/sharded.rs
@@ -129,7 +129,7 @@ impl<K: Eq + Hash + Copy + IntoPointer> ShardedHashMap<K, ()> {
 }
 
 #[inline]
-fn make_hash<K: Hash + ?Sized>(val: &K) -> u64 {
+pub fn make_hash<K: Hash + ?Sized>(val: &K) -> u64 {
     let mut state = FxHasher::default();
     val.hash(&mut state);
     state.finish()

diff --git a/compiler/rustc_middle/src/ty/query.rs b/compiler/rustc_middle/src/ty/query.rs
@@ -210,7 +210,7 @@ macro_rules! define_callbacks {
 
         #[derive(Default)]
         pub struct QueryCaches<$tcx> {
-            $($(#[$attr])* pub $name: QueryCacheStore<query_storage::$name<$tcx>>,)*
+            $($(#[$attr])* pub $name: query_storage::$name<$tcx>,)*
         }
 
         impl<$tcx> TyCtxtEnsure<$tcx> {
@@ -222,12 +222,12 @@ macro_rules! define_callbacks {
 
                 let cached = try_get_cached(self.tcx, &self.tcx.query_caches.$name, &key, noop);
 
-                let lookup = match cached {
+                match cached {
                     Ok(()) => return,
-                    Err(lookup) => lookup,
-                };
+                    Err(()) => (),
+                }
 
-                self.tcx.queries.$name(self.tcx, DUMMY_SP, key, lookup, QueryMode::Ensure);
+                self.tcx.queries.$name(self.tcx, DUMMY_SP, key, QueryMode::Ensure);
             })*
         }
 
@@ -251,12 +251,12 @@ macro_rules! define_callbacks {
 
                 let cached = try_get_cached(self.tcx, &self.tcx.query_caches.$name, &key, copy);
 
-                let lookup = match cached {
+                match cached {
                     Ok(value) => return value,
-                    Err(lookup) => lookup,
-                };
+                    Err(()) => (),
+                }
 
-                self.tcx.queries.$name(self.tcx, self.span, key, lookup, QueryMode::Get).unwrap()
+                self.tcx.queries.$name(self.tcx, self.span, key, QueryMode::Get).unwrap()
             })*
         }
 
@@ -314,7 +314,6 @@ macro_rules! define_callbacks {
                 tcx: TyCtxt<$tcx>,
                 span: Span,
                 key: query_keys::$name<$tcx>,
-                lookup: QueryLookup,
                 mode: QueryMode,
             ) -> Option<query_stored::$name<$tcx>>;)*
         }

diff --git a/compiler/rustc_query_impl/src/on_disk_cache.rs b/compiler/rustc_query_impl/src/on_disk_cache.rs
@@ -13,7 +13,7 @@ use rustc_middle::thir;
 use rustc_middle::ty::codec::{RefDecodable, TyDecoder, TyEncoder};
 use rustc_middle::ty::{self, Ty, TyCtxt};
 use rustc_query_system::dep_graph::DepContext;
-use rustc_query_system::query::{QueryContext, QuerySideEffects};
+use rustc_query_system::query::{QueryCache, QueryContext, QuerySideEffects};
 use rustc_serialize::{
     opaque::{self, FileEncodeResult, FileEncoder, IntEncodedWithFixedSize},
     Decodable, Decoder, Encodable, Encoder,
@@ -1034,7 +1034,7 @@ where
     assert!(Q::query_state(tcx).all_inactive());
     let cache = Q::query_cache(tcx);
     let mut res = Ok(());
-    cache.iter_results(&mut |key, value, dep_node| {
+    cache.iter(&mut |key, value, dep_node| {
         if res.is_err() {
             return;
         }

diff --git a/compiler/rustc_query_impl/src/plumbing.rs b/compiler/rustc_query_impl/src/plumbing.rs
@@ -336,7 +336,7 @@ macro_rules! define_queries {
             }
 
             #[inline(always)]
-            fn query_cache<'a>(tcx: QueryCtxt<$tcx>) -> &'a QueryCacheStore<Self::Cache>
+            fn query_cache<'a>(tcx: QueryCtxt<$tcx>) -> &'a Self::Cache
                 where 'tcx:'a
             {
                 &tcx.query_caches.$name
@@ -537,12 +537,11 @@ macro_rules! define_queries_struct {
                 tcx: TyCtxt<$tcx>,
                 span: Span,
                 key: query_keys::$name<$tcx>,
-                lookup: QueryLookup,
                 mode: QueryMode,
             ) -> Option<query_stored::$name<$tcx>> {
                 opt_remap_env_constness!([$($modifiers)*][key]);
                 let qcx = QueryCtxt { tcx, queries: self };
-                get_query::<queries::$name<$tcx>, _>(qcx, span, key, lookup, mode)
+                get_query::<queries::$name<$tcx>, _>(qcx, span, key, mode)
             })*
         }
     };

diff --git a/compiler/rustc_query_impl/src/profiling_support.rs b/compiler/rustc_query_impl/src/profiling_support.rs
@@ -4,7 +4,7 @@ use rustc_data_structures::profiling::SelfProfiler;
 use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, CRATE_DEF_INDEX, LOCAL_CRATE};
 use rustc_hir::definitions::DefPathData;
 use rustc_middle::ty::{TyCtxt, WithOptConstParam};
-use rustc_query_system::query::{QueryCache, QueryCacheStore};
+use rustc_query_system::query::QueryCache;
 use std::fmt::Debug;
 use std::io::Write;
 
@@ -229,7 +229,7 @@ where
 fn alloc_self_profile_query_strings_for_query_cache<'tcx, C>(
     tcx: TyCtxt<'tcx>,
     query_name: &'static str,
-    query_cache: &QueryCacheStore<C>,
+    query_cache: &C,
     string_cache: &mut QueryKeyStringCache,
 ) where
     C: QueryCache,
@@ -251,7 +251,7 @@ fn alloc_self_profile_query_strings_for_query_cache<'tcx, C>(
             // locked while doing so. Instead we copy out the
             // `(query_key, dep_node_index)` pairs and release the lock again.
             let mut query_keys_and_indices = Vec::new();
-            query_cache.iter_results(&mut |k, _, i| query_keys_and_indices.push((k.clone(), i)));
+            query_cache.iter(&mut |k, _, i| query_keys_and_indices.push((k.clone(), i)));
 
             // Now actually allocate the strings. If allocating the strings
             // generates new entries in the query cache, we'll miss them but
@@ -276,7 +276,7 @@ fn alloc_self_profile_query_strings_for_query_cache<'tcx, C>(
             let event_id = event_id_builder.from_label(query_name).to_string_id();
 
             let mut query_invocation_ids = Vec::new();
-            query_cache.iter_results(&mut |_, _, i| {
+            query_cache.iter(&mut |_, _, i| {
                 query_invocation_ids.push(i.into());
             });
 

diff --git a/compiler/rustc_query_system/src/query/caches.rs b/compiler/rustc_query_system/src/query/caches.rs
@@ -1,9 +1,12 @@
 use crate::dep_graph::DepNodeIndex;
-use crate::query::plumbing::{QueryCacheStore, QueryLookup};
 
 use rustc_arena::TypedArena;
 use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::sharded;
+#[cfg(parallel_compiler)]
 use rustc_data_structures::sharded::Sharded;
+#[cfg(not(parallel_compiler))]
+use rustc_data_structures::sync::Lock;
 use rustc_data_structures::sync::WorkerLocal;
 use std::default::Default;
 use std::fmt::Debug;
@@ -25,35 +28,23 @@ pub trait QueryStorage {
 
 pub trait QueryCache: QueryStorage + Sized {
     type Key: Hash + Eq + Clone + Debug;
-    type Sharded: Default;
 
     /// Checks if the query is already computed and in the cache.
     /// It returns the shard index and a lock guard to the shard,
     /// which will be used if the query is not in the cache and we need
     /// to compute it.
-    fn lookup<'s, R, OnHit>(
+    fn lookup<R, OnHit>(
         &self,
-        state: &'s QueryCacheStore<Self>,
         key: &Self::Key,
         // `on_hit` can be called while holding a lock to the query state shard.
         on_hit: OnHit,
-    ) -> Result<R, QueryLookup>
+    ) -> Result<R, ()>
     where
         OnHit: FnOnce(&Self::Stored, DepNodeIndex) -> R;
 
-    fn complete(
-        &self,
-        lock_sharded_storage: &mut Self::Sharded,
-        key: Self::Key,
-        value: Self::Value,
-        index: DepNodeIndex,
-    ) -> Self::Stored;
+    fn complete(&self, key: Self::Key, value: Self::Value, index: DepNodeIndex) -> Self::Stored;
 
-    fn iter(
-        &self,
-        shards: &Sharded<Self::Sharded>,
-        f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex),
-    );
+    fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex));
 }
 
 pub struct DefaultCacheSelector;
@@ -62,11 +53,16 @@ impl<K: Eq + Hash, V: Clone> CacheSelector<K, V> for DefaultCacheSelector {
     type Cache = DefaultCache<K, V>;
 }
 
-pub struct DefaultCache<K, V>(PhantomData<(K, V)>);
+pub struct DefaultCache<K, V> {
+    #[cfg(parallel_compiler)]
+    cache: Sharded<FxHashMap<K, (V, DepNodeIndex)>>,
+    #[cfg(not(parallel_compiler))]
+    cache: Lock<FxHashMap<K, (V, DepNodeIndex)>>,
+}
 
 impl<K, V> Default for DefaultCache<K, V> {
     fn default() -> Self {
-        DefaultCache(PhantomData)
+        DefaultCache { cache: Default::default() }
     }
 }
 
@@ -87,49 +83,51 @@ where
     V: Clone + Debug,
 {
     type Key = K;
-    type Sharded = FxHashMap<K, (V, DepNodeIndex)>;
 
     #[inline(always)]
-    fn lookup<'s, R, OnHit>(
-        &self,
-        state: &'s QueryCacheStore<Self>,
-        key: &K,
-        on_hit: OnHit,
-    ) -> Result<R, QueryLookup>
+    fn lookup<R, OnHit>(&self, key: &K, on_hit: OnHit) -> Result<R, ()>
     where
         OnHit: FnOnce(&V, DepNodeIndex) -> R,
     {
-        let (lookup, lock) = state.get_lookup(key);
-        let result = lock.raw_entry().from_key_hashed_nocheck(lookup.key_hash, key);
+        let key_hash = sharded::make_hash(key);
+        #[cfg(parallel_compiler)]
+        let lock = self.cache.get_shard_by_hash(key_hash).lock();
+        #[cfg(not(parallel_compiler))]
+        let lock = self.cache.lock();
+        let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key);
 
         if let Some((_, value)) = result {
             let hit_result = on_hit(&value.0, value.1);
             Ok(hit_result)
         } else {
-            Err(lookup)
+            Err(())
         }
     }
 
     #[inline]
-    fn complete(
-        &self,
-        lock_sharded_storage: &mut Self::Sharded,
-        key: K,
-        value: V,
-        index: DepNodeIndex,
-    ) -> Self::Stored {
-        lock_sharded_storage.insert(key, (value.clone(), index));
+    fn complete(&self, key: K, value: V, index: DepNodeIndex) -> Self::Stored {
+        #[cfg(parallel_compiler)]
+        let mut lock = self.cache.get_shard_by_value(&key).lock();
+        #[cfg(not(parallel_compiler))]
+        let mut lock = self.cache.lock();
+        lock.insert(key, (value.clone(), index));
         value
     }
 
-    fn iter(
-        &self,
-        shards: &Sharded<Self::Sharded>,
-        f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex),
-    ) {
-        let shards = shards.lock_shards();
-        for shard in shards.iter() {
-            for (k, v) in shard.iter() {
+    fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
+        #[cfg(parallel_compiler)]
+        {
+            let shards = self.cache.lock_shards();
+            for shard in shards.iter() {
+                for (k, v) in shard.iter() {
+                    f(k, &v.0, v.1);
+                }
+            }
+        }
+        #[cfg(not(parallel_compiler))]
+        {
+            let map = self.cache.lock();
+            for (k, v) in map.iter() {
                 f(k, &v.0, v.1);
             }
         }
@@ -144,12 +142,15 @@ impl<'tcx, K: Eq + Hash, V: 'tcx> CacheSelector<K, V> for ArenaCacheSelector<'tc
 
 pub struct ArenaCache<'tcx, K, V> {
     arena: WorkerLocal<TypedArena<(V, DepNodeIndex)>>,
-    phantom: PhantomData<(K, &'tcx V)>,
+    #[cfg(parallel_compiler)]
+    cache: Sharded<FxHashMap<K, &'tcx (V, DepNodeIndex)>>,
+    #[cfg(not(parallel_compiler))]
+    cache: Lock<FxHashMap<K, &'tcx (V, DepNodeIndex)>>,
 }
 
 impl<'tcx, K, V> Default for ArenaCache<'tcx, K, V> {
     fn default() -> Self {
-        ArenaCache { arena: WorkerLocal::new(|_| TypedArena::default()), phantom: PhantomData }
+        ArenaCache { arena: WorkerLocal::new(|_| TypedArena::default()), cache: Default::default() }
     }
 }
 
@@ -171,51 +172,53 @@ where
     V: Debug,
 {
     type Key = K;
-    type Sharded = FxHashMap<K, &'tcx (V, DepNodeIndex)>;
 
     #[inline(always)]
-    fn lookup<'s, R, OnHit>(
-        &self,
-        state: &'s QueryCacheStore<Self>,
-        key: &K,
-        on_hit: OnHit,
-    ) -> Result<R, QueryLookup>
+    fn lookup<R, OnHit>(&self, key: &K, on_hit: OnHit) -> Result<R, ()>
     where
         OnHit: FnOnce(&&'tcx V, DepNodeIndex) -> R,
     {
-        let (lookup, lock) = state.get_lookup(key);
-        let result = lock.raw_entry().from_key_hashed_nocheck(lookup.key_hash, key);
+        let key_hash = sharded::make_hash(key);
+        #[cfg(parallel_compiler)]
+        let lock = self.cache.get_shard_by_hash(key_hash).lock();
+        #[cfg(not(parallel_compiler))]
+        let lock = self.cache.lock();
+        let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key);
 
         if let Some((_, value)) = result {
             let hit_result = on_hit(&&value.0, value.1);
             Ok(hit_result)
         } else {
-            Err(lookup)
+            Err(())
         }
     }
 
     #[inline]
-    fn complete(
-        &self,
-        lock_sharded_storage: &mut Self::Sharded,
-        key: K,
-        value: V,
-        index: DepNodeIndex,
-    ) -> Self::Stored {
+    fn complete(&self, key: K, value: V, index: DepNodeIndex) -> Self::Stored {
         let value = self.arena.alloc((value, index));
         let value = unsafe { &*(value as *const _) };
-        lock_sharded_storage.insert(key, value);
+        #[cfg(parallel_compiler)]
+        let mut lock = self.cache.get_shard_by_value(&key).lock();
+        #[cfg(not(parallel_compiler))]
+        let mut lock = self.cache.lock();
+        lock.insert(key, value);
         &value.0
     }
 
-    fn iter(
-        &self,
-        shards: &Sharded<Self::Sharded>,
-        f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex),
-    ) {
-        let shards = shards.lock_shards();
-        for shard in shards.iter() {
-            for (k, v) in shard.iter() {
+    fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
+        #[cfg(parallel_compiler)]
+        {
+            let shards = self.cache.lock_shards();
+            for shard in shards.iter() {
+                for (k, v) in shard.iter() {
+                    f(k, &v.0, v.1);
+                }
+            }
+        }
+        #[cfg(not(parallel_compiler))]
+        {
+            let map = self.cache.lock();
+            for (k, v) in map.iter() {
                 f(k, &v.0, v.1);
             }
         }