Skip to content

Commit

Permalink
feat(gc): record workspace manifest and target dir in global cache tr…
Browse files Browse the repository at this point in the history
…acker
  • Loading branch information
baby230211 committed May 2, 2024
1 parent bd1cf58 commit f2d5a7d
Show file tree
Hide file tree
Showing 2 changed files with 207 additions and 3 deletions.
200 changes: 198 additions & 2 deletions src/cargo/core/global_cache_tracker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,15 @@ use std::time::{Duration, SystemTime};
use tracing::{debug, trace};

/// The filename of the database.
///
/// NOTE(review): this constant was defined twice (`.global-cache` and
/// `.global-cache.sqlite`), which does not compile. The pre-existing name is
/// kept; presumably databases already on disk use it, so renaming would
/// leave them behind — confirm before changing the on-disk name.
const GLOBAL_CACHE_FILENAME: &str = ".global-cache";

const REGISTRY_INDEX_TABLE: &str = "registry_index";
const REGISTRY_CRATE_TABLE: &str = "registry_crate";
const REGISTRY_SRC_TABLE: &str = "registry_src";
const GIT_DB_TABLE: &str = "git_db";
const GIT_CO_TABLE: &str = "git_checkout";
/// Name of the table holding one row per workspace manifest.
///
/// Must match the `CREATE TABLE workspace_manifest_index` migration.
const WORKSPACE_MANIFEST_TABLE: &str = "workspace_manifest_index";
/// Name of the table holding one row per target directory.
///
/// Must match the `CREATE TABLE target_dir_index` migration.
const TARGET_DIR_TABLE: &str = "target_dir_index";

/// How often timestamps will be updated.
///
Expand Down Expand Up @@ -209,6 +211,27 @@ pub struct GitCheckout {
pub size: Option<u64>,
}

/// The key for a workspace manifest entry stored in the database.
///
/// Pending entries of this type are flushed to the
/// `workspace_manifest_index` table.
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
pub struct WorkspaceManifestIndex {
/// A unique name of the workspace manifest.
///
/// `create_bcx` currently passes the path of the workspace's root
/// manifest, as a string.
pub encoded_workspace_manifest_name: InternedString,
}

/// The key for a target directory entry stored in the database.
///
/// Pending entries of this type are flushed to the `target_dir_index` table.
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
pub struct TargetDirIndex {
/// A unique name of the target directory.
///
/// `create_bcx` currently passes the path of the workspace's target
/// directory, as a string.
pub encoded_target_dir_name: InternedString,
}

/// The key for a workspace entry stored in the database.
///
/// It pairs a workspace manifest with a target directory. When flushed, each
/// name is resolved to its parent row id and the pair is written to the
/// `workspace_src` table.
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
pub struct WorkspaceSrc {
/// Name of the workspace manifest; same value as
/// [`WorkspaceManifestIndex::encoded_workspace_manifest_name`].
pub encoded_workspace_manifest_name: InternedString,
/// Name of the target directory; same value as
/// [`TargetDirIndex::encoded_target_dir_name`].
pub encoded_target_dir_name: InternedString,
}

/// Filesystem paths in the global cache.
///
/// Accessing these assumes a lock has already been acquired.
Expand Down Expand Up @@ -303,6 +326,30 @@ fn migrations() -> Vec<Migration> {
)?;
Ok(())
}),
basic_migration(
"CREATE TABLE workspace_manifest_index (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT UNIQUE NOT NULL,
timestamp INTEGER NOT NULL
)",
),
basic_migration(
"CREATE TABLE target_dir_index (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT UNIQUE NOT NULL,
timestamp INTEGER NOT NULL
)",
),
basic_migration(
"CREATE TABLE workspace_src (
workspace_id INTEGER NOT NULL,
target_dir_id INTEGER NOT NULL,
timestamp INTEGER NOT NULL,
PRIMARY KEY (workspace_id, target_dir_id),
FOREIGN KEY (workspace_id) REFERENCES workspace_manifest_index (id) ON DELETE CASCADE,
FOREIGN KEY (target_dir_id) REFERENCES target_dir_index (id) ON DELETE CASCADE
)",
)
]
}

Expand Down Expand Up @@ -348,6 +395,7 @@ impl GlobalCacheTracker {
/// [`CacheLockMode::DownloadExclusive`] before calling this.
pub fn new(gctx: &GlobalContext) -> CargoResult<GlobalCacheTracker> {
let db_path = Self::db_path(gctx);
println!("db_path: {:?}", db_path);
// A package cache lock is required to ensure only one cargo is
// accessing at the same time. If there is concurrent access, we
// want to rely on cargo's own "Blocking" system (which can
Expand Down Expand Up @@ -1413,7 +1461,16 @@ pub struct DeferredGlobalLastUse {
/// The key is the git db name (which is its directory name) and the value
/// is the `id` in the `git_db` table.
git_keys: HashMap<InternedString, ParentId>,

/// Cache of workspace manifest keys, used for faster fetching.
///
/// The key is the workspace manifest name (currently its path) and the value
/// is the `id` in the `workspace_manifest_index` table.
workspace_manifest_keys: HashMap<InternedString, ParentId>,
/// Cache of target dir keys, used for faster fetching.
///
/// The key is the target dir name (currently its path) and the value
/// is the `id` in the `target_dir_index` table.
target_dir_keys: HashMap<InternedString, ParentId>,
/// New registry index entries to insert.
registry_index_timestamps: HashMap<RegistryIndex, Timestamp>,
/// New registry `.crate` entries to insert.
Expand All @@ -1424,6 +1481,12 @@ pub struct DeferredGlobalLastUse {
git_db_timestamps: HashMap<GitDb, Timestamp>,
/// New git checkout entries to insert.
git_checkout_timestamps: HashMap<GitCheckout, Timestamp>,
/// New workspace manifest entries to insert.
workspace_db_timestamps: HashMap<WorkspaceManifestIndex, Timestamp>,
/// New target dir entries to insert.
target_dir_db_timestamps: HashMap<TargetDirIndex, Timestamp>,
/// New workspace src entries to insert.
workspace_src_timestamps: HashMap<WorkspaceSrc, Timestamp>,
/// This is used so that a warning about failing to update the database is
/// only displayed once.
save_err_has_warned: bool,
Expand All @@ -1437,11 +1500,16 @@ impl DeferredGlobalLastUse {
DeferredGlobalLastUse {
registry_keys: HashMap::new(),
git_keys: HashMap::new(),
workspace_manifest_keys: HashMap::new(),
target_dir_keys: HashMap::new(),
registry_index_timestamps: HashMap::new(),
registry_crate_timestamps: HashMap::new(),
registry_src_timestamps: HashMap::new(),
git_db_timestamps: HashMap::new(),
git_checkout_timestamps: HashMap::new(),
target_dir_db_timestamps: HashMap::new(),
workspace_db_timestamps: HashMap::new(),
workspace_src_timestamps: HashMap::new(),
save_err_has_warned: false,
now: now(),
}
Expand All @@ -1453,6 +1521,9 @@ impl DeferredGlobalLastUse {
&& self.registry_src_timestamps.is_empty()
&& self.git_db_timestamps.is_empty()
&& self.git_checkout_timestamps.is_empty()
&& self.target_dir_db_timestamps.is_empty()
&& self.workspace_db_timestamps.is_empty()
&& self.workspace_src_timestamps.is_empty()
}

fn clear(&mut self) {
Expand All @@ -1461,6 +1532,9 @@ impl DeferredGlobalLastUse {
self.registry_src_timestamps.clear();
self.git_db_timestamps.clear();
self.git_checkout_timestamps.clear();
self.target_dir_db_timestamps.clear();
self.workspace_db_timestamps.clear();
self.workspace_src_timestamps.clear();
}

/// Indicates the given [`RegistryIndex`] has been used right now.
Expand Down Expand Up @@ -1489,6 +1563,13 @@ impl DeferredGlobalLastUse {
self.mark_git_checkout_used_stamp(git_checkout, None);
}

/// Indicates the given [`WorkspaceSrc`] has been used right now.
///
/// Also implicitly marks the workspace manifest and the target dir used, too.
pub fn mark_workspace_src_used(&mut self, workspace_src: WorkspaceSrc) {
// `None` makes the `_stamp` variant fall back to the "now" timestamp.
self.mark_workspace_src_used_stamp(workspace_src, None);
}

/// Indicates the given [`RegistryIndex`] has been used with the given
/// time (or "now" if `None`).
pub fn mark_registry_index_used_stamp(
Expand Down Expand Up @@ -1553,6 +1634,24 @@ impl DeferredGlobalLastUse {
self.git_checkout_timestamps.insert(git_checkout, timestamp);
}

pub fn mark_workspace_src_used_stamp(
&mut self,
workspace_src: WorkspaceSrc,
timestamp: Option<&SystemTime>,
) {
let timestamp = timestamp.map_or(self.now, to_timestamp);
let workspace_db = WorkspaceManifestIndex {
encoded_workspace_manifest_name: workspace_src.encoded_workspace_manifest_name,
};
let target_dir_db = TargetDirIndex {
encoded_target_dir_name: workspace_src.encoded_target_dir_name,
};
self.target_dir_db_timestamps.insert(target_dir_db, timestamp);
self.workspace_db_timestamps.insert(workspace_db, timestamp);
self.workspace_src_timestamps
.insert(workspace_src, timestamp);
}

/// Saves all of the deferred information to the database.
///
/// This will also clear the state of `self`.
Expand All @@ -1566,9 +1665,13 @@ impl DeferredGlobalLastUse {
// These must run before the ones that refer to their IDs.
self.insert_registry_index_from_cache(&tx)?;
self.insert_git_db_from_cache(&tx)?;
self.insert_target_dir_index_from_cache(&tx)?;
self.insert_workspace_manifest_index_from_cache(&tx)?;

self.insert_registry_crate_from_cache(&tx)?;
self.insert_registry_src_from_cache(&tx)?;
self.insert_git_checkout_from_cache(&tx)?;
self.insert_workspace_src_from_cache(&tx)?;
tx.commit()?;
trace!(target: "gc", "last-use save complete");
Ok(())
Expand Down Expand Up @@ -1632,6 +1735,32 @@ impl DeferredGlobalLastUse {
);
}

/// Flushes all of the `target_dir_db_timestamps` to the `target_dir_index`
/// table.
///
/// The work is delegated to `insert_or_update_parent!`, which is handed the
/// table name, the pending timestamp map, the `target_dir_keys` id cache and
/// the name field of [`TargetDirIndex`].
// NOTE(review): the macro body is not visible here; presumably it empties
// `target_dir_db_timestamps` and supplies this function's return value —
// confirm against the macro definition.
fn insert_target_dir_index_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
insert_or_update_parent!(
self,
conn,
"target_dir_index",
target_dir_db_timestamps,
target_dir_keys,
encoded_target_dir_name
);
}

/// Flushes all of the `workspace_db_timestamps` to the
/// `workspace_manifest_index` table.
///
/// The work is delegated to `insert_or_update_parent!`, which is handed the
/// table name, the pending timestamp map, the `workspace_manifest_keys` id
/// cache and the name field of [`WorkspaceManifestIndex`].
// NOTE(review): the macro body is not visible here; presumably it empties
// `workspace_db_timestamps` and supplies this function's return value —
// confirm against the macro definition.
fn insert_workspace_manifest_index_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
insert_or_update_parent!(
self,
conn,
"workspace_manifest_index",
workspace_db_timestamps,
workspace_manifest_keys,
encoded_workspace_manifest_name
);
}

/// Flushes all of the `registry_crate_timestamps` to the database,
/// clearing `registry_index_timestamps`.
fn insert_registry_crate_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
Expand Down Expand Up @@ -1707,6 +1836,73 @@ impl DeferredGlobalLastUse {
Ok(())
}

/// Flushes all of the `workspace_src_timestamps` to the database,
/// leaving `workspace_src_timestamps` empty.
///
/// Each entry's manifest and target dir names are resolved to their parent
/// row ids first, so the parent tables must already have been flushed.
fn insert_workspace_src_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
    // Take the pending entries out so `self` stays free for the id lookups.
    let pending = std::mem::take(&mut self.workspace_src_timestamps);
    for (entry, stamp) in pending {
        let manifest_row = self.workspace_id(conn, entry.encoded_workspace_manifest_name)?;
        let target_row = self.target_dir_id(conn, entry.encoded_target_dir_name)?;
        // An existing row is only rewritten when its stored timestamp is
        // older than `stamp - UPDATE_RESOLUTION` (the `?4` parameter).
        conn.prepare_cached(
            "INSERT INTO workspace_src (workspace_id, target_dir_id, timestamp)
VALUES (?1, ?2, ?3)
ON CONFLICT DO UPDATE SET timestamp=excluded.timestamp
WHERE timestamp < ?4",
        )?
        .execute(params![
            manifest_row,
            target_row,
            stamp,
            stamp - UPDATE_RESOLUTION
        ])?;
    }
    Ok(())
}

fn workspace_id(
&mut self,
conn: &Connection,
encoded_workspace_manifest_name: InternedString,
) -> CargoResult<ParentId> {
match self.workspace_manifest_keys.get(&encoded_workspace_manifest_name) {
Some(i) => Ok(*i),
None => {
let Some(id) = GlobalCacheTracker::id_from_name(
conn,
WORKSPACE_MANIFEST_TABLE,
&encoded_workspace_manifest_name,
)?
else {
bail!("expected workspace_manifest {encoded_workspace_manifest_name} to exist, but wasn't found");
};
self.workspace_manifest_keys.insert(encoded_workspace_manifest_name, id);
Ok(id)
}
}
}

fn target_dir_id(
&mut self,
conn: &Connection,
encoded_target_dir_name: InternedString,
) -> CargoResult<ParentId> {
match self.target_dir_keys.get(&encoded_target_dir_name) {
Some(i) => Ok(*i),
None => {
let Some(id) = GlobalCacheTracker::id_from_name(
conn,
TARGET_DIR_TABLE,
&encoded_target_dir_name,
)?
else {
bail!("expected target_dir {encoded_target_dir_name} to exist, but wasn't found");
};
self.target_dir_keys.insert(encoded_target_dir_name, id);
Ok(id)
}
}
}

/// Returns the numeric ID of the registry, either fetching from the local
/// cache, or getting it from the database.
///
Expand Down
10 changes: 9 additions & 1 deletion src/cargo/ops/cargo_compile/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ use crate::core::compiler::{DefaultExecutor, Executor, UnitInterner};
use crate::core::profiles::Profiles;
use crate::core::resolver::features::{self, CliFeatures, FeaturesFor};
use crate::core::resolver::{HasDevUnits, Resolve};
use crate::core::{PackageId, PackageSet, SourceId, TargetKind, Workspace};
use crate::core::{global_cache_tracker, PackageId, PackageSet, SourceId, TargetKind, Workspace};
use crate::drop_println;
use crate::ops;
use crate::ops::resolve::WorkspaceResolve;
Expand Down Expand Up @@ -264,6 +264,14 @@ pub fn create_bcx<'a, 'gctx>(
HasDevUnits::No
}
};
let _ = &gctx
.deferred_global_last_use()?
.mark_workspace_src_used(global_cache_tracker::WorkspaceSrc {
encoded_workspace_manifest_name: InternedString::new(
ws.root_manifest().to_str().unwrap(),
),
encoded_target_dir_name: InternedString::new(ws.target_dir().as_path_unlocked().to_str().unwrap()),
});
let resolve = ops::resolve_ws_with_opts(
ws,
&mut target_data,
Expand Down

0 comments on commit f2d5a7d

Please sign in to comment.