diff --git a/Cargo.lock b/Cargo.lock index b94b5110f6..2991463947 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2211,6 +2211,7 @@ name = "gitbutler-command-context" version = "0.0.0" dependencies = [ "anyhow", + "bstr", "git2", "gitbutler-project", "gix", @@ -2374,6 +2375,7 @@ dependencies = [ "anyhow", "git2", "gitbutler-branch", + "gitbutler-command-context", "gitbutler-diff", "gitbutler-fs", "gitbutler-project", diff --git a/crates/gitbutler-branch-actions/src/status.rs b/crates/gitbutler-branch-actions/src/status.rs index dc2f5ae287..65126359ea 100644 --- a/crates/gitbutler-branch-actions/src/status.rs +++ b/crates/gitbutler-branch-actions/src/status.rs @@ -47,16 +47,16 @@ pub fn get_applied_status_cached( worktree_changes: Option, ) -> Result { assure_open_workspace_mode(ctx).context("ng applied status requires open workspace mode")?; - // TODO(ST): this was `get_workspace_head()`, which is slow and ideally, we don't dynamically - // calculate which should already be 'fixed' - why do we have the integration branch - // if we can't assume it's in the right state? So ideally, we assure that the code - // that affects the integration branch also updates it? - let integration_commit_id = ctx.repository().head_commit()?.id(); let mut virtual_branches = ctx .project() .virtual_branches() .list_branches_in_workspace()?; let base_file_diffs = worktree_changes.map(Ok).unwrap_or_else(|| { + // TODO(ST): this was `get_workspace_head()`, which is slow and ideally, we don't dynamically + // calculate which should already be 'fixed' - why do we have the integration branch + // if we can't assume it's in the right state? So ideally, we assure that the code + // that affects the integration branch also updates it? + let integration_commit_id = ctx.repository().head_commit()?.id(); gitbutler_diff::workdir(ctx.repository(), &integration_commit_id.to_owned()) .context("failed to diff workdir") })?; diff --git a/crates/gitbutler-command-context/Cargo.toml b/crates/gitbutler-command-context/Cargo.toml index df826abf78..cd5d076203 100644 --- a/crates/gitbutler-command-context/Cargo.toml +++ b/crates/gitbutler-command-context/Cargo.toml @@ -12,3 +12,4 @@ gix.workspace = true tracing.workspace = true gitbutler-project.workspace = true itertools = "0.13" +bstr = "1.10.0" diff --git a/crates/gitbutler-command-context/src/lib.rs b/crates/gitbutler-command-context/src/lib.rs index ca8a45422f..c4dc09dc6e 100644 --- a/crates/gitbutler-command-context/src/lib.rs +++ b/crates/gitbutler-command-context/src/lib.rs @@ -98,3 +98,6 @@ impl CommandContext { )?) } } + +mod repository_ext; +pub use repository_ext::RepositoryExtLite; diff --git a/crates/gitbutler-command-context/src/repository_ext.rs b/crates/gitbutler-command-context/src/repository_ext.rs new file mode 100644 index 0000000000..58841ea898 --- /dev/null +++ b/crates/gitbutler-command-context/src/repository_ext.rs @@ -0,0 +1,50 @@ +use anyhow::{Context, Result}; +use gix::bstr::{BString, ByteVec}; +use tracing::instrument; + +/// An extension trait that should avoid pulling in large amounts of dependency so it can be used +/// in more places without causing cycles. +/// `gitbutler_repo::RepositoryExt` may not be usable everywhere due to that. +pub trait RepositoryExtLite { + /// Exclude files that are larger than `limit_in_bytes` (eg. database.sql which may never be intended to be committed) + /// so they don't show up in the next diff. + fn ignore_large_files_in_diffs(&self, limit_in_bytes: u64) -> Result<()>; +} + +impl RepositoryExtLite for git2::Repository { + #[instrument(level = tracing::Level::DEBUG, skip(self), err(Debug))] + fn ignore_large_files_in_diffs(&self, limit_in_bytes: u64) -> Result<()> { + use gix::bstr::ByteSlice; + let repo = gix::open(self.path())?; + let worktree_dir = repo + .work_dir() + .context("All repos are expected to have a worktree")?; + let files_to_exclude: Vec<_> = repo + .dirwalk_iter( + repo.index_or_empty()?, + None::, + Default::default(), + repo.dirwalk_options()? + .emit_ignored(None) + .emit_pruned(false) + .emit_untracked(gix::dir::walk::EmissionMode::Matching), + )? + .filter_map(Result::ok) + .filter_map(|item| { + let path = worktree_dir.join(gix::path::from_bstr(item.entry.rela_path.as_bstr())); + let file_is_too_large = path + .metadata() + .map_or(false, |md| md.is_file() && md.len() > limit_in_bytes); + file_is_too_large + .then(|| Vec::from(item.entry.rela_path).into_string().ok()) + .flatten() + }) + .collect(); + // TODO(ST): refactor this to be path-safe and ' ' save - the returned list is space separated (!!) + // Just make sure this isn't needed anymore. + let ignore_list = files_to_exclude.join(" "); + // In-memory, libgit2 internal ignore rule + self.add_ignore_rule(&ignore_list)?; + Ok(()) + } +} diff --git a/crates/gitbutler-diff/src/diff.rs b/crates/gitbutler-diff/src/diff.rs index 393f27c500..0315d8cdb0 100644 --- a/crates/gitbutler-diff/src/diff.rs +++ b/crates/gitbutler-diff/src/diff.rs @@ -3,6 +3,7 @@ use std::{borrow::Cow, collections::HashMap, path::PathBuf, str}; use anyhow::{Context, Result}; use bstr::{BStr, BString, ByteSlice, ByteVec}; use gitbutler_cherry_pick::RepositoryExt; +use gitbutler_command_context::RepositoryExtLite; use gitbutler_serde::BStringForFrontend; use serde::{Deserialize, Serialize}; use tracing::instrument; @@ -155,8 +156,8 @@ pub fn workdir(repo: &git2::Repository, commit_oid: &git2::Oid) -> Result Result { - let repo = gix::open(repo.path())?; - let files_to_exclude: Vec<_> = repo - .dirwalk_iter( - repo.index_or_empty()?, - None::, - Default::default(), - repo.dirwalk_options()? - .emit_ignored(None) - .emit_pruned(false) - .emit_untracked(gix::dir::walk::EmissionMode::Matching), - )? - .filter_map(Result::ok) - .filter_map(|item| { - let path = worktree_dir.join(gix::path::from_bstr(item.entry.rela_path.as_bstr())); - let file_is_too_large = path.metadata().map_or(false, |md| { - md.is_file() && md.len() > SNAPSHOT_FILE_LIMIT_BYTES - }); - file_is_too_large - .then(|| Vec::from(item.entry.rela_path).into_string().ok()) - .flatten() - }) - .collect(); - Ok(files_to_exclude.join(" ")) -} - /// Returns the number of lines of code (added + removed) since the last snapshot in `project`. /// Includes untracked files. /// `repo` is an already opened project repository. @@ -752,13 +712,7 @@ fn lines_since_snapshot(project: &Project, repo: &git2::Repository) -> Result