diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 42246e7e52f..afc1cea684a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -359,14 +359,6 @@ jobs: - name: gix-pack with all features (including wasm) run: cd gix-pack && cargo build --all-features --target "$TARGET" - check-mode: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Find scripts with mode/shebang mismatch - run: etc/check-mode.sh - check-packetline: strategy: fail-fast: false @@ -449,7 +441,6 @@ jobs: - test-32bit-cross - lint - cargo-deny - - check-mode - check-packetline - check-blocking diff --git a/Cargo.lock b/Cargo.lock index 7f226e80914..ffde5316554 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3357,6 +3357,8 @@ dependencies = [ "anyhow", "clap", "gix", + "once_cell", + "regex", ] [[package]] diff --git a/etc/check-mode.sh b/etc/check-mode.sh deleted file mode 100755 index 80d524c98f4..00000000000 --- a/etc/check-mode.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env bash - -set -eu -o pipefail - -# Go to the worktree's root. (Even if the dir name ends in a newline.) -root_padded="$(git rev-parse --show-toplevel && echo -n .)" -root="${root_padded%$'\n.'}" -cd -- "$root" - -symbolic_shebang="$(printf '#!' | od -An -ta)" -status=0 - -function check_item () { - local mode="$1" oid="$2" path="$3" symbolic_magic - - # Extract the first two bytes (or less if shorter) and put in symbolic form. - symbolic_magic="$(git cat-file blob "$oid" | od -N2 -An -ta)" - - # Check for inconsistency between the mode and whether `#!` is present. - if [ "$mode" = 100644 ] && [ "$symbolic_magic" = "$symbolic_shebang" ]; then - printf 'mode -x but has shebang: %q\n' "$path" - elif [ "$mode" = 100755 ] && [ "$symbolic_magic" != "$symbolic_shebang" ]; then - printf 'mode +x but no shebang: %q\n' "$path" - else - return 0 - fi - - status=1 -} - -readonly record_pattern='^([0-7]+) ([[:xdigit:]]+) [[:digit:]]+'$'\t''(.+)$' - -# Check regular files named with a `.sh` suffix. -while IFS= read -rd '' record; do - [[ $record =~ $record_pattern ]] || exit 2 # bash 3.2 `set -e` doesn't cover this. - mode="${BASH_REMATCH[1]}" - oid="${BASH_REMATCH[2]}" - path="${BASH_REMATCH[3]}" - - case "$mode" in - 100644 | 100755) - check_item "$mode" "$oid" "$path" - ;; - esac -done < <(git ls-files -sz -- '*.sh') - -exit "$status" diff --git a/justfile b/justfile index 42b8cb0c449..f5a49d82641 100755 --- a/justfile +++ b/justfile @@ -10,10 +10,10 @@ alias c := check alias nt := nextest # run all tests, clippy, including journey tests, try building docs -test: clippy check doc unit-tests journey-tests-pure journey-tests-small journey-tests-async journey-tests +test: clippy check doc unit-tests journey-tests-pure journey-tests-small journey-tests-async journey-tests check-mode # run all tests, without clippy, and try building docs -ci-test: check doc unit-tests +ci-test: check doc unit-tests check-mode # run all journey tests - should be run in a fresh clone or after `cargo clean` ci-journey-tests: journey-tests-pure journey-tests-small journey-tests-async journey-tests @@ -196,6 +196,7 @@ target_dir := `cargo metadata --format-version 1 | jq -r .target_directory` ein := target_dir / "debug/ein" gix := target_dir / "debug/gix" jtt := target_dir / "debug/jtt" +it := target_dir / "debug/it" # run journey tests (max) journey-tests: @@ -257,7 +258,8 @@ find-yanked: # Find shell scripts whose +x/-x bits and magic bytes (e.g. `#!`) disagree check-mode: - ./etc/check-mode.sh + cargo build -p internal-tools + "{{ it }}" check-mode # Delete gix-packetline-blocking/src and regenerate from gix-packetline/src copy-packetline: diff --git a/tests/it/Cargo.toml b/tests/it/Cargo.toml index aad0dfbbf82..1432f77e4ae 100644 --- a/tests/it/Cargo.toml +++ b/tests/it/Cargo.toml @@ -14,7 +14,8 @@ name = "it" path = "src/main.rs" [dependencies] -clap = { version = "4.5.16", features = ["derive"] } anyhow = "1.0.86" - +clap = { version = "4.5.16", features = ["derive"] } gix = { version = "^0.68.0", path = "../../gix", default-features = false, features = ["attributes", "revision"] } +once_cell = "1.20.2" +regex = { version = "1.11.1", default-features = false, features = ["std"] } diff --git a/tests/it/src/args.rs b/tests/it/src/args.rs index 8123b9d36df..e0ccc820ae6 100644 --- a/tests/it/src/args.rs +++ b/tests/it/src/args.rs @@ -62,6 +62,17 @@ pub enum Subcommands { #[clap(value_parser = AsPathSpec)] patterns: Vec, }, + /// Check for executable bits that disagree with shebangs. + /// + /// This checks committed and staged files, but not anything unstaged, to find shell scripts + /// that either begin with a `#!` but not `+x` permissions, or do not begin with `#!` but do + /// have `+x` permissions. Such mismatches are reported but not automatically corrected. Some + /// platforms (at least Windows) do not have such permissions, but Git still represents them. + /// + /// This currently only checks files name with an `.sh` suffix, and only operates on the + /// current repository. Its main use is checking that fixture scripts are have correct modes. + #[clap(visible_alias = "cm")] + CheckMode {}, } #[derive(Clone)] diff --git a/tests/it/src/commands/check_mode.rs b/tests/it/src/commands/check_mode.rs new file mode 100644 index 00000000000..35f95d78cad --- /dev/null +++ b/tests/it/src/commands/check_mode.rs @@ -0,0 +1,116 @@ +pub(super) mod function { + use anyhow::{bail, Context}; + use gix::bstr::ByteSlice; + use once_cell::sync::Lazy; + use regex::bytes::Regex; + use std::ffi::{OsStr, OsString}; + use std::io::{BufRead, BufReader, Read}; + use std::process::{Command, Stdio}; + + pub fn check_mode() -> anyhow::Result<()> { + let root = find_root()?; + let mut any_mismatch = false; + + let mut child = git_on(&root) + .args(["ls-files", "-sz", "--", "*.sh"]) + .stdout(Stdio::piped()) + .spawn() + .context("Can't start `git` subprocess to list index")?; + + let stdout = child.stdout.take().expect("should have captured stdout"); + for result in BufReader::new(stdout).split(0) { + let record = result.context(r"Can't read '\0'-terminated record")?; + any_mismatch |= check_for_mismatch(&root, &record)?; + } + + let status = child.wait().context("Failure running `git` subprocess to list index")?; + if !status.success() { + bail!("`git` subprocess to list index did not complete successfully"); + } + if any_mismatch { + bail!("Mismatch found - scan completed, finding at least one `#!` vs. `+x` mismatch"); + } + Ok(()) + } + + /// Find the top-level directory of the current repository working tree. + fn find_root() -> anyhow::Result { + let output = Command::new(gix::path::env::exe_invocation()) + .args(["rev-parse", "--show-toplevel"]) + .output() + .context("Can't run `git` to find worktree root")?; + + if !output.status.success() { + bail!("`git` failed to find worktree root"); + } + + let root = output + .stdout + .strip_suffix(b"\n") + .context("Can't parse worktree root")? + .to_os_str()? + .to_owned(); + Ok(root) + } + + /// Prepare a `git` command, passing `root` as an operand to `-C`. + /// + /// This is suitable when `git` gave us the path `root`. Then it should already be in a form + /// where `git -C` will be able to use it, without alteration, regardless of the platform. + /// (Otherwise, it may be preferable to set `root` as the `cwd` of the `git` process instead.) + fn git_on(root: &OsStr) -> Command { + let mut cmd = Command::new(gix::path::env::exe_invocation()); + cmd.arg("-C").arg(root); + cmd + } + + /// On mismatch, report it and return `Some(true)`. + fn check_for_mismatch(root: &OsStr, record: &[u8]) -> anyhow::Result { + static RECORD_REGEX: Lazy = Lazy::new(|| { + let pattern = r"(?-u)\A([0-7]+) ([[:xdigit:]]+) [[:digit:]]+\t(.+)\z"; + Regex::new(pattern).expect("regex should be valid") + }); + + let fields = RECORD_REGEX.captures(record).context("Malformed record from `git`")?; + let mode = fields.get(1).expect("match should get mode").as_bytes(); + let oid = fields + .get(2) + .expect("match should get oid") + .as_bytes() + .to_os_str() + .expect("oid field verified as hex digits, should be valid OsStr"); + let path = fields.get(3).expect("match should get path").as_bytes().as_bstr(); + + match mode { + b"100644" if blob_has_shebang(root, oid)? => { + println!("mode -x but has shebang: {path:?}"); + Ok(true) + } + b"100755" if !blob_has_shebang(root, oid)? => { + println!("mode +x but no shebang: {path:?}"); + Ok(true) + } + _ => Ok(false), + } + } + + fn blob_has_shebang(root: &OsStr, oid: &OsStr) -> anyhow::Result { + let mut buf = [0u8; 2]; + + let mut child = git_on(root) + .args(["cat-file", "blob"]) + .arg(oid) + .stdout(Stdio::piped()) + .spawn() + .context("Can't start `git` subprocess to read blob")?; + + let mut stdout = child.stdout.take().expect("should have captured stdout"); + let count = stdout.read(&mut buf).context("Error reading data from blob")?; + drop(stdout); // Let the pipe break rather than waiting for the rest of the blob. + + // TODO: Maybe check status? On Unix, it should be 0 or SIGPIPE. Not sure about Windows. + _ = child.wait().context("Failure running `git` subprocess to read blob")?; + + Ok(&buf[..count] == b"#!") + } +} diff --git a/tests/it/src/commands/mod.rs b/tests/it/src/commands/mod.rs index 81158eaad7f..adc7a6decb2 100644 --- a/tests/it/src/commands/mod.rs +++ b/tests/it/src/commands/mod.rs @@ -3,3 +3,6 @@ pub use copy_royal::function::copy_royal; pub mod git_to_sh; pub use git_to_sh::function::git_to_sh; + +pub mod check_mode; +pub use check_mode::function::check_mode; diff --git a/tests/it/src/main.rs b/tests/it/src/main.rs index fad4190cf2c..847978481df 100644 --- a/tests/it/src/main.rs +++ b/tests/it/src/main.rs @@ -31,6 +31,7 @@ fn main() -> anyhow::Result<()> { destination_dir, patterns, } => commands::copy_royal(dry_run, &worktree_root, destination_dir, patterns), + Subcommands::CheckMode {} => commands::check_mode(), } }