diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c405f757..a4ad318e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,4 +43,8 @@ jobs: - run: nix flake check --all-systems --print-build-logs - - run: nix build --print-build-logs .#probe-bundled .#probe-py + - run: nix build --print-build-logs .#probe-bundled + + # The devshell uses a slightly different build process than the Nix pkg + # Might as well test that too + - run: nix develop --command just compile fix check test-native diff --git a/.gitignore b/.gitignore index c5d4cd06..3d633267 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ **/.nextflow **/work **/.pytest_cache +**/__pycache__/ # build directories **/target @@ -21,4 +22,4 @@ **/desktop.ini probe_log -dataflow_graph.pkl +.dmypy.json diff --git a/Justfile b/Justfile index ffee3e47..3195e535 100644 --- a/Justfile +++ b/Justfile @@ -1,34 +1,51 @@ -fix-format-nix: +fix-nix: alejandra . -fix-ruff: - #ruff format probe_src # TODO: uncomment - ruff check --fix probe_src +fix-py: compile-cli + # fix-py depends on compile-cli for the autogen python code + #ruff format probe_py/ tests/ libprobe/generator/ # TODO: uncomment + ruff check --fix probe_py/ tests/ libprobe/generator/ -fix-format-rust: - env --chdir probe_src/frontend cargo fmt +fix-cli: + # cargo clippy refuses to run if there are unstaged changes (fixes may be destructive) + # so we git add -A + env --chdir cli-wrapper git add -A + env --chdir cli-wrapper cargo clippy --fix --allow-staged -- --deny warnings + env --chdir cli-wrapper cargo fmt -fix-clippy: - git add -A - env --chdir probe_src/frontend cargo clippy --fix --allow-staged +fix: fix-nix fix-py fix-cli -check-mypy: - mypy --strict probe_src/libprobe - mypy --strict --package probe_py.generated - mypy --strict --package probe_py.manual +check-py: compile-cli + # dmypy == daemon mypy; much faster. 
+ dmypy run -- --strict --no-namespace-packages --pretty probe_py/ tests/ libprobe/generator/ + +check-cli: + env --chdir cli-wrapper cargo doc --workspace + +check: check-py check-cli compile-lib: - make --directory=probe_src/libprobe all + make --directory=libprobe all compile-cli: - env --chdir=probe_src/frontend cargo build --release + env --chdir=cli-wrapper cargo build --release + env --chdir=cli-wrapper cargo build compile-tests: - make --directory=probe_src/tests/c all + make --directory=tests/examples all compile: compile-lib compile-cli compile-tests -test-dev: compile - pytest probe_src --failed-first --maxfail=1 +test-nix: + nix build .#probe-bundled + nix flake check --all-systems + +test-native: compile + python -m pytest tests/ -ra --failed-first --maxfail=1 -v + +test: test-native +# Unless the user explicitly asks (`just test-nix`), +# we don't really need to run test-nix. +# It runs the same checks as `just test` and `just check`, but in Nix. -pre-commit: fix-format-nix fix-ruff fix-format-rust fix-clippy compile check-mypy test-dev +pre-commit: fix check compile test diff --git a/Makefile b/Makefile deleted file mode 100644 index 91e90242..00000000 --- a/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -all: - mkdir -p experiments - - # Process 14194128: bash -c cat flake.nix > test0; head test0 >tmp ; wc -l test0; head test0 >tmp ; wc -l process_14194128/output.log 2>&1 - # Process 14194128: cat flake.nix - mkdir -p process_14194128 - # Copy input files for process 14194128 - cp flake.nix_v0 process_14194128/ - # Run command for process 14194128 - (cd process_14194128 && cat flake.nix) - # Process 14194128: head test0 - mkdir -p process_14194128 - # Copy input files for process 14194128 - cp test0_v0 process_14194128/ - # Run command for process 14194128 - (cd process_14194128 && head test0) - # Process 14194128: wc -l - mkdir -p process_14194128 - # Copy input files for process 14194128 - cp tmp_v0 process_14194128/ - # Run command for process 14194128 
- (wc -l) > process_14194128/output.log 2>&1 \ No newline at end of file diff --git a/README.md b/README.md index 5ab5439f..6354a3d9 100644 --- a/README.md +++ b/README.md @@ -109,30 +109,28 @@ probe export --help 7. **Before submitting a PR**, run `just pre-commit` which will run pre-commit checks. -## Resarch reading list - -- [_Provenance for Computational Tasks: A Survey_ by Freire, et al. in CiSE 2008](https://sci.utah.edu/~csilva/papers/cise2008a.pdf) for an overview of provenance in general. - -- [_Transparent Result Caching_ by Vahdat and Anderson in USENIX ATC 1998](https://www.usenix.org/legacy/publications/library/proceedings/usenix98/full_papers/vahdat/vahdat.pdf) for an early system-level provenance tracer in Solaris using the `/proc` fs. Linux's `/proc` fs doesn't have the same functionality. However, this paper discusses two interesting application of provenance: unmake (query lineage information) and transparent Make (more generally, incremental computation). - -- [_CDE: Using System Call Interposition to Automatically Create Portable Software Packages_ by Guo and Engler in USENIX ATC 2011](https://www.usenix.org/legacy/events/atc11/tech/final_files/GuoEngler.pdf) for an early system-level provenance tracer. Their only application is software execution replay, but replay is quite an important application. - -- [_Techniques for Preserving Scientific Software Executions: Preserve the Mess or Encourage Cleanliness?_ by Thain, Meng, and Ivie in 2015 ](https://curate.nd.edu/articles/journal_contribution/Techniques_for_Preserving_Scientific_Software_Executions_Preserve_the_Mess_or_Encourage_Cleanliness_/24824439?file=43664937) discusses whether enabling automatic-replay is actually a good idea. 
A cursory glance makes PROBE seem more like "preserving the mess", but I think, with some care in the design choices, it actually can be more like "encouraging cleanliness", for example, by having heuristics that help cull/simplify provenance and generating human readable/editable package-manager recipes. - -- [_SoK: History is a Vast Early Warning System: Auditing the Provenance of System Intrusions_ by Inam et al. in IEEE Symposium on Security and Privacy 2023](https://adambates.org/documents/Inam_Oakland23.pdf) see specifically Inam's survey of different possibilities for the "Capture layer", "Reduction layer", and "Infrastructure layer". Although provenance-for-security has different constraints than provenacne for other purposes, the taxonomy that Inam lays out is still useful. PROBE operates by intercepting libc calls, which is essentially a "middleware" in Table I (platform modification, no program modification, no config change, incomplete mediation, not tamperproof, inter-process tracing, etc.). - -- [_System-Level Provenance Tracers_ by me et al. in ACM REP 2023](./docs/acm-rep-pres.pdf) for a motivation of this work. It surveys prior work, identifies potential gaps, and explains why I think library interposition is a promising path for future research. - -- [_Computational Experiment Comprehension using Provenance Summarization_ by Bufford et al. in ACM REP 2023](https://dl.acm.org/doi/pdf/10.1145/3641525.3663617) discusses how to implement an interface for querying provenance information. They compare classical graph-based visualization with an interactive LLM in a user-study. - -## Prior art - -- [RR-debugger](https://github.com/rr-debugger/rr) which is much slower, but features more complete capturing, lets you replay but doesn't let you do any other analysis. - -- [Sciunits](https://github.com/depaul-dice/sciunit) which is much slower, more likely to crash, has less complete capturing, lets you replay but doesn't let you do other analysis. 
- - [Reprozip](https://www.reprozip.org/) which is much slower and has less complete capturing. - -- [CARE](https://proot-me.github.io/care/) which is much slower, has less complete capturing, and lets you do containerized replay but not unpriveleged native replay and not other analysis. - -- [FSAtrace](https://github.com/jacereda/fsatrace) which is more likely to crash, has less complete capturing, and doesn't have replay or other analyses. +## Directory structure + +- `libprobe`: Library that implements interposition (C, Make, Python; happens to be manual and code-gen). + - `libprobe/include`: Headers that will be used by the Rust wrapper to read PROBE data. + - `libprobe/src`: Main C sources of `libprobe`. + - `libprobe/generator`: Python and C-template code-generator. + - `libprobe/generated`: (Generated, not committed to Git) output of code-generation. + - `libprobe/Makefile`: Makefile that builds all of `libprobe`; run `just compile-lib` to invoke. +- `cli-wrapper`: (Cargo workspace) code that wraps libprobe. + - `cli-wrapper/cli`: (Cargo crate) main CLI. + - `cli-wrapper/lib`: (Cargo crate) supporting library functions. + - `cli-wrapper/macros`: (Cargo crate) supporting macros; they use structs from `libprobe/include` to create Rust structs and Python dataclasses. + - `cli-wrapper/frontend.nix`: Nix code that builds the Cargo workspace; gets included in `flake.nix`. +- `probe_py`: Python code that implements analysis of PROBE data (happens to be manual and code-gen); should be added to `$PYTHONPATH` by `nix develop`. + - `probe_py/probe_py`: Main package to be imported or run. + - `probe_py/pyproject.toml`: Definition of main package and dependencies. + - `probe_py/tests`: Python unittests, i.e., `from probe_py import foobar; test_foobar()`; run `just test-py`. + - `probe_py/mypy_stubs`: "Stub" files that tell Mypy how to check untyped library code. Should be added to `$MYPYPATH` by `nix develop`. +- `tests`: End-to-end opaque-box tests. 
They will be run with Pytest, but they will not test Python directly; they should always `subprocess.run(["probe", ...])`. Additionally, some tests have to be manually invoked. +- `docs`: Documentation and papers. +- `benchmark`: Programs and infrastructure for benchmarking. + - `benchmark/REPRODUCING.md`: Read this first! +- `flake.nix`: Nix code that defines packages and the devshell. +- `setup_devshell.sh`: Helps instantiate Nix devshell. +- `Justfile`: "Shortcuts" for defining and running common commands (e.g., `just --list`). diff --git a/probe_src/benchmark_results.csv b/benchmark/PROBE_small_bench.csv similarity index 100% rename from probe_src/benchmark_results.csv rename to benchmark/PROBE_small_bench.csv diff --git a/probe_src/frontend/Cargo.lock b/cli-wrapper/Cargo.lock similarity index 99% rename from probe_src/frontend/Cargo.lock rename to cli-wrapper/Cargo.lock index 04d1a6d1..b6fc69b4 100644 --- a/probe_src/frontend/Cargo.lock +++ b/cli-wrapper/Cargo.lock @@ -773,7 +773,7 @@ dependencies = [ "flate2", "libc", "log", - "probe_frontend", + "probe_lib", "rand", "serde", "serde_json", @@ -781,7 +781,7 @@ dependencies = [ ] [[package]] -name = "probe_frontend" +name = "probe_lib" version = "0.2.0" dependencies = [ "bindgen", diff --git a/probe_src/frontend/Cargo.toml b/cli-wrapper/Cargo.toml similarity index 94% rename from probe_src/frontend/Cargo.toml rename to cli-wrapper/Cargo.toml index 5b25b713..2c9072fc 100644 --- a/probe_src/frontend/Cargo.toml +++ b/cli-wrapper/Cargo.toml @@ -1,8 +1,8 @@ [workspace] resolver = "2" -members = [ +members = [ "cli", - "lib", + "lib", "macros", ] diff --git a/probe_src/frontend/LICENSE b/cli-wrapper/LICENSE similarity index 100% rename from probe_src/frontend/LICENSE rename to cli-wrapper/LICENSE diff --git a/probe_src/frontend/README.md b/cli-wrapper/README.md similarity index 100% rename from probe_src/frontend/README.md rename to cli-wrapper/README.md diff --git a/probe_src/frontend/cli/Cargo.toml 
b/cli-wrapper/cli/Cargo.toml similarity index 92% rename from probe_src/frontend/cli/Cargo.toml rename to cli-wrapper/cli/Cargo.toml index 4c1ebdc8..968d2dfd 100644 --- a/probe_src/frontend/cli/Cargo.toml +++ b/cli-wrapper/cli/Cargo.toml @@ -19,7 +19,7 @@ exec = "0.3.1" flate2 = "1.0.30" libc = "0.2.155" log = "0.4.21" -probe_frontend = { path = "../lib" } +probe_lib = { path = "../lib" } rand = "0.8.5" serde = "1.0.203" serde_json = "1.0.118" diff --git a/probe_src/frontend/cli/src/dump.rs b/cli-wrapper/cli/src/dump.rs similarity index 99% rename from probe_src/frontend/cli/src/dump.rs rename to cli-wrapper/cli/src/dump.rs index 92ae7861..d175df30 100644 --- a/probe_src/frontend/cli/src/dump.rs +++ b/cli-wrapper/cli/src/dump.rs @@ -7,7 +7,7 @@ use std::{ use chrono::{DateTime, SecondsFormat}; use color_eyre::eyre::{eyre, Result, WrapErr}; -use probe_frontend::ops; +use probe_lib::ops; use serde::{Deserialize, Serialize}; /// Print the ops from a probe log out for humans. diff --git a/probe_src/frontend/cli/src/main.rs b/cli-wrapper/cli/src/main.rs similarity index 98% rename from probe_src/frontend/cli/src/main.rs rename to cli-wrapper/cli/src/main.rs index 7466856f..b265ae72 100644 --- a/probe_src/frontend/cli/src/main.rs +++ b/cli-wrapper/cli/src/main.rs @@ -10,7 +10,7 @@ mod dump; /// Run commands under provenance and generate probe record directory. mod record; -/// Wrapper over [`probe_frontend::transcribe`]. +/// Wrapper over [`probe_lib::transcribe`]. mod transcribe; /// Utility code for creating temporary directories. 
@@ -137,7 +137,7 @@ fn main() -> Result<()> { let exit = std::process::Command::new("python3") .arg("-m") - .arg("probe_py.manual.cli") + .arg("probe_py.cli") .arg(subcommand) .args(&args) .spawn() diff --git a/probe_src/frontend/cli/src/record.rs b/cli-wrapper/cli/src/record.rs similarity index 100% rename from probe_src/frontend/cli/src/record.rs rename to cli-wrapper/cli/src/record.rs diff --git a/probe_src/frontend/cli/src/transcribe.rs b/cli-wrapper/cli/src/transcribe.rs similarity index 88% rename from probe_src/frontend/cli/src/transcribe.rs rename to cli-wrapper/cli/src/transcribe.rs index 799df9b1..d3364060 100644 --- a/probe_src/frontend/cli/src/transcribe.rs +++ b/cli-wrapper/cli/src/transcribe.rs @@ -10,7 +10,7 @@ pub fn transcribe, T: Write>( ) -> Result<()> { let log_dir = Dir::temp(true).wrap_err("Failed to create temp directory for transcription")?; - probe_frontend::transcribe::parse_top_level(record_dir, &log_dir) + probe_lib::transcribe::parse_top_level(record_dir, &log_dir) .wrap_err("Failed to transcribe record directory")?; tar.append_dir_all(".", &log_dir) diff --git a/probe_src/frontend/cli/src/util.rs b/cli-wrapper/cli/src/util.rs similarity index 100% rename from probe_src/frontend/cli/src/util.rs rename to cli-wrapper/cli/src/util.rs diff --git a/probe_src/frontend/deny.toml b/cli-wrapper/deny.toml similarity index 100% rename from probe_src/frontend/deny.toml rename to cli-wrapper/deny.toml diff --git a/cli-wrapper/frontend.nix b/cli-wrapper/frontend.nix new file mode 100644 index 00000000..8335eaa1 --- /dev/null +++ b/cli-wrapper/frontend.nix @@ -0,0 +1,136 @@ +{ + pkgs, + craneLib, + rust-target, + advisory-db, + system, + python, + lib, +}: rec { + # See https://crane.dev/examples/quick-start-workspace.html + + src = craneLib.cleanCargoSource ./.; + + # Common arguments can be set here to avoid repeating them later + commonArgs = { + inherit src; + strictDeps = true; + + # all the crates in this workspace either use rust-bindgen or 
depend + # on a local crate that does. + nativeBuildInputs = [ + pkgs.rustPlatform.bindgenHook + ]; + + CARGO_BUILD_TARGET = rust-target; + CARGO_BUILD_RUSTFLAGS = "-C target-feature=+crt-static"; + CPATH = ../libprobe/include; + + # pygen needs to know where to write the python file + preConfigurePhases = [ + "pygenConfigPhase" + ]; + pygenConfigPhase = '' + export PYGEN_OUTFILE="$out/resources/ops.py" + mkdir --parents "$(dirname "$PYGEN_OUTFILE")" + echo "Sending python code to $PYGEN_OUTFILE" + ''; + }; + + # Build *just* the cargo dependencies (of the entire workspace), + # so we can reuse all of that work (e.g. via cachix) when running in CI + # It is *highly* recommended to use something like cargo-hakari to avoid + # cache misses when building individual top-level-crates + cargoArtifacts = craneLib.buildDepsOnly commonArgs; + + individualCrateArgs = + commonArgs + // { + inherit cargoArtifacts; + inherit (craneLib.crateNameFromCargoToml {inherit src;}) version; + # disable tests since we'll run them all via cargo-nextest + doCheck = false; + }; + + fileSetForCrate = crates: + lib.fileset.toSource { + root = ./.; + fileset = lib.fileset.unions ([ + ./Cargo.toml + ./Cargo.lock + ] + ++ (builtins.map craneLib.fileset.commonCargoSources crates)); + }; + + packages = rec { + inherit cargoArtifacts; + + # Prior to this version, the old code had one derivation per crate (probe-cli, probe-lib, and probe-macros). + # What could go wrong? + # Since the old version used `src = ./.`, it would rebuild all three if any one changed. + + # craneLib's workspace example [1] says to use `src = fileSetForCrate ./path/to/crate`. + # However, when I tried doing that, it would say "failed to load manifest for workspace member lib" because "failed to read macros/Cargo.toml". + # Because `lib/Cargo.toml` has a dependency on `{path = "../macros"}`, + # I think the source code of both crates has to be present at build-time of lib. + # Which means no source filtering is possible. 
+ # Indeed the exposed packages in craneLib's example (my-cli and my-server) [1] do not depend on each other. + # They depend on my-common, which is *not* filtered out (*is* included) in the `src` for those crates. + # If it's possible to simultaneously: + # - expose two Cargo crates A and B + # - where A depends on B + # - when A changes only A needs to be rebuilt + # then I don't know how to do it. + # Therefore, I will only offer one crate as a Nix package. + # + # https://crane.dev/examples/quick-start-workspace.html + + probe-cli = craneLib.buildPackage (individualCrateArgs + // { + pname = "probe-cli"; + cargoExtraArgs = "-p probe_cli"; + src = fileSetForCrate [ + ./cli + ./lib + ./macros + ]; + }); + }; + checks = { + probe-workspace-clippy = craneLib.cargoClippy (commonArgs + // { + inherit (packages) cargoArtifacts; + cargoClippyExtraArgs = "--all-targets -- --deny warnings"; + }); + + probe-workspace-doc = craneLib.cargoDoc (commonArgs + // { + inherit (packages) cargoArtifacts; + }); + + # Check formatting + probe-workspace-fmt = craneLib.cargoFmt { + inherit src; + }; + + # Audit dependencies + probe-workspace-audit = craneLib.cargoAudit { + inherit src advisory-db; + }; + + # Audit licenses + probe-workspace-deny = craneLib.cargoDeny { + inherit src; + }; + + # Run tests with cargo-nextest + # this is why `doCheck = false` on the crate derivations, so as to not + # run the tests twice. 
+ probe-workspace-nextest = craneLib.cargoNextest (commonArgs + // { + inherit (packages) cargoArtifacts; + partitions = 1; + partitionType = "count"; + }); + }; +} diff --git a/probe_src/frontend/lib/Cargo.toml b/cli-wrapper/lib/Cargo.toml similarity index 92% rename from probe_src/frontend/lib/Cargo.toml rename to cli-wrapper/lib/Cargo.toml index 90b871e2..692cf5d9 100644 --- a/probe_src/frontend/lib/Cargo.toml +++ b/cli-wrapper/lib/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "probe_frontend" +name = "probe_lib" version.workspace = true license.workspace = true authors.workspace = true @@ -10,7 +10,7 @@ edition.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] -name = "probe_frontend" +name = "probe_lib" path = "src/lib.rs" [dependencies] diff --git a/probe_src/frontend/lib/build.rs b/cli-wrapper/lib/build.rs similarity index 100% rename from probe_src/frontend/lib/build.rs rename to cli-wrapper/lib/build.rs diff --git a/probe_src/frontend/lib/src/error.rs b/cli-wrapper/lib/src/error.rs similarity index 100% rename from probe_src/frontend/lib/src/error.rs rename to cli-wrapper/lib/src/error.rs diff --git a/probe_src/frontend/lib/src/lib.rs b/cli-wrapper/lib/src/lib.rs similarity index 100% rename from probe_src/frontend/lib/src/lib.rs rename to cli-wrapper/lib/src/lib.rs diff --git a/probe_src/frontend/lib/src/metadata.rs b/cli-wrapper/lib/src/metadata.rs similarity index 100% rename from probe_src/frontend/lib/src/metadata.rs rename to cli-wrapper/lib/src/metadata.rs diff --git a/probe_src/frontend/lib/src/ops.rs b/cli-wrapper/lib/src/ops.rs similarity index 100% rename from probe_src/frontend/lib/src/ops.rs rename to cli-wrapper/lib/src/ops.rs diff --git a/probe_src/frontend/lib/src/transcribe.rs b/cli-wrapper/lib/src/transcribe.rs similarity index 100% rename from probe_src/frontend/lib/src/transcribe.rs rename to cli-wrapper/lib/src/transcribe.rs diff --git 
a/probe_src/frontend/macros/Cargo.toml b/cli-wrapper/macros/Cargo.toml similarity index 100% rename from probe_src/frontend/macros/Cargo.toml rename to cli-wrapper/macros/Cargo.toml diff --git a/probe_src/frontend/macros/src/lib.rs b/cli-wrapper/macros/src/lib.rs similarity index 100% rename from probe_src/frontend/macros/src/lib.rs rename to cli-wrapper/macros/src/lib.rs diff --git a/probe_src/frontend/macros/src/pygen.rs b/cli-wrapper/macros/src/pygen.rs similarity index 100% rename from probe_src/frontend/macros/src/pygen.rs rename to cli-wrapper/macros/src/pygen.rs diff --git a/probe_src/frontend/python/pyproject.toml b/cli-wrapper/pyproject.toml similarity index 100% rename from probe_src/frontend/python/pyproject.toml rename to cli-wrapper/pyproject.toml diff --git a/probe_src/README.md b/docs/developers_readme.md similarity index 97% rename from probe_src/README.md rename to docs/developers_readme.md index fd0c9432..44913ee5 100644 --- a/probe_src/README.md +++ b/docs/developers_readme.md @@ -77,3 +77,9 @@ I don't like replaying libcalls by intercepting and copy/pasting contents of the The entire close of `temp_file` in PID 101 precedes waitpid 101 in fork-join order, waitpid 101 precedes fork 102 in program order, and fork 102 precedess the open of `temp_file` in PID 102, so we can conclude that information may flow from `foo` to `bar`. From which, we can deduce the provenance graph `input_file` -> `foo` -> `temp_file` -> `bar` -> `output_file` with only the open/close intervals and happens-before order. + +# Python package + +probe_py is a package that implements experimental, non-core CLI functions of PROBE and Python library functionality of PROBE. 
+ +Required reading: diff --git a/probe_src/notes.md b/docs/notes.md similarity index 100% rename from probe_src/notes.md rename to docs/notes.md diff --git a/tasks.org b/docs/old_tasks.org similarity index 100% rename from tasks.org rename to docs/old_tasks.org diff --git a/docs/.gitignore b/docs/publications/.gitignore similarity index 100% rename from docs/.gitignore rename to docs/publications/.gitignore diff --git a/docs/acm-rep-pres.html b/docs/publications/acm-rep-pres.html similarity index 100% rename from docs/acm-rep-pres.html rename to docs/publications/acm-rep-pres.html diff --git a/docs/acm-rep-pres.pdf b/docs/publications/acm-rep-pres.pdf similarity index 100% rename from docs/acm-rep-pres.pdf rename to docs/publications/acm-rep-pres.pdf diff --git a/docs/acm-template.tex b/docs/publications/acm-template.tex similarity index 100% rename from docs/acm-template.tex rename to docs/publications/acm-template.tex diff --git a/docs/benchmark_suite/README.md b/docs/publications/benchmark_suite/README.md similarity index 100% rename from docs/benchmark_suite/README.md rename to docs/publications/benchmark_suite/README.md diff --git a/docs/benchmark_suite/README.pdf b/docs/publications/benchmark_suite/README.pdf similarity index 100% rename from docs/benchmark_suite/README.pdf rename to docs/publications/benchmark_suite/README.pdf diff --git a/docs/benchmark_suite/app-lvl-prov.dot b/docs/publications/benchmark_suite/app-lvl-prov.dot similarity index 100% rename from docs/benchmark_suite/app-lvl-prov.dot rename to docs/publications/benchmark_suite/app-lvl-prov.dot diff --git a/docs/benchmark_suite/generated/clustering2.svg b/docs/publications/benchmark_suite/generated/clustering2.svg similarity index 100% rename from docs/benchmark_suite/generated/clustering2.svg rename to docs/publications/benchmark_suite/generated/clustering2.svg diff --git a/docs/benchmark_suite/generated/dendrogram.pdf b/docs/publications/benchmark_suite/generated/dendrogram.pdf similarity 
index 100% rename from docs/benchmark_suite/generated/dendrogram.pdf rename to docs/publications/benchmark_suite/generated/dendrogram.pdf diff --git a/docs/benchmark_suite/generated/dendrogram.svg b/docs/publications/benchmark_suite/generated/dendrogram.svg similarity index 100% rename from docs/benchmark_suite/generated/dendrogram.svg rename to docs/publications/benchmark_suite/generated/dendrogram.svg diff --git a/docs/benchmark_suite/generated/dendrogram_full.pdf b/docs/publications/benchmark_suite/generated/dendrogram_full.pdf similarity index 100% rename from docs/benchmark_suite/generated/dendrogram_full.pdf rename to docs/publications/benchmark_suite/generated/dendrogram_full.pdf diff --git a/docs/benchmark_suite/generated/log_overhead_hist.svg b/docs/publications/benchmark_suite/generated/log_overhead_hist.svg similarity index 100% rename from docs/benchmark_suite/generated/log_overhead_hist.svg rename to docs/publications/benchmark_suite/generated/log_overhead_hist.svg diff --git a/docs/benchmark_suite/generated/pca0.pdf b/docs/publications/benchmark_suite/generated/pca0.pdf similarity index 100% rename from docs/benchmark_suite/generated/pca0.pdf rename to docs/publications/benchmark_suite/generated/pca0.pdf diff --git a/docs/benchmark_suite/generated/pca1.pdf b/docs/publications/benchmark_suite/generated/pca1.pdf similarity index 100% rename from docs/benchmark_suite/generated/pca1.pdf rename to docs/publications/benchmark_suite/generated/pca1.pdf diff --git a/docs/benchmark_suite/generated/predictive-performance.pdf b/docs/publications/benchmark_suite/generated/predictive-performance.pdf similarity index 100% rename from docs/benchmark_suite/generated/predictive-performance.pdf rename to docs/publications/benchmark_suite/generated/predictive-performance.pdf diff --git a/docs/benchmark_suite/generated/subsetting-accuracy.pdf b/docs/publications/benchmark_suite/generated/subsetting-accuracy.pdf similarity index 100% rename from 
docs/benchmark_suite/generated/subsetting-accuracy.pdf rename to docs/publications/benchmark_suite/generated/subsetting-accuracy.pdf diff --git a/docs/benchmark_suite/generated/subsetting-dist.pdf b/docs/publications/benchmark_suite/generated/subsetting-dist.pdf similarity index 100% rename from docs/benchmark_suite/generated/subsetting-dist.pdf rename to docs/publications/benchmark_suite/generated/subsetting-dist.pdf diff --git a/docs/benchmark_suite/generated/subsetting.pdf b/docs/publications/benchmark_suite/generated/subsetting.pdf similarity index 100% rename from docs/benchmark_suite/generated/subsetting.pdf rename to docs/publications/benchmark_suite/generated/subsetting.pdf diff --git a/docs/benchmark_suite/prov-example.dot b/docs/publications/benchmark_suite/prov-example.dot similarity index 100% rename from docs/benchmark_suite/prov-example.dot rename to docs/publications/benchmark_suite/prov-example.dot diff --git a/docs/benchmark_suite/prov-example.png b/docs/publications/benchmark_suite/prov-example.png similarity index 100% rename from docs/benchmark_suite/prov-example.png rename to docs/publications/benchmark_suite/prov-example.png diff --git a/docs/benchmark_suite/simple-prov-example.dot b/docs/publications/benchmark_suite/simple-prov-example.dot similarity index 100% rename from docs/benchmark_suite/simple-prov-example.dot rename to docs/publications/benchmark_suite/simple-prov-example.dot diff --git a/docs/benchmark_suite/simple-prov-example.png b/docs/publications/benchmark_suite/simple-prov-example.png similarity index 100% rename from docs/benchmark_suite/simple-prov-example.png rename to docs/publications/benchmark_suite/simple-prov-example.png diff --git a/docs/benchmark_suite/submitted.pdf b/docs/publications/benchmark_suite/submitted.pdf similarity index 100% rename from docs/benchmark_suite/submitted.pdf rename to docs/publications/benchmark_suite/submitted.pdf diff --git a/docs/benchmark_suite/sys-lvl-log.svg 
b/docs/publications/benchmark_suite/sys-lvl-log.svg similarity index 100% rename from docs/benchmark_suite/sys-lvl-log.svg rename to docs/publications/benchmark_suite/sys-lvl-log.svg diff --git a/docs/benchmark_suite/sys-lvl-prov.dot b/docs/publications/benchmark_suite/sys-lvl-prov.dot similarity index 100% rename from docs/benchmark_suite/sys-lvl-prov.dot rename to docs/publications/benchmark_suite/sys-lvl-prov.dot diff --git a/docs/benchmark_suite/tech_report.md b/docs/publications/benchmark_suite/tech_report.md similarity index 100% rename from docs/benchmark_suite/tech_report.md rename to docs/publications/benchmark_suite/tech_report.md diff --git a/docs/benchmark_suite/wf-lvl-prov.dot b/docs/publications/benchmark_suite/wf-lvl-prov.dot similarity index 100% rename from docs/benchmark_suite/wf-lvl-prov.dot rename to docs/publications/benchmark_suite/wf-lvl-prov.dot diff --git a/docs/citations-to-latex.lua b/docs/publications/citations-to-latex.lua similarity index 100% rename from docs/citations-to-latex.lua rename to docs/publications/citations-to-latex.lua diff --git a/docs/dataflow-graph.png b/docs/publications/dataflow-graph.png similarity index 100% rename from docs/dataflow-graph.png rename to docs/publications/dataflow-graph.png diff --git a/docs/dataflow-graph.svg b/docs/publications/dataflow-graph.svg similarity index 100% rename from docs/dataflow-graph.svg rename to docs/publications/dataflow-graph.svg diff --git a/docs/flake.lock b/docs/publications/flake.lock similarity index 100% rename from docs/flake.lock rename to docs/publications/flake.lock diff --git a/docs/flake.nix b/docs/publications/flake.nix similarity index 100% rename from docs/flake.nix rename to docs/publications/flake.nix diff --git a/docs/illinois.png b/docs/publications/illinois.png similarity index 100% rename from docs/illinois.png rename to docs/publications/illinois.png diff --git a/docs/lit_review/application-level.dot b/docs/publications/lit_review/application-level.dot 
similarity index 100% rename from docs/lit_review/application-level.dot rename to docs/publications/lit_review/application-level.dot diff --git a/docs/lit_review/datasets.org b/docs/publications/lit_review/datasets.org similarity index 100% rename from docs/lit_review/datasets.org rename to docs/publications/lit_review/datasets.org diff --git a/docs/lit_review/flake.lock b/docs/publications/lit_review/flake.lock similarity index 100% rename from docs/lit_review/flake.lock rename to docs/publications/lit_review/flake.lock diff --git a/docs/lit_review/flake.nix b/docs/publications/lit_review/flake.nix similarity index 100% rename from docs/lit_review/flake.nix rename to docs/publications/lit_review/flake.nix diff --git a/docs/lit_review/index.pdf b/docs/publications/lit_review/index.pdf similarity index 100% rename from docs/lit_review/index.pdf rename to docs/publications/lit_review/index.pdf diff --git a/docs/lit_review/index.tex b/docs/publications/lit_review/index.tex similarity index 100% rename from docs/lit_review/index.tex rename to docs/publications/lit_review/index.tex diff --git a/docs/lit_review/lit_review.md b/docs/publications/lit_review/lit_review.md similarity index 100% rename from docs/lit_review/lit_review.md rename to docs/publications/lit_review/lit_review.md diff --git a/docs/lit_review/lit_review.turtle b/docs/publications/lit_review/lit_review.turtle similarity index 100% rename from docs/lit_review/lit_review.turtle rename to docs/publications/lit_review/lit_review.turtle diff --git a/docs/lit_review/meat.tex b/docs/publications/lit_review/meat.tex similarity index 100% rename from docs/lit_review/meat.tex rename to docs/publications/lit_review/meat.tex diff --git a/docs/lit_review/search.csv b/docs/publications/lit_review/search.csv similarity index 100% rename from docs/lit_review/search.csv rename to docs/publications/lit_review/search.csv diff --git a/docs/lit_review/system-level.dot b/docs/publications/lit_review/system-level.dot 
similarity index 100% rename from docs/lit_review/system-level.dot rename to docs/publications/lit_review/system-level.dot diff --git a/docs/lit_review/workflow-level.dot b/docs/publications/lit_review/workflow-level.dot similarity index 100% rename from docs/lit_review/workflow-level.dot rename to docs/publications/lit_review/workflow-level.dot diff --git a/docs/low_provenance_overhead/main.md b/docs/publications/low_provenance_overhead/main.md similarity index 100% rename from docs/low_provenance_overhead/main.md rename to docs/publications/low_provenance_overhead/main.md diff --git a/docs/poster/main.svg b/docs/publications/poster/main.svg similarity index 100% rename from docs/poster/main.svg rename to docs/publications/poster/main.svg diff --git a/docs/poster/poster.html b/docs/publications/poster/poster.html similarity index 100% rename from docs/poster/poster.html rename to docs/publications/poster/poster.html diff --git a/docs/poster/poster.pdf b/docs/publications/poster/poster.pdf similarity index 100% rename from docs/poster/poster.pdf rename to docs/publications/poster/poster.pdf diff --git a/docs/poster/poster.scrbl b/docs/publications/poster/poster.scrbl similarity index 100% rename from docs/poster/poster.scrbl rename to docs/publications/poster/poster.scrbl diff --git a/docs/probe-qr.svg b/docs/publications/probe-qr.svg similarity index 100% rename from docs/probe-qr.svg rename to docs/publications/probe-qr.svg diff --git a/docs/prov-example.svg b/docs/publications/prov-example.svg similarity index 100% rename from docs/prov-example.svg rename to docs/publications/prov-example.svg diff --git a/docs/prov_pres/main.md b/docs/publications/prov_pres/main.md similarity index 100% rename from docs/prov_pres/main.md rename to docs/publications/prov_pres/main.md diff --git a/docs/prov_pres/prov_example.dot b/docs/publications/prov_pres/prov_example.dot similarity index 100% rename from docs/prov_pres/prov_example.dot rename to 
docs/publications/prov_pres/prov_example.dot diff --git a/docs/prov_pres/prov_example.svg b/docs/publications/prov_pres/prov_example.svg similarity index 100% rename from docs/prov_pres/prov_example.svg rename to docs/publications/prov_pres/prov_example.svg diff --git a/docs/record_replay/.gitignore b/docs/publications/record_replay/.gitignore similarity index 100% rename from docs/record_replay/.gitignore rename to docs/publications/record_replay/.gitignore diff --git a/docs/record_replay/main.md b/docs/publications/record_replay/main.md similarity index 100% rename from docs/record_replay/main.md rename to docs/publications/record_replay/main.md diff --git a/docs/record_replay/main.pdf b/docs/publications/record_replay/main.pdf similarity index 100% rename from docs/record_replay/main.pdf rename to docs/publications/record_replay/main.pdf diff --git a/docs/record_replay/old-main.md b/docs/publications/record_replay/old-main.md similarity index 100% rename from docs/record_replay/old-main.md rename to docs/publications/record_replay/old-main.md diff --git a/docs/record_replay/repro_comparison.ods b/docs/publications/record_replay/repro_comparison.ods similarity index 100% rename from docs/record_replay/repro_comparison.ods rename to docs/publications/record_replay/repro_comparison.ods diff --git a/docs/record_replay/zotero.bib b/docs/publications/record_replay/zotero.bib similarity index 100% rename from docs/record_replay/zotero.bib rename to docs/publications/record_replay/zotero.bib diff --git a/docs/reed.bib b/docs/publications/reed.bib similarity index 100% rename from docs/reed.bib rename to docs/publications/reed.bib diff --git a/docs/sandia.svg b/docs/publications/sandia.svg similarity index 100% rename from docs/sandia.svg rename to docs/publications/sandia.svg diff --git a/docs/script.sh b/docs/publications/script.sh similarity index 100% rename from docs/script.sh rename to docs/publications/script.sh diff --git a/docs/supplemental.bib 
b/docs/publications/supplemental.bib similarity index 100% rename from docs/supplemental.bib rename to docs/publications/supplemental.bib diff --git a/docs/us-rse.html b/docs/publications/us-rse.html similarity index 100% rename from docs/us-rse.html rename to docs/publications/us-rse.html diff --git a/docs/us-rse.pdf b/docs/publications/us-rse.pdf similarity index 100% rename from docs/us-rse.pdf rename to docs/publications/us-rse.pdf diff --git a/docs/why_prov/main.md b/docs/publications/why_prov/main.md similarity index 100% rename from docs/why_prov/main.md rename to docs/publications/why_prov/main.md diff --git a/docs/zotero.bib b/docs/publications/zotero.bib similarity index 100% rename from docs/zotero.bib rename to docs/publications/zotero.bib diff --git a/docs/publications_index.md b/docs/publications_index.md new file mode 100644 index 00000000..6e89e118 --- /dev/null +++ b/docs/publications_index.md @@ -0,0 +1,15 @@ +- Evaluating system-level provenance tools for practical use by Grayson, Milewicz + - 🔓 [fulltext](./publications/lit_review/index.pdf) + +- Trick or Research + - 🔓 [presentation](./publications/prov_pres/main.md) + +- How to collect computational provenance by Grayson, Milewicz, Katz, Marinov + - 🔓 [poster](./publications/poster/poster.pdf) + +- A benchmark suite and performance analysis of user-space provenance collectors by Grayson, Aguilar, Milewicz, Katz, Darko @ ACM REP '24 [10.1145/3641525.3663627](https://doi.org/10.1145/3641525.3663627) + - 🔓 [fulltext](./publications/benchmark_suite/README.pdf) + - 🔓 [slides](./publications/acm-rep-pres.pdf) + +- PROBE4RSE: Provenance Replay/Observation Engine for Research Software Engineers by Grayson, Milewicz, Katz, Marinov @ US-RSE '24 [10.1145/3641525.3663627](https://doi.org/10.1145/3641525.3663627) + - 🔓 [slides](./publications/us-rse.pdf) diff --git a/docs/research_reading_list.md b/docs/research_reading_list.md new file mode 100644 index 00000000..76fd74c1 --- /dev/null +++ 
b/docs/research_reading_list.md @@ -0,0 +1,27 @@ +## Research reading list + +- [_Provenance for Computational Tasks: A Survey_ by Freire et al. in CiSE '08](https://sci.utah.edu/~csilva/papers/cise2008a.pdf) for an overview of provenance in general. + +- [_Transparent Result Caching_ by Vahdat and Anderson @ USENIX ATC '98](https://www.usenix.org/legacy/publications/library/proceedings/usenix98/full_papers/vahdat/vahdat.pdf) for an early system-level provenance tracer in Solaris using the `/proc` fs. Linux's `/proc` fs doesn't have the same functionality. However, this paper discusses two interesting applications of provenance: unmake (query lineage information) and transparent Make (more generally, incremental computation). + +- [_CDE: Using System Call Interposition to Automatically Create Portable Software Packages_ by Guo and Engler @ USENIX ATC '11](https://www.usenix.org/legacy/events/atc11/tech/final_files/GuoEngler.pdf) for an early system-level provenance tracer. Their only application is software execution replay, but replay is quite an important application. + +- [_Techniques for Preserving Scientific Software Executions: Preserve the Mess or Encourage Cleanliness?_ by Thain, Meng, and Ivie @ iPRES 2015](https://curate.nd.edu/articles/journal_contribution/Techniques_for_Preserving_Scientific_Software_Executions_Preserve_the_Mess_or_Encourage_Cleanliness_/24824439?file=43664937) discusses whether enabling automatic-replay is actually a good idea. A cursory glance makes PROBE seem more like "preserving the mess", but I think, with some care in the design choices, it actually can be more like "encouraging cleanliness", for example, by having heuristics that help cull/simplify provenance and generating human readable/editable package-manager recipes. + +- [_SoK: History is a Vast Early Warning System: Auditing the Provenance of System Intrusions_ by Inam et al.
@ IEEE S&P '23](https://adambates.org/documents/Inam_Oakland23.pdf) see specifically Inam's survey of different possibilities for the "Capture layer", "Reduction layer", and "Infrastructure layer". Although provenance-for-security has different constraints than provenance for other purposes, the taxonomy that Inam lays out is still useful. PROBE operates by intercepting libc calls, which is essentially a "middleware" in Table I (platform modification, no program modification, no config change, incomplete mediation, not tamperproof, inter-process tracing, etc.). + +- [_System-Level Provenance Tracers_ by me et al. @ ACM REP 2023](./publications/acm-rep-pres.pdf) for a motivation of this work. It surveys prior work, identifies potential gaps, and explains why I think library interposition is a promising path for future research. + +- [_Computational Experiment Comprehension using Provenance Summarization_ by Boufford et al. @ ACM REP 2024](https://dl.acm.org/doi/pdf/10.1145/3641525.3663617) discusses how to implement an interface for querying provenance information. They compare classical graph-based visualization with an interactive LLM in a user-study. + +## Prior art + +- [RR-debugger](https://github.com/rr-debugger/rr) which is much slower, but features more complete capturing, lets you replay but doesn't let you do any other analysis. + +- [Sciunits](https://github.com/depaul-dice/sciunit) which is much slower, more likely to crash, has less complete capturing, lets you replay but doesn't let you do other analysis. + +- [Reprozip](https://www.reprozip.org/) which is much slower and has less complete capturing. + +- [CARE](https://proot-me.github.io/care/) which is much slower, has less complete capturing, and lets you do containerized replay but not unprivileged native replay and not other analysis. + +- [FSAtrace](https://github.com/jacereda/fsatrace) which is more likely to crash, has less complete capturing, and doesn't have replay or other analyses.
diff --git a/probe_src/tasks.md b/docs/tasks.md similarity index 84% rename from probe_src/tasks.md rename to docs/tasks.md index 4bdec56e..95e82366 100644 --- a/probe_src/tasks.md +++ b/docs/tasks.md @@ -26,6 +26,20 @@ Core functionality: - Provenance graph should get stored in user-wide directory. - It should be SQLite. +- [ ] We should record rusage of each process. + - Include: + - Time of start + - Time of stop + - Compute time + - IO + - MaxRSS + - [ ] Render that information somewhere? Maybe generated Makefile or Workflow should print wall time estimate, based on the planned computational steps. + +- [ ] Discuss Windows and MacOS implementation?? + - https://en.wikipedia.org/wiki/DLL_injection#Approaches_on_Microsoft_Windows + - MacOS: `DYLD_INSERT_LIBRARIES="./test.dylib" DYLD_FORCE_FLAT_NAMESPACE=1 prog` + - [Detours: Binary interception of Win32 functions ](https://www.usenix.org/legacy/publications/library/proceedings/usenix-nt99/full_papers/hunt/hunt.pdf) + Core tests: - [x] Write end-to-end-tests. End-to-end test should verify properties of the NetworkX graph returned by `provlog_to_digraph`. - [x] Check generic properties Shofiya and Sam finished this. @@ -59,9 +73,9 @@ Core tests: - [x] Write a CI script that uses Nix to install dependencies and run the Justfiles. - [x] Check (not format) code in Alejandra and Black. - [x] Figure out why tests don't work. - - [ ] Run tests in an Ubuntu Docker container. - - [ ] Run tests in a really old Ubuntu Docker container. - - [ ] Figure out how to intelligently combine Nix checks, Just checks, and GitHub CI checks, so we aren't duplicating checks. + - [x] Run tests in an Ubuntu Docker container. + - [x] Run tests in a really old Ubuntu Docker container. + - [x] Figure out how to intelligently combine Nix checks, Just checks, and GitHub CI checks, so we aren't duplicating checks. 
- [ ] Clang-analyzer - [x] Write microbenchmarking - [x] Run performance test-cases in two steps: one with just libprobe record and one with just transcription. (3 new CLI entrypoints, described in comments in CLI.py) @@ -69,13 +83,11 @@ Core tests: Downstream applications: - [ ] Should export the PROBE log to the following formats: - - [ ] [OCI image](https://opencontainers.org/) (runnable with Docker) + - [x] [OCI image](https://opencontainers.org/) (runnable with Docker) - [ ] Test that executing this image produces the same stdout, stderr, and files for the tests we already have. - - [ ] Tar-ball intended for chroot - - [ ] Directory - [ ] VM image. - [ ] Test execution again. - - [ ] Research ways to speed up the recording phase. + - [ ] Commented script. Comments would include files in, out, and time taken - [ ] SSH wrapper - [ ] There should be a shell script named `ssh` that calls `./PROBE ssh `. @@ -119,7 +131,6 @@ Design issues: - [ ] Think about in situ transcription and analysis - Think about assumptions in analysis - - Think about front-end and UI/UX Performance issues: - [ ] Have better benchmarks @@ -130,18 +141,22 @@ Performance issues: - [ ] Test high mem - [ ] Put magic bytes in arena - - [ ] Use lock-free implementation of InodeTable -Documentation: -- [ ] Make the CLI better. You shouldn't need to give `-f` to make repeated applications work. You shouldn't need to give `--input`. +- [ ] Put rdtsc performance counters in libprobe to instrument startup and re-exec cost. Write them to disk somehow. Look at the results. + + +Better UI/UX: + +- [ ] Probe -o should support formatted-strings, including: %pid, %iso_datetime, %exe, %host, syntax subject to change. + - PROBE should default to `recording_%exe_%iso_datetime.tar.gz`; that way, you can run probe twice in a row with no error without needing `-f`. It's currently an unexpected pain-point, I think. CARE does something like `something.%pid`, I think. + +- [ ] Make the CLI better. 
You shouldn't need to give `--input`. - [ ] Document CLI tool. - [ ] Do we need to have a file-extension for probe_log? -- [ ] Combine Python and Rust CLIs. - - [ ] Improve the README. - [ ] Style output with Rich. @@ -149,13 +164,10 @@ Documentation: - [ ] Style output of Rust tool. - [ ] Package for the following platforms: - - [ ] It should be obvious how to build libprobe and probe cli (Rust) with Nix from the README. - - [ ] The repository should be an installable Python package, using the PEP 518 (pyproject.toml). Consider having one Python package with bundled binaries and one without. + - [x] Nix - [ ] PyPI - - [ ] Nix - [ ] Spack - [ ] Guix - - [ ] Docker image (consider whether to publish DockerHub, Quay, GHCR, or somewhere else). - [ ] Statically linked, downloadable binary - Built in CI on each "release" and downloadable from GitHub. @@ -164,10 +176,6 @@ Documentation: - [ ] Explain design decisions Nice to have: -- [ ] Make it easier to get to the debug build of probe cli. - - Build both versions, called `probe` and `probe_dbg`. - - `probe_dbg` should use `libprobe_dbg` - - Get rid of `--debug` - [ ] Don't check in generated code to VCS @@ -176,28 +184,22 @@ Nice to have: - [ ] Add more syscalls - [ ] Add Dup ops and debug `bash -c 'head foo > bar'` (branch add-new-ops). Sam is working on this -- [ ] Add more Ops (see branch add-new-ops) - - [ ] Libprobe should identify which was the "root" process. - [ ] Sort readdir order in record and replay phases. - [ ] Re-enable some of the tests I disabled. -- [ ] Write a FUSE that maps inodes (underlying fs) to inodes (of our choosing). Write an option for replay to use this FUSE. - -- [ ] Link with libbacktrace on `--debug` runs. - - [ ] Refactor some identifiers in codebase. 
- [ ] `prov_log_process_tree` -> `process_tree` - [ ] `prov_log` -> `probe_log` - [ ] `(pid, ex_id, tid, op_id)` -> `dataclass` - [ ] `digraph`, `process_graph` -> `hb_graph` - - [ ] Format Python with Ruff - - [ ] Use Clang's non-null attribute. - - [ ] Having fewer Python imports (e.g., generated.parser, generated.ops. Maybe we should re-export stuff in `__init__.py` of generated). - [ ] Reformat repository layout + +- [ ] Have `probe` and `probe-dbg`; `probe` loads `libprobe.so`; `probe-dbg` loads `libprobe-dbg.so` and possibly `libbacktrace.so`. + +- [ ] Reformat repository layout - [ ] Probably have 1 top-level folder for each language, but make sure all the pieces compose nicely. - [ ] `reproducibility_tests` -> `tests`? - [ ] Move tests to root level? @@ -206,6 +208,10 @@ Nice to have: - [ ] Run pre-commit in GitHub Actions, committing fixes to PR branch +- [ ] We currently assume that coreutils will exist on the local and remote hosts. They might not. In that case, we should excise all invocations of coreutils. We could replace them with subcommands of PROBE, which _are_ guaranteed to exist on hosts that have PROBE. However, that doesn't feel necessary, since most people _do_ have coreutils. So we will record this issue and do nothing until/unless someone complains. + +- [ ] Write a FUSE that maps inodes (underlying fs) to inodes (of our choosing). Write an option for replay to use this FUSE.
+ Research tasks: - [ ] Develop user study diff --git a/probe_src/threading.md b/docs/threading.md similarity index 100% rename from probe_src/threading.md rename to docs/threading.md diff --git a/flake.lock b/flake.lock index 4728dbc9..93e064e0 100644 --- a/flake.lock +++ b/flake.lock @@ -17,17 +17,12 @@ } }, "crane": { - "inputs": { - "nixpkgs": [ - "nixpkgs" - ] - }, "locked": { - "lastModified": 1721842668, - "narHash": "sha256-k3oiD2z2AAwBFLa4+xfU+7G5fisRXfkvrMTCJrjZzXo=", + "lastModified": 1731098351, + "narHash": "sha256-HQkYvKvaLQqNa10KEFGgWHfMAbWBfFp+4cAgkut+NNE=", "owner": "ipetkov", "repo": "crane", - "rev": "529c1a0b1f29f0d78fa3086b8f6a134c71ef3aaf", + "rev": "ef80ead953c1b28316cc3f8613904edc2eb90c28", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index a23e277d..123972ad 100644 --- a/flake.nix +++ b/flake.nix @@ -58,7 +58,7 @@ p.rust-bin.stable.latest.default.override { targets = [rust-target]; }); - frontend = (import ./probe_src/frontend/frontend.nix) { + frontend = (import ./cli-wrapper/frontend.nix) { inherit system pkgs @@ -71,11 +71,11 @@ }; in rec { packages = rec { - inherit (frontend.packages) cargoArtifacts; + inherit (frontend.packages) cargoArtifacts probe-cli; libprobe = pkgs.stdenv.mkDerivation rec { pname = "libprobe"; version = "0.1.0"; - src = ./probe_src/libprobe; + src = ./libprobe; makeFlags = ["INSTALL_PREFIX=$(out)" "SOURCE_VERSION=${version}"]; buildInputs = [ (pkgs.python312.withPackages (pypkgs: [ @@ -83,6 +83,46 @@ ])) ]; }; + probe-py = python.pkgs.buildPythonPackage rec { + pname = "probe_py"; + version = "0.1.0"; + pyproject = true; + build-system = [ + python.pkgs.flit-core + ]; + src = pkgs.stdenv.mkDerivation { + src = ./probe_py; + pname = "probe-py-with-pygen-code"; + version = "0.1.0"; + buildPhase = "true"; + installPhase = '' + mkdir $out/ + cp --recursive $src/* $out/ + chmod 755 $out/probe_py + cp ${probe-cli}/resources/ops.py $out/probe_py/ + ''; + }; + propagatedBuildInputs = [ + 
python.pkgs.networkx + python.pkgs.pygraphviz + python.pkgs.pydot + python.pkgs.rich + python.pkgs.typer + python.pkgs.xdg-base-dirs + ]; + nativeCheckInputs = [ + python.pkgs.mypy + pkgs.ruff + ]; + checkPhase = '' + runHook preCheck + #ruff format --check probe_src # TODO: uncomment + ruff check . + python -c 'import probe_py' + MYPYPATH=$src/mypy_stubs:$MYPYPATH mypy --strict --package probe_py + runHook postCheck + ''; + }; probe-bundled = pkgs.stdenv.mkDerivation rec { pname = "probe-bundled"; version = "0.1.0"; @@ -99,40 +139,6 @@ --prefix PATH : ${pkgs.buildah}/bin ''; }; - probe-py-generated = frontend.packages.probe-py-generated; - probe-py = let - probe-py-manual = python.pkgs.buildPythonPackage rec { - pname = "probe_py.manual"; - version = "0.1.0"; - pyproject = true; - build-system = [ - python.pkgs.flit-core - ]; - src = ./probe_src/python; - propagatedBuildInputs = [ - frontend.packages.probe-py-generated - python.pkgs.networkx - python.pkgs.pygraphviz - python.pkgs.pydot - python.pkgs.rich - python.pkgs.typer - ]; - nativeCheckInputs = [ - frontend.packages.probe-py-generated - python.pkgs.mypy - pkgs.ruff - ]; - checkPhase = '' - runHook preCheck - #ruff format --check probe_src # TODO: uncomment - ruff check . - python -c 'import probe_py.manual' - mypy --strict --package probe_py.manual - runHook postCheck - ''; - }; - in - python.withPackages (pypkgs: [probe-py-manual]); default = probe-bundled; }; checks = { @@ -158,12 +164,12 @@ }; probe-integration-tests = pkgs.stdenv.mkDerivation { name = "probe-integration-tests"; - src = ./probe_src/tests; + src = ./tests; nativeBuildInputs = [ packages.probe-bundled - packages.probe-py pkgs.podman pkgs.docker + pkgs.coreutils # so we can `probe record head ...`, etc. 
]; buildPhase = "touch $out"; checkPhase = '' @@ -179,9 +185,7 @@ popd ''; inputsFrom = [ - frontend.packages.probe-frontend frontend.packages.probe-cli - frontend.packages.probe-macros ]; packages = [ @@ -204,6 +208,7 @@ pypkgs.pytest pypkgs.mypy pypkgs.ipython + pypkgs.xdg-base-dirs # libprobe build time requirement pypkgs.pycparser diff --git a/libprobe/.gitignore b/libprobe/.gitignore new file mode 100644 index 00000000..3227224f --- /dev/null +++ b/libprobe/.gitignore @@ -0,0 +1,3 @@ +# generated files +build/ +generated/ diff --git a/probe_src/libprobe/Makefile b/libprobe/Makefile similarity index 96% rename from probe_src/libprobe/Makefile rename to libprobe/Makefile index 68b4daeb..58366fb3 100644 --- a/probe_src/libprobe/Makefile +++ b/libprobe/Makefile @@ -19,7 +19,7 @@ build/lib%-dbg.so: $(SOURCE_FILES) $(GENERATED_FILES) gcc $(CFLAGS) $(DBGCFLAGS) -o $@ src/lib.c $(GENERATED_FILES): $(wildcard generator/*) - ./generator/gen_libc_hooks.py + python3 ./generator/gen_libc_hooks.py install: install -D --target-directory $(INSTALL_PREFIX)/lib/ build/lib*.so diff --git a/probe_src/libprobe/README.md b/libprobe/README.md similarity index 100% rename from probe_src/libprobe/README.md rename to libprobe/README.md diff --git a/probe_src/libprobe/arena/.gitignore b/libprobe/arena/.gitignore similarity index 100% rename from probe_src/libprobe/arena/.gitignore rename to libprobe/arena/.gitignore diff --git a/probe_src/libprobe/arena/Makefile b/libprobe/arena/Makefile similarity index 100% rename from probe_src/libprobe/arena/Makefile rename to libprobe/arena/Makefile diff --git a/probe_src/libprobe/arena/README.md b/libprobe/arena/README.md similarity index 100% rename from probe_src/libprobe/arena/README.md rename to libprobe/arena/README.md diff --git a/probe_src/libprobe/arena/include/arena.h b/libprobe/arena/include/arena.h similarity index 100% rename from probe_src/libprobe/arena/include/arena.h rename to libprobe/arena/include/arena.h diff --git 
a/probe_src/libprobe/arena/parse_arena.py b/libprobe/arena/parse_arena.py similarity index 100% rename from probe_src/libprobe/arena/parse_arena.py rename to libprobe/arena/parse_arena.py diff --git a/probe_src/libprobe/arena/test_arena.c b/libprobe/arena/test_arena.c similarity index 100% rename from probe_src/libprobe/arena/test_arena.c rename to libprobe/arena/test_arena.c diff --git a/probe_src/libprobe/generator/dump_ast.py b/libprobe/generator/dump_ast.py similarity index 100% rename from probe_src/libprobe/generator/dump_ast.py rename to libprobe/generator/dump_ast.py diff --git a/probe_src/libprobe/generator/gen_libc_hooks.py b/libprobe/generator/gen_libc_hooks.py similarity index 97% rename from probe_src/libprobe/generator/gen_libc_hooks.py rename to libprobe/generator/gen_libc_hooks.py index 05f4f3af..09ed3e0e 100755 --- a/probe_src/libprobe/generator/gen_libc_hooks.py +++ b/libprobe/generator/gen_libc_hooks.py @@ -326,6 +326,12 @@ def wrapper_func_body(func: ParsedFunc) -> typing.Sequence[Node]: name=pycparser.c_ast.ID(name="maybe_init_thread"), args=pycparser.c_ast.ExprList(exprs=[]), ), + pycparser.c_ast.FuncCall( + name=pycparser.c_ast.ID(name="DEBUG"), + args=pycparser.c_ast.ExprList(exprs=[ + pycparser.c_ast.Constant(type="string", value='"' + func.name + '(...)"'), + ]), + ), ] post_call_stmts = [] @@ -421,14 +427,16 @@ def wrapper_func_body(func: ParsedFunc) -> typing.Sequence[Node]: ).definition() for _, func in funcs.items() ] -pathlib.Path("generated/libc_hooks.h").write_text( +generated = pathlib.Path("generated") +generated.mkdir(exist_ok=True) +(generated / "libc_hooks.h").write_text( GccCGenerator().visit( pycparser.c_ast.FileAST(ext=[ *func_pointer_declarations, ]) ) ) -pathlib.Path("generated/libc_hooks.c").write_text( +(generated / "libc_hooks.c").write_text( GccCGenerator().visit( pycparser.c_ast.FileAST(ext=[ init_function_pointers, diff --git a/probe_src/libprobe/generator/libc_hooks_source.c b/libprobe/generator/libc_hooks_source.c 
similarity index 100% rename from probe_src/libprobe/generator/libc_hooks_source.c rename to libprobe/generator/libc_hooks_source.c diff --git a/probe_src/libprobe/include/libprobe/prov_ops.h b/libprobe/include/libprobe/prov_ops.h similarity index 100% rename from probe_src/libprobe/include/libprobe/prov_ops.h rename to libprobe/include/libprobe/prov_ops.h diff --git a/probe_src/libprobe/src/fd_table.c b/libprobe/src/fd_table.c similarity index 100% rename from probe_src/libprobe/src/fd_table.c rename to libprobe/src/fd_table.c diff --git a/probe_src/libprobe/src/global_state.c b/libprobe/src/global_state.c similarity index 100% rename from probe_src/libprobe/src/global_state.c rename to libprobe/src/global_state.c diff --git a/probe_src/libprobe/src/inode_table.c b/libprobe/src/inode_table.c similarity index 100% rename from probe_src/libprobe/src/inode_table.c rename to libprobe/src/inode_table.c diff --git a/probe_src/libprobe/src/lib.c b/libprobe/src/lib.c similarity index 100% rename from probe_src/libprobe/src/lib.c rename to libprobe/src/lib.c diff --git a/probe_src/libprobe/src/lookup_on_path.c b/libprobe/src/lookup_on_path.c similarity index 93% rename from probe_src/libprobe/src/lookup_on_path.c rename to libprobe/src/lookup_on_path.c index db7cd146..5ee7b684 100644 --- a/probe_src/libprobe/src/lookup_on_path.c +++ b/libprobe/src/lookup_on_path.c @@ -11,7 +11,7 @@ static bool lookup_on_path(BORROWED const char* bin_name, BORROWED char* bin_pat * * -- https://man7.org/linux/man-pages/man3/exec.3.html */ - char* path = env_path ? env_path : get_default_path(); + char* path = strndup(env_path ? 
env_path : get_default_path(), sysconf(_SC_ARG_MAX)); DEBUG("looking up \"%s\" on $PATH=\"%.50s...\"", bin_name, path); diff --git a/probe_src/libprobe/src/prov_buffer.c b/libprobe/src/prov_buffer.c similarity index 100% rename from probe_src/libprobe/src/prov_buffer.c rename to libprobe/src/prov_buffer.c diff --git a/probe_src/libprobe/src/prov_enable.c b/libprobe/src/prov_enable.c similarity index 100% rename from probe_src/libprobe/src/prov_enable.c rename to libprobe/src/prov_enable.c diff --git a/probe_src/libprobe/src/prov_ops.c b/libprobe/src/prov_ops.c similarity index 100% rename from probe_src/libprobe/src/prov_ops.c rename to libprobe/src/prov_ops.c diff --git a/probe_src/libprobe/src/util.c b/libprobe/src/util.c similarity index 100% rename from probe_src/libprobe/src/util.c rename to libprobe/src/util.c diff --git a/lightweight_env.sh b/lightweight_env.sh deleted file mode 100755 index bb4fd861..00000000 --- a/lightweight_env.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -# nix develop brings in a ton of stuff to the env -# which complicates testing probe -# To simplify, use this script. 
- -env - __PROBE_LIB=$__PROBE_LIB PATH=$PATH PYTHONPATH=$PYTHONPATH $@ diff --git a/output.png b/output.png deleted file mode 100644 index ab90c424..00000000 Binary files a/output.png and /dev/null differ diff --git a/probe_py/.gitignore b/probe_py/.gitignore new file mode 100644 index 00000000..69e0e2ac --- /dev/null +++ b/probe_py/.gitignore @@ -0,0 +1,2 @@ +# Generated files +ops.py diff --git a/probe_src/python/probe_py/manual/__init__.py b/probe_py/probe_py/__init__.py similarity index 100% rename from probe_src/python/probe_py/manual/__init__.py rename to probe_py/probe_py/__init__.py diff --git a/probe_src/python/probe_py/manual/analysis.py b/probe_py/probe_py/analysis.py similarity index 92% rename from probe_src/python/probe_py/manual/analysis.py rename to probe_py/probe_py/analysis.py index f2a977ba..e8a81786 100644 --- a/probe_src/python/probe_py/manual/analysis.py +++ b/probe_py/probe_py/analysis.py @@ -1,8 +1,8 @@ import typing -from typing import Dict, Tuple import networkx as nx # type: ignore -from probe_py.generated.ops import Op, CloneOp, ExecOp, WaitOp, OpenOp, CloseOp, InitProcessOp, InitExecEpochOp, InitThreadOp, StatOp -from probe_py.generated import parser +from .ptypes import TaskType, ProvLog +from .ops import Op, CloneOp, ExecOp, WaitOp, OpenOp, CloseOp, InitProcessOp, InitExecEpochOp, InitThreadOp, StatOp +from .graph_utils import list_edges_from_start_node from enum import IntEnum import rich import sys @@ -11,14 +11,6 @@ import os import collections -# TODO: implement this in probe_py.generated.ops -class TaskType(IntEnum): - TASK_PID = 0 - TASK_TID = 1 - TASK_ISO_C_THREAD = 2 - TASK_PTHREAD = 3 - - class EdgeLabels(IntEnum): PROGRAM_ORDER = 1 FORK_JOIN = 2 @@ -55,13 +47,13 @@ def label(self) -> str: return f"{self.file} v{self.version}" # type alias for a node -Node = Tuple[int, int, int, int] +Node: typing.TypeAlias = tuple[int, int, int, int] # type for the edges -EdgeType = Tuple[Node, Node] +EdgeType: typing.TypeAlias = tuple[Node, 
Node] def validate_provlog( - provlog: parser.ProvLog, + provlog: ProvLog, ) -> list[str]: ret = list[str]() waited_processes = set[tuple[TaskType, int]]() @@ -151,7 +143,7 @@ def validate_provlog( # TODO: Rename "digraph" to "hb_graph" in the entire project. # Digraph (aka "directed graph") is too vague a term; the proper name is "happens-before graph". # Later on, we will have a function that transforms an hb graph to file graph (both of which are digraphs) -def provlog_to_digraph(process_tree_prov_log: parser.ProvLog) -> nx.DiGraph: +def provlog_to_digraph(process_tree_prov_log: ProvLog) -> nx.DiGraph: # [pid, exec_epoch_no, tid, op_index] program_order_edges = list[tuple[Node, Node]]() fork_join_edges = list[tuple[Node, Node]]() @@ -258,7 +250,7 @@ def add_edges(edges:list[tuple[Node, Node]], label:EdgeLabels) -> None: add_edges(fork_join_edges, EdgeLabels.FORK_JOIN) return process_graph -def traverse_hb_for_dfgraph(process_tree_prov_log: parser.ProvLog, starting_node: Node, traversed: set[int] , dataflow_graph:nx.DiGraph, file_version_map: Dict[InodeOnDevice, int], shared_files: set[InodeOnDevice], cmd_map: Dict[int, list[str]]) -> None: +def traverse_hb_for_dfgraph(process_tree_prov_log: ProvLog, starting_node: Node, traversed: set[int] , dataflow_graph:nx.DiGraph, file_version_map: dict[InodeOnDevice, int], shared_files: set[InodeOnDevice], cmd_map: dict[int, list[str]]) -> None: starting_pid = starting_node[0] starting_op = prov_log_get_node(process_tree_prov_log, starting_node[0], starting_node[1], starting_node[2], starting_node[3]) @@ -342,13 +334,7 @@ def traverse_hb_for_dfgraph(process_tree_prov_log: parser.ProvLog, starting_node if next_op.task_id == starting_pid or next_op.task_id == starting_op.pthread_id: return -def list_edges_from_start_node(graph: nx.DiGraph, start_node: Node) -> list[EdgeType]: - all_edges = list(graph.edges()) - start_index = next(i for i, edge in enumerate(all_edges) if edge[0] == start_node) - ordered_edges = 
all_edges[start_index:] + all_edges[:start_index] - return ordered_edges - -def provlog_to_dataflow_graph(process_tree_prov_log: parser.ProvLog) -> nx.DiGraph: +def provlog_to_dataflow_graph(process_tree_prov_log: ProvLog) -> nx.DiGraph: dataflow_graph = nx.DiGraph() file_version_map = collections.defaultdict[InodeOnDevice, int](lambda: 0) process_graph = provlog_to_digraph(process_tree_prov_log) @@ -365,11 +351,11 @@ def provlog_to_dataflow_graph(process_tree_prov_log: parser.ProvLog) -> nx.DiGra traverse_hb_for_dfgraph(process_tree_prov_log, root_node, traversed, dataflow_graph, file_version_map, shared_files, cmd_map) return dataflow_graph -def prov_log_get_node(prov_log: parser.ProvLog, pid: int, exec_epoch: int, tid: int, op_no: int) -> Op: +def prov_log_get_node(prov_log: ProvLog, pid: int, exec_epoch: int, tid: int, op_no: int) -> Op: return prov_log.processes[pid].exec_epochs[exec_epoch].threads[tid].ops[op_no] -def validate_hb_closes(provlog: parser.ProvLog, process_graph: nx.DiGraph) -> list[str]: +def validate_hb_closes(provlog: ProvLog, process_graph: nx.DiGraph) -> list[str]: # Note that this test doesn't work if a process "intentionally" leaves a fd open for its child. 
# E.g., bash-in-pipe provlog_reverse = process_graph.reverse() @@ -389,7 +375,7 @@ def validate_hb_closes(provlog: parser.ProvLog, process_graph: nx.DiGraph) -> li return ret -def validate_hb_waits(provlog: parser.ProvLog, process_graph: nx.DiGraph) -> list[str]: +def validate_hb_waits(provlog: ProvLog, process_graph: nx.DiGraph) -> list[str]: provlog_reverse = process_graph.reverse() ret = list[str]() for node in process_graph.nodes: @@ -404,7 +390,7 @@ def validate_hb_waits(provlog: parser.ProvLog, process_graph: nx.DiGraph) -> lis ret.append(f"Wait of {op.data.task_id} in {node} is not preceeded by corresponding clone") return ret -def validate_hb_clones(provlog: parser.ProvLog, process_graph: nx.DiGraph) -> list[str]: +def validate_hb_clones(provlog: ProvLog, process_graph: nx.DiGraph) -> list[str]: ret = list[str]() for node in process_graph.nodes: op = prov_log_get_node(provlog, *node) @@ -433,7 +419,7 @@ def validate_hb_clones(provlog: parser.ProvLog, process_graph: nx.DiGraph) -> li return ret -def validate_hb_degree(provlog: parser.ProvLog, process_graph: nx.DiGraph) -> list[str]: +def validate_hb_degree(provlog: ProvLog, process_graph: nx.DiGraph) -> list[str]: ret = list[str]() found_entry = False found_exit = False @@ -455,7 +441,7 @@ def validate_hb_degree(provlog: parser.ProvLog, process_graph: nx.DiGraph) -> li return ret -def validate_hb_acyclic(provlog: parser.ProvLog, process_graph: nx.DiGraph) -> list[str]: +def validate_hb_acyclic(provlog: ProvLog, process_graph: nx.DiGraph) -> list[str]: try: cycle = nx.find_cycle(process_graph) except nx.NetworkXNoCycle: @@ -464,7 +450,7 @@ def validate_hb_acyclic(provlog: parser.ProvLog, process_graph: nx.DiGraph) -> l return [f"Cycle detected: {cycle}"] -def validate_hb_execs(provlog: parser.ProvLog, process_graph: nx.DiGraph) -> list[str]: +def validate_hb_execs(provlog: ProvLog, process_graph: nx.DiGraph) -> list[str]: ret = list[str]() for node0 in process_graph.nodes(): pid0, eid0, tid0, op0 = node0 @@ 
-482,7 +468,7 @@ def validate_hb_execs(provlog: parser.ProvLog, process_graph: nx.DiGraph) -> lis return ret -def validate_hb_graph(processes: parser.ProvLog, hb_graph: nx.DiGraph) -> list[str]: +def validate_hb_graph(processes: ProvLog, hb_graph: nx.DiGraph) -> list[str]: ret = list[str]() # ret.extend(validate_hb_closes(processes, hb_graph)) ret.extend(validate_hb_waits(processes, hb_graph)) @@ -503,7 +489,7 @@ def relax_node(graph: nx.DiGraph, node: typing.Any) -> list[tuple[typing.Any, ty graph.remove_node(node) return ret -def color_hb_graph(prov_log: parser.ProvLog, process_graph: nx.DiGraph) -> None: +def color_hb_graph(prov_log: ProvLog, process_graph: nx.DiGraph) -> None: label_color_map = { EdgeLabels.EXEC: 'yellow', EdgeLabels.FORK_JOIN: 'red', diff --git a/probe_src/python/probe_py/manual/cli.py b/probe_py/probe_py/cli.py similarity index 99% rename from probe_src/python/probe_py/manual/cli.py rename to probe_py/probe_py/cli.py index 114c2f82..b4198aae 100644 --- a/probe_src/python/probe_py/manual/cli.py +++ b/probe_py/probe_py/cli.py @@ -1,18 +1,18 @@ from typing_extensions import Annotated import pathlib -import typer +import subprocess import shutil +import os +import typer +import tempfile import rich.console import rich.pretty -from ..generated.parser import parse_probe_log, parse_probe_log_ctx +from .parser import parse_probe_log, parse_probe_log_ctx from . import analysis from .workflows import MakefileGenerator -from .ssh_argparser import parse_ssh_args from . import file_closure from . 
import graph_utils -import subprocess -import os -import tempfile +from .ssh_argparser import parse_ssh_args import enum diff --git a/probe_src/python/probe_py/manual/consts.py b/probe_py/probe_py/consts.py similarity index 73% rename from probe_src/python/probe_py/manual/consts.py rename to probe_py/probe_py/consts.py index fbe136df..4250413e 100644 --- a/probe_src/python/probe_py/manual/consts.py +++ b/probe_py/probe_py/consts.py @@ -1,6 +1,9 @@ import typing +import xdg_base_dirs +PROBE_HOME: typing.Final = xdg_base_dirs.xdg_data_home() / "PROBE" + # echo -e '#include \nAT_FDCWD' | gcc -E - | tail --lines=1 AT_FDCWD: typing.Final = -100 diff --git a/probe_src/python/probe_py/manual/file_closure.py b/probe_py/probe_py/file_closure.py similarity index 98% rename from probe_src/python/probe_py/manual/file_closure.py rename to probe_py/probe_py/file_closure.py index 419e1425..48b409bf 100644 --- a/probe_src/python/probe_py/manual/file_closure.py +++ b/probe_py/probe_py/file_closure.py @@ -9,8 +9,8 @@ import warnings import pathlib import typing -from probe_py.generated.parser import ProvLog, InodeVersionLog -from probe_py.generated.ops import Path, ChdirOp, OpenOp, CloseOp, InitProcessOp, ExecOp +from .ptypes import ProvLog, InodeVersionLog +from .ops import Path, ChdirOp, OpenOp, CloseOp, InitProcessOp, ExecOp from .consts import AT_FDCWD diff --git a/probe_py/probe_py/graph_utils.py b/probe_py/probe_py/graph_utils.py new file mode 100644 index 00000000..10d70fd1 --- /dev/null +++ b/probe_py/probe_py/graph_utils.py @@ -0,0 +1,42 @@ +import typing +import pathlib +import networkx # type: ignore + + +_Node = typing.TypeVar("_Node") + + +if typing.TYPE_CHECKING: + DiGraph: typing.TypeAlias = networkx.DiGraph[_Node] +else: + class DiGraph(typing.Generic[_Node], networkx.DiGraph): + pass + + +def serialize_graph( + graph: DiGraph[_Node], + output: pathlib.Path, +) -> None: + pydot_graph = networkx.drawing.nx_pydot.to_pydot(graph) + if output.suffix == "dot": + 
pydot_graph.write_raw(output) + else: + pydot_graph.write_png(output) + + +def relax_node(graph: DiGraph[_Node], node: _Node) -> list[tuple[_Node, _Node]]: + """Remove node from graph and attach its predecessors to its successors""" + ret = list[tuple[typing.Any, typing.Any]]() + for predecessor in graph.predecessors(node): + for successor in graph.successors(node): + ret.append((predecessor, successor)) + graph.add_edge(predecessor, successor) + graph.remove_node(node) + return ret + + +def list_edges_from_start_node(graph: DiGraph[_Node], start_node: _Node) -> typing.Iterable[tuple[_Node, _Node]]: + all_edges = list(graph.edges()) + start_index = next(i for i, edge in enumerate(all_edges) if edge[0] == start_node) + ordered_edges = all_edges[start_index:] + all_edges[:start_index] + return ordered_edges diff --git a/probe_src/frontend/python/probe_py/generated/parser.py b/probe_py/probe_py/parser.py similarity index 70% rename from probe_src/frontend/python/probe_py/generated/parser.py rename to probe_py/probe_py/parser.py index fb46508b..fbe50fa1 100644 --- a/probe_src/frontend/python/probe_py/generated/parser.py +++ b/probe_py/probe_py/parser.py @@ -1,58 +1,14 @@ from __future__ import annotations -import os -import contextlib -import tempfile import pathlib import typing import json import tarfile -from dataclasses import dataclass, replace +import tempfile +import contextlib from . 
import ops +from .ptypes import ProvLog, InodeVersionLog, ThreadProvLog, ExecEpochProvLog, ProcessProvLog +from dataclasses import replace -@dataclass(frozen=True) -class ThreadProvLog: - tid: int - ops: typing.Sequence[ops.Op] - -@dataclass(frozen=True) -class ExecEpochProvLog: - epoch: int - threads: typing.Mapping[int, ThreadProvLog] - - -@dataclass(frozen=True) -class ProcessProvLog: - pid: int - exec_epochs: typing.Mapping[int, ExecEpochProvLog] - - -@dataclass(frozen=True) -class InodeVersionLog: - device_major: int - device_minor: int - inode: int - tv_sec: int - tv_nsec: int - size: int - - @staticmethod - def from_path(path: pathlib.Path) -> InodeVersionLog: - s = path.stat() - return InodeVersionLog( - os.major(s.st_dev), - os.minor(s.st_dev), - s.st_ino, - s.st_mtime_ns // int(1e9), - s.st_mtime_ns % int(1e9), - s.st_size, - ) - - -@dataclass(frozen=True) -class ProvLog: - processes: typing.Mapping[int, ProcessProvLog] - inodes: typing.Mapping[InodeVersionLog, pathlib.Path] - has_inodes: bool @contextlib.contextmanager def parse_probe_log_ctx( @@ -95,6 +51,7 @@ def parse_probe_log( with parse_probe_log_ctx(probe_log) as prov_log: return replace(prov_log, has_inodes=False, inodes={}) + def op_hook(json_map: typing.Dict[str, typing.Any]) -> typing.Any: ty: str = json_map["_type"] json_map.pop("_type") @@ -102,6 +59,7 @@ def op_hook(json_map: typing.Dict[str, typing.Any]) -> typing.Any: constructor = ops.__dict__[ty] # HACK: convert jsonlines' lists of integers into python byte types + # This is because json cannot actually represent byte strings, only unicode strings. for ident, ty in constructor.__annotations__.items(): if ty == "bytes" and ident in json_map: json_map[ident] = bytes(json_map[ident]) diff --git a/probe_py/probe_py/ptypes.py b/probe_py/probe_py/ptypes.py new file mode 100644 index 00000000..57f4daea --- /dev/null +++ b/probe_py/probe_py/ptypes.py @@ -0,0 +1,60 @@ +from __future__ import annotations +import pathlib +from . 
import ops +import os +from dataclasses import dataclass +import enum +import typing + + +@dataclass(frozen=True) +class ThreadProvLog: + tid: int + ops: typing.Sequence[ops.Op] + +@dataclass(frozen=True) +class ExecEpochProvLog: + epoch: int + threads: typing.Mapping[int, ThreadProvLog] + + +@dataclass(frozen=True) +class ProcessProvLog: + pid: int + exec_epochs: typing.Mapping[int, ExecEpochProvLog] + + +@dataclass(frozen=True) +class InodeVersionLog: + device_major: int + device_minor: int + inode: int + tv_sec: int + tv_nsec: int + size: int + + @staticmethod + def from_path(path: pathlib.Path) -> InodeVersionLog: + s = path.stat() + return InodeVersionLog( + os.major(s.st_dev), + os.minor(s.st_dev), + s.st_ino, + s.st_mtime_ns // int(1e9), + s.st_mtime_ns % int(1e9), + s.st_size, + ) + + +@dataclass(frozen=True) +class ProvLog: + processes: typing.Mapping[int, ProcessProvLog] + inodes: typing.Mapping[InodeVersionLog, pathlib.Path] + has_inodes: bool + +# TODO: implement this in probe_py.generated.ops +class TaskType(enum.IntEnum): + TASK_PID = 0 + TASK_TID = 1 + TASK_ISO_C_THREAD = 2 + TASK_PTHREAD = 3 diff --git a/probe_src/python/probe_py/manual/ssh_argparser.py b/probe_py/probe_py/ssh_argparser.py similarity index 99% rename from probe_src/python/probe_py/manual/ssh_argparser.py rename to probe_py/probe_py/ssh_argparser.py index 9c024e86..41414668 100644 --- a/probe_src/python/probe_py/manual/ssh_argparser.py +++ b/probe_py/probe_py/ssh_argparser.py @@ -49,4 +49,3 @@ def parse_ssh_args(ssh_args: list[str]) -> tuple[list[str], str, list[str]]: assert destination is not None return flags, destination, remote_host - diff --git a/probe_src/python/probe_py/manual/util.py b/probe_py/probe_py/util.py similarity index 100% rename from probe_src/python/probe_py/manual/util.py rename to probe_py/probe_py/util.py diff --git a/probe_src/python/probe_py/manual/workflows.py b/probe_py/probe_py/workflows.py similarity index 99% rename from 
probe_src/python/probe_py/manual/workflows.py rename to probe_py/probe_py/workflows.py index 4a93606a..6c60962d 100644 --- a/probe_src/python/probe_py/manual/workflows.py +++ b/probe_py/probe_py/workflows.py @@ -1,4 +1,4 @@ -from probe_py.manual.analysis import ProcessNode, FileNode +from probe_py.analysis import ProcessNode, FileNode import networkx as nx # type: ignore import abc from typing import List, Set, Optional diff --git a/probe_src/python/pyproject.toml b/probe_py/pyproject.toml similarity index 71% rename from probe_src/python/pyproject.toml rename to probe_py/pyproject.toml index 5b0d08b5..7cf31c50 100644 --- a/probe_src/python/pyproject.toml +++ b/probe_py/pyproject.toml @@ -3,17 +3,20 @@ requires = ["flit_core >=3.2,<4"] build-backend = "flit_core.buildapi" [project] -name = "probe_py.manual" +name = "probe_py" +version = "0.1.0" +description = "Python library and CLI extensions for PROBE" authors = [ {name = "Samuel Grayson", email = "sam@samgrayson.me"}, {name = "Shofiya Bootwala"}, {name = "Saleha Muzammil"}, + {name = "Asif Zubayer Palak"}, + {name = "Kyrillos Ishak"}, + {name = "Jenna Fligor"}, ] license = {file = "LICENSE"} classifiers = ["License :: OSI Approved :: MIT License"] -dynamic = ["version", "description"] dependencies = [ - "probe_py.generated", "networkx", "pygraphviz", "pydot", diff --git a/probe_src/tests/test_graph.py b/probe_py/tests/test_graph.py similarity index 98% rename from probe_src/tests/test_graph.py rename to probe_py/tests/test_graph.py index 60632cb9..11af9b78 100644 --- a/probe_src/tests/test_graph.py +++ b/probe_py/tests/test_graph.py @@ -1,8 +1,9 @@ import pytest import typing -from probe_py.generated.parser import ProvLog, parse_probe_log -from probe_py.generated.ops import OpenOp, CloneOp, ExecOp, InitProcessOp, InitExecEpochOp, CloseOp, WaitOp, Op -from probe_py.manual.analysis import provlog_to_digraph, validate_hb_graph +from probe_py.parser import parse_probe_log +from probe_py.ptypes import ProvLog +from 
probe_py.ops import OpenOp, CloneOp, ExecOp, InitProcessOp, InitExecEpochOp, CloseOp, WaitOp, Op +from probe_py.analysis import provlog_to_digraph, validate_hb_graph import pathlib import networkx as nx # type: ignore import subprocess diff --git a/test/ssh_wrapper_test/test_ssh_arg_parse.py b/probe_py/tests/test_ssh_arg_parse.py similarity index 71% rename from test/ssh_wrapper_test/test_ssh_arg_parse.py rename to probe_py/tests/test_ssh_arg_parse.py index bf2d335a..2b05e4f2 100644 --- a/test/ssh_wrapper_test/test_ssh_arg_parse.py +++ b/probe_py/tests/test_ssh_arg_parse.py @@ -1,5 +1,4 @@ -import sys -from probe_py.manual.ssh_argparser import parse_ssh_args +from probe_py.ssh_argparser import parse_ssh_args # List of test cases test_cases = [ @@ -14,16 +13,10 @@ (['-v', '-p', '22', '-A', 'user@host.com', 'uptime'], (['-v', '-p', '22', '-A'], 'user@host.com', ['uptime'])) ] -def run_test_cases(): +def run_test_cases() -> None: for i, (input_args, expected_output) in enumerate(test_cases): result = parse_ssh_args(input_args) - if result == expected_output: - print(f"Test case {i+1} passed!") - else: - print(f"Test case {i+1} failed!") - print(f"Input: {input_args}") - print(f"Expected: {expected_output}") - print(f"Got: {result}") + assert result == expected_output if __name__ == "__main__": diff --git a/probe_src/tests/test_workflow.py b/probe_py/tests/test_workflow.py similarity index 95% rename from probe_src/tests/test_workflow.py rename to probe_py/tests/test_workflow.py index 00c6cb35..b3e261b5 100644 --- a/probe_src/tests/test_workflow.py +++ b/probe_py/tests/test_workflow.py @@ -1,9 +1,9 @@ import re import pytest import pathlib -import networkx as nx -from probe_py.manual.analysis import FileNode, ProcessNode, InodeOnDevice -from probe_py.manual.workflows import NextflowGenerator +import networkx as nx # type: ignore +from probe_py.analysis import FileNode, ProcessNode, InodeOnDevice +from probe_py.workflows import NextflowGenerator tmpdir = 
pathlib.Path(__file__).resolve().parent / "tmp" diff --git a/probe_src/.gitignore b/probe_src/.gitignore deleted file mode 100644 index 4e6311be..00000000 --- a/probe_src/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -probe_log -.prov -__pycache__/ diff --git a/probe_src/frontend/frontend.nix b/probe_src/frontend/frontend.nix deleted file mode 100644 index f82fce6e..00000000 --- a/probe_src/frontend/frontend.nix +++ /dev/null @@ -1,164 +0,0 @@ -{ - pkgs, - craneLib, - rust-target, - advisory-db, - system, - python, - lib, -}: rec { - src = lib.cleanSource ./.; - filter = name: type: - !(builtins.any (x: x) [ - (lib.hasSuffix ".nix" name) - (lib.hasPrefix "." (builtins.baseNameOf name)) - ]); - - # Common arguments can be set here to avoid repeating them later - commonArgs = { - inherit src; - strictDeps = true; - - # all the crates in this workspace either use rust-bindgen or depend - # on local crate that does. - nativeBuildInputs = [ - pkgs.rustPlatform.bindgenHook - ]; - - # pygen needs to know where to write the python file - preConfigurePhases = [ - "pygenConfigPhase" - ]; - pygenConfigPhase = '' - export PYGEN_OUTFILE="$(realpath ./python/probe_py/generated/ops.py)" - ''; - - CARGO_BUILD_TARGET = rust-target; - CARGO_BUILD_RUSTFLAGS = "-C target-feature=+crt-static"; - CPATH = ../libprobe/include; - }; - - individualCrateArgs = - commonArgs - // { - # inherit cargoArtifacts; - inherit (craneLib.crateNameFromCargoToml {inherit src;}) version; - # disable tests since we'll run them all via cargo-nextest - doCheck = false; - }; - - packages = rec { - # Build *just* the cargo dependencies (of the entire workspace), - # so we can reuse all of that work (e.g. via cachix) when running in CI - # It is *highly* recommended to use something like cargo-hakari to avoid - # cache misses when building individual top-level-crates - cargoArtifacts = craneLib.buildDepsOnly commonArgs; - - # Build the top-level crates of the workspace as individual derivations. 
- # This allows consumers to only depend on (and build) only what they need. - # Though it is possible to build the entire workspace as a single derivation, - # so this is left up to you on how to organize things - probe-frontend = craneLib.buildPackage (individualCrateArgs - // { - pname = "probe-frontend"; - cargoExtraArgs = "-p probe_frontend"; - installPhase = '' - cp -r ./python/ $out - cp ./LICENSE $out/LICENSE - ''; - }); - probe-py-generated = let - workspace = (builtins.fromTOML (builtins.readFile ./Cargo.toml)).workspace; - # TODO: Simplify this - # Perhaps by folding the substituteAllFiles into probe-py-generated (upstream) or probe-py-frontend (downstream) - # Could we combine all the packages? - in - python.pkgs.buildPythonPackage rec { - src = pkgs.substituteAllFiles rec { - src = probe-frontend; - files = [ - "./pyproject.toml" - "./LICENSE" - "./probe_py/generated/__init__.py" - "./probe_py/generated/ops.py" - "./probe_py/generated/parser.py" - "./probe_py/generated/py.typed" - ]; - authors = builtins.concatStringsSep "" (builtins.map (match: let - name = builtins.elemAt match 0; - email = builtins.elemAt match 1; - in "\n {name = \"${name}\", email = \"${email}\"},") ( - builtins.map - (author-str: builtins.match "(.+) <(.+)>" author-str) - (workspace.package.authors) - )); - version = workspace.package.version; - }; - pname = "probe_py.generated"; - version = workspace.package.version; - pyproject = true; - build-system = [ - python.pkgs.flit-core - ]; - nativeCheckInputs = [ - python.pkgs.mypy - pkgs.ruff - ]; - # ruff, mypy - checkPhase = '' - runHook preCheck - python -c 'import probe_py.generated' - mypy --strict --package probe_py.generated - runHook postCheck - ''; - }; - - probe-cli = craneLib.buildPackage (individualCrateArgs - // { - pname = "probe-cli"; - cargoExtraArgs = "-p probe_cli"; - }); - probe-macros = craneLib.buildPackage (individualCrateArgs - // { - pname = "probe-macros"; - cargoExtraArgs = "-p probe_macros"; - }); - }; - 
checks = { - probe-workspace-clippy = craneLib.cargoClippy (commonArgs - // { - inherit (packages) cargoArtifacts; - cargoClippyExtraArgs = "--all-targets -- --deny warnings"; - }); - - probe-workspace-doc = craneLib.cargoDoc (commonArgs - // { - inherit (packages) cargoArtifacts; - }); - - # Check formatting - probe-workspace-fmt = craneLib.cargoFmt { - inherit src; - }; - - # Audit dependencies - probe-workspace-audit = craneLib.cargoAudit { - inherit src advisory-db; - }; - - # Audit licenses - probe-workspace-deny = craneLib.cargoDeny { - inherit src; - }; - - # Run tests with cargo-nextest - # this is why `doCheck = false` on the crate derivations, so as to not - # run the tests twice. - probe-workspace-nextest = craneLib.cargoNextest (commonArgs - // { - inherit (packages) cargoArtifacts; - partitions = 1; - partitionType = "count"; - }); - }; -} diff --git a/probe_src/frontend/python/probe_py/generated/__init__.py b/probe_src/frontend/python/probe_py/generated/__init__.py deleted file mode 100644 index 9f8e34d3..00000000 --- a/probe_src/frontend/python/probe_py/generated/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -Generated code for reading with PROBE logs. 
- -See https://github.com/charmoniumQ/PROBE -""" - -__version__ = "@version@" diff --git a/probe_src/frontend/python/probe_py/generated/ops.py b/probe_src/frontend/python/probe_py/generated/ops.py deleted file mode 100644 index e4f97fdd..00000000 --- a/probe_src/frontend/python/probe_py/generated/ops.py +++ /dev/null @@ -1,233 +0,0 @@ -# This file was @generated by probe_macros -from __future__ import annotations -import typing -from dataclasses import dataclass - -# https://github.com/torvalds/linux/blob/73e931504f8e0d42978bfcda37b323dbbd1afc08/include/uapi/linux/fcntl.h#L98 -AT_FDCWD: int = -100 - -@dataclass(init=True, frozen=True) -class Timespec: - sec: int - nsec: int - - -@dataclass(init=True, frozen=True) -class StatxTimestamp: - sec: int - nsec: int - - -@dataclass(init=True, frozen=True) -class Timeval: - sec: int - usec: int - - -@dataclass(init=True, frozen=True) -class Rusage: - utime: Timeval - stime: Timeval - maxrss: int - ixrss: int - idrss: int - isrss: int - minflt: int - majflt: int - nswap: int - inblock: int - oublock: int - msgsnd: int - msgrcv: int - nsignals: int - nvcsw: int - nivcsw: int - - -@dataclass(init=True, frozen=True) -class Path: - dirfd_minus_at_fdcwd: int - path: bytes - device_major: int - device_minor: int - inode: int - mtime: StatxTimestamp - ctime: StatxTimestamp - size: int - stat_valid: bool - dirfd_valid: bool - - @property - def dirfd(self) -> int: - return self.dirfd_minus_at_fdcwd + AT_FDCWD - - -@dataclass(init=True, frozen=True) -class InitProcessOp: - pid: int - is_root: bool - cwd: Path - - -@dataclass(init=True, frozen=True) -class InitExecEpochOp: - epoch: int - program_name: bytes - - -@dataclass(init=True, frozen=True) -class InitThreadOp: - tid: int - - -@dataclass(init=True, frozen=True) -class OpenOp: - path: Path - flags: int - mode: int - fd: int - ferrno: int - - -@dataclass(init=True, frozen=True) -class CloseOp: - low_fd: int - high_fd: int - ferrno: int - - -@dataclass(init=True, frozen=True) -class 
ChdirOp: - path: Path - ferrno: int - - -@dataclass(init=True, frozen=True) -class ExecOp: - path: Path - ferrno: int - argc: int - argv: list[bytes, ] - envc: int - env: list[bytes, ] - - -@dataclass(init=True, frozen=True) -class CloneOp: - flags: int - run_pthread_atfork_handlers: bool - task_type: int - task_id: int - ferrno: int - - -@dataclass(init=True, frozen=True) -class ExitOp: - status: int - run_atexit_handlers: bool - - -@dataclass(init=True, frozen=True) -class AccessOp: - path: Path - mode: int - flags: int - ferrno: int - - -@dataclass(init=True, frozen=True) -class StatResult: - mask: int - nlink: int - uid: int - gid: int - mode: int - ino: int - size: int - blocks: int - blksize: int - atime: StatxTimestamp - btime: StatxTimestamp - ctime: StatxTimestamp - mtime: StatxTimestamp - dev_major: int - dev_minor: int - - -@dataclass(init=True, frozen=True) -class StatOp: - path: Path - flags: int - ferrno: int - stat_result: StatResult - - -@dataclass(init=True, frozen=True) -class ReaddirOp: - dir: Path - child: bytes - all_children: bool - ferrno: int - - -@dataclass(init=True, frozen=True) -class WaitOp: - task_type: int - task_id: int - options: int - status: int - ferrno: int - - -@dataclass(init=True, frozen=True) -class GetRUsageOp: - waitpid_arg: int - getrusage_arg: int - usage: Rusage - ferrno: int - - -@dataclass(init=True, frozen=True) -class ReadLinkOp: - path: Path - resolved: bytes - ferrno: int - - -@dataclass(init=True, frozen=True) -class UpdateMetadataOp: - path: Path - flags: int - metadata: Metadata - ferrno: int - - -@dataclass(init=True, frozen=True) -class Op: - data: OpInternal - time: Timespec - pthread_id: int - iso_c_thread_id: int - - -@dataclass(init=True, frozen=True) -class Mode: - mode: int - - -@dataclass(init=True, frozen=True) -class Ownership: - uid: int - gid: int - - -@dataclass(init=True, frozen=True) -class Times: - is_null: bool - atime: Timeval - mtime: Timeval - - -Metadata: typing.TypeAlias = Mode | 
Ownership | Times -OpInternal: typing.TypeAlias = InitProcessOp | InitExecEpochOp | InitThreadOp | OpenOp | CloseOp | ChdirOp | ExecOp | CloneOp | ExitOp | AccessOp | StatOp | ReaddirOp | WaitOp | GetRUsageOp | UpdateMetadataOp | ReadLinkOp - diff --git a/probe_src/frontend/python/probe_py/generated/py.typed b/probe_src/frontend/python/probe_py/generated/py.typed deleted file mode 100644 index e69de29b..00000000 diff --git a/probe_src/libprobe/.gitignore b/probe_src/libprobe/.gitignore deleted file mode 100644 index 567609b1..00000000 --- a/probe_src/libprobe/.gitignore +++ /dev/null @@ -1 +0,0 @@ -build/ diff --git a/probe_src/libprobe/generated/libc_hooks.c b/probe_src/libprobe/generated/libc_hooks.c deleted file mode 100644 index e1610207..00000000 --- a/probe_src/libprobe/generated/libc_hooks.c +++ /dev/null @@ -1,2381 +0,0 @@ -void init_function_pointers() -{ - unwrapped_fopen = dlsym(RTLD_NEXT, "fopen"); - unwrapped_freopen = dlsym(RTLD_NEXT, "freopen"); - unwrapped_fclose = dlsym(RTLD_NEXT, "fclose"); - unwrapped_fcloseall = dlsym(RTLD_NEXT, "fcloseall"); - unwrapped_openat = dlsym(RTLD_NEXT, "openat"); - unwrapped_open = dlsym(RTLD_NEXT, "open"); - unwrapped_creat = dlsym(RTLD_NEXT, "creat"); - unwrapped_close = dlsym(RTLD_NEXT, "close"); - unwrapped_close_range = dlsym(RTLD_NEXT, "close_range"); - unwrapped_closefrom = dlsym(RTLD_NEXT, "closefrom"); - unwrapped_dup = dlsym(RTLD_NEXT, "dup"); - unwrapped_dup2 = dlsym(RTLD_NEXT, "dup2"); - unwrapped_dup3 = dlsym(RTLD_NEXT, "dup3"); - unwrapped_fcntl = dlsym(RTLD_NEXT, "fcntl"); - unwrapped_chdir = dlsym(RTLD_NEXT, "chdir"); - unwrapped_fchdir = dlsym(RTLD_NEXT, "fchdir"); - unwrapped_opendir = dlsym(RTLD_NEXT, "opendir"); - unwrapped_fdopendir = dlsym(RTLD_NEXT, "fdopendir"); - unwrapped_readdir = dlsym(RTLD_NEXT, "readdir"); - unwrapped_readdir_r = dlsym(RTLD_NEXT, "readdir_r"); - unwrapped_readdir64 = dlsym(RTLD_NEXT, "readdir64"); - unwrapped_readdir64_r = dlsym(RTLD_NEXT, "readdir64_r"); - 
unwrapped_closedir = dlsym(RTLD_NEXT, "closedir"); - unwrapped_rewinddir = dlsym(RTLD_NEXT, "rewinddir"); - unwrapped_telldir = dlsym(RTLD_NEXT, "telldir"); - unwrapped_seekdir = dlsym(RTLD_NEXT, "seekdir"); - unwrapped_scandir = dlsym(RTLD_NEXT, "scandir"); - unwrapped_scandir64 = dlsym(RTLD_NEXT, "scandir64"); - unwrapped_scandirat = dlsym(RTLD_NEXT, "scandirat"); - unwrapped_getdents64 = dlsym(RTLD_NEXT, "getdents64"); - unwrapped_ftw = dlsym(RTLD_NEXT, "ftw"); - unwrapped_ftw64 = dlsym(RTLD_NEXT, "ftw64"); - unwrapped_nftw = dlsym(RTLD_NEXT, "nftw"); - unwrapped_nftw64 = dlsym(RTLD_NEXT, "nftw64"); - unwrapped_link = dlsym(RTLD_NEXT, "link"); - unwrapped_linkat = dlsym(RTLD_NEXT, "linkat"); - unwrapped_symlink = dlsym(RTLD_NEXT, "symlink"); - unwrapped_symlinkat = dlsym(RTLD_NEXT, "symlinkat"); - unwrapped_readlink = dlsym(RTLD_NEXT, "readlink"); - unwrapped_readlinkat = dlsym(RTLD_NEXT, "readlinkat"); - unwrapped_canonicalize_file_name = dlsym(RTLD_NEXT, "canonicalize_file_name"); - unwrapped_realpath = dlsym(RTLD_NEXT, "realpath"); - unwrapped_unlink = dlsym(RTLD_NEXT, "unlink"); - unwrapped_rmdir = dlsym(RTLD_NEXT, "rmdir"); - unwrapped_remove = dlsym(RTLD_NEXT, "remove"); - unwrapped_rename = dlsym(RTLD_NEXT, "rename"); - unwrapped_mkdir = dlsym(RTLD_NEXT, "mkdir"); - unwrapped_mkdirat = dlsym(RTLD_NEXT, "mkdirat"); - unwrapped_stat = dlsym(RTLD_NEXT, "stat"); - unwrapped_stat64 = dlsym(RTLD_NEXT, "stat64"); - unwrapped_fstat = dlsym(RTLD_NEXT, "fstat"); - unwrapped_fstat64 = dlsym(RTLD_NEXT, "fstat64"); - unwrapped_lstat = dlsym(RTLD_NEXT, "lstat"); - unwrapped_lstat64 = dlsym(RTLD_NEXT, "lstat64"); - unwrapped_statx = dlsym(RTLD_NEXT, "statx"); - unwrapped_fstatat = dlsym(RTLD_NEXT, "fstatat"); - unwrapped_fstatat64 = dlsym(RTLD_NEXT, "fstatat64"); - unwrapped_chown = dlsym(RTLD_NEXT, "chown"); - unwrapped_fchown = dlsym(RTLD_NEXT, "fchown"); - unwrapped_lchown = dlsym(RTLD_NEXT, "lchown"); - unwrapped_fchownat = dlsym(RTLD_NEXT, "fchownat"); - 
unwrapped_chmod = dlsym(RTLD_NEXT, "chmod"); - unwrapped_fchmod = dlsym(RTLD_NEXT, "fchmod"); - unwrapped_fchmodat = dlsym(RTLD_NEXT, "fchmodat"); - unwrapped_access = dlsym(RTLD_NEXT, "access"); - unwrapped_faccessat = dlsym(RTLD_NEXT, "faccessat"); - unwrapped_utime = dlsym(RTLD_NEXT, "utime"); - unwrapped_utimes = dlsym(RTLD_NEXT, "utimes"); - unwrapped_lutimes = dlsym(RTLD_NEXT, "lutimes"); - unwrapped_futimes = dlsym(RTLD_NEXT, "futimes"); - unwrapped_truncate = dlsym(RTLD_NEXT, "truncate"); - unwrapped_truncate64 = dlsym(RTLD_NEXT, "truncate64"); - unwrapped_ftruncate = dlsym(RTLD_NEXT, "ftruncate"); - unwrapped_ftruncate64 = dlsym(RTLD_NEXT, "ftruncate64"); - unwrapped_mknod = dlsym(RTLD_NEXT, "mknod"); - unwrapped_tmpfile = dlsym(RTLD_NEXT, "tmpfile"); - unwrapped_tmpfile64 = dlsym(RTLD_NEXT, "tmpfile64"); - unwrapped_tmpnam = dlsym(RTLD_NEXT, "tmpnam"); - unwrapped_tmpnam_r = dlsym(RTLD_NEXT, "tmpnam_r"); - unwrapped_tempnam = dlsym(RTLD_NEXT, "tempnam"); - unwrapped_mktemp = dlsym(RTLD_NEXT, "mktemp"); - unwrapped_mkstemp = dlsym(RTLD_NEXT, "mkstemp"); - unwrapped_mkdtemp = dlsym(RTLD_NEXT, "mkdtemp"); - unwrapped_execv = dlsym(RTLD_NEXT, "execv"); - unwrapped_execl = dlsym(RTLD_NEXT, "execl"); - unwrapped_execve = dlsym(RTLD_NEXT, "execve"); - unwrapped_fexecve = dlsym(RTLD_NEXT, "fexecve"); - unwrapped_execle = dlsym(RTLD_NEXT, "execle"); - unwrapped_execvp = dlsym(RTLD_NEXT, "execvp"); - unwrapped_execlp = dlsym(RTLD_NEXT, "execlp"); - unwrapped_execvpe = dlsym(RTLD_NEXT, "execvpe"); - unwrapped_fork = dlsym(RTLD_NEXT, "fork"); - unwrapped__Fork = dlsym(RTLD_NEXT, "_Fork"); - unwrapped_vfork = dlsym(RTLD_NEXT, "vfork"); - unwrapped_clone = dlsym(RTLD_NEXT, "clone"); - unwrapped_waitpid = dlsym(RTLD_NEXT, "waitpid"); - unwrapped_wait = dlsym(RTLD_NEXT, "wait"); - unwrapped_wait4 = dlsym(RTLD_NEXT, "wait4"); - unwrapped_wait3 = dlsym(RTLD_NEXT, "wait3"); - unwrapped_waitid = dlsym(RTLD_NEXT, "waitid"); - unwrapped_thrd_create = dlsym(RTLD_NEXT, 
"thrd_create"); - unwrapped_thrd_join = dlsym(RTLD_NEXT, "thrd_join"); - unwrapped_pthread_create = dlsym(RTLD_NEXT, "pthread_create"); - unwrapped_pthread_join = dlsym(RTLD_NEXT, "pthread_join"); - unwrapped_fopen64 = dlsym(RTLD_NEXT, "fopen64"); - unwrapped_freopen64 = dlsym(RTLD_NEXT, "freopen64"); - unwrapped_openat64 = dlsym(RTLD_NEXT, "openat64"); - unwrapped_open64 = dlsym(RTLD_NEXT, "open64"); - unwrapped_create64 = dlsym(RTLD_NEXT, "create64"); -} - -FILE * fopen(const char *filename, const char *opentype) -{ - maybe_init_thread(); - struct Op op = {open_op_code, {.open = {.path = create_path_lazy(AT_FDCWD, filename, 0), .flags = fopen_to_flags(opentype), .mode = 0, .fd = -1, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - FILE * ret = unwrapped_fopen(filename, opentype); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret == NULL) - { - op.data.open.ferrno = saved_errno; - } - else - { - op.data.open.fd = fileno(ret); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -FILE * freopen(const char *filename, const char *opentype, FILE *stream) -{ - maybe_init_thread(); - int original_fd = fileno(stream); - struct Op open_op = {open_op_code, {.open = {.path = create_path_lazy(AT_FDCWD, filename, 0), .flags = fopen_to_flags(opentype), .mode = 0, .fd = -1, .ferrno = 0}}, {0}, 0, 0}; - struct Op close_op = {close_op_code, {.close = {original_fd, original_fd, 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(open_op); - prov_log_try(close_op); - } - FILE * ret = unwrapped_freopen(filename, opentype, stream); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret == NULL) - { - open_op.data.open.ferrno = saved_errno; - close_op.data.close.ferrno = saved_errno; - } - else - { - open_op.data.open.fd = fileno(ret); - } - prov_log_record(open_op); - prov_log_record(close_op); - } - errno = saved_errno; - return ret; -} - -int 
fclose(FILE *stream) -{ - maybe_init_thread(); - int fd = fileno(stream); - struct Op op = {close_op_code, {.close = {fd, fd, 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_fclose(stream); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.close.ferrno = (ret == 0) ? (0) : (errno); - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int fcloseall() -{ - maybe_init_thread(); - struct Op op = {close_op_code, {.close = {0, INT_MAX, 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_fcloseall(); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.close.ferrno = (ret == 0) ? (0) : (errno); - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int openat(int dirfd, const char *filename, int flags, ...) -{ - maybe_init_thread(); - bool has_mode_arg = ((flags & O_CREAT) != 0) || ((flags & __O_TMPFILE) == __O_TMPFILE); - struct Op op = {open_op_code, {.open = {.path = create_path_lazy(dirfd, filename, (flags & O_NOFOLLOW) ? (AT_SYMLINK_NOFOLLOW) : (0)), .flags = flags, .mode = 0, .fd = -1, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - if (has_mode_arg) - { - va_list ap; - va_start(ap, flags); - op.data.open.mode = va_arg(ap, __type_mode_t); - va_end(ap); - } - prov_log_try(op); - } - size_t varargs_size = (((sizeof(dirfd)) + (sizeof(filename))) + (sizeof(flags))) + ((has_mode_arg) ? (sizeof(mode_t)) : (0)); - int ret = *((int *) __builtin_apply((void (*)()) unwrapped_openat, __builtin_apply_args(), varargs_size)); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.open.ferrno = (unlikely(ret == (-1))) ? (errno) : (0); - op.data.open.fd = ret; - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int open(const char *filename, int flags, ...) 
-{ - maybe_init_thread(); - bool has_mode_arg = ((flags & O_CREAT) != 0) || ((flags & __O_TMPFILE) == __O_TMPFILE); - struct Op op = {open_op_code, {.open = {.path = create_path_lazy(AT_FDCWD, filename, (flags & O_NOFOLLOW) ? (AT_SYMLINK_NOFOLLOW) : (0)), .flags = flags, .mode = 0, .fd = -1, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - if (has_mode_arg) - { - va_list ap; - va_start(ap, flags); - op.data.open.mode = va_arg(ap, __type_mode_t); - va_end(ap); - } - prov_log_try(op); - } - size_t varargs_size = ((sizeof(filename)) + (sizeof(flags))) + ((has_mode_arg) ? (sizeof(mode_t)) : (0)); - int ret = *((int *) __builtin_apply((void (*)()) unwrapped_open, __builtin_apply_args(), varargs_size)); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.open.ferrno = (unlikely(ret == (-1))) ? (errno) : (0); - op.data.open.fd = ret; - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int creat(const char *filename, mode_t mode) -{ - maybe_init_thread(); - struct Op op = {open_op_code, {.open = {.path = create_path_lazy(AT_FDCWD, filename, 0), .flags = (O_WRONLY | O_CREAT) | O_TRUNC, .mode = mode, .fd = -1, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_creat(filename, mode); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.open.ferrno = (unlikely(ret == (-1))) ? (errno) : (0); - op.data.open.fd = ret; - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int close(int filedes) -{ - maybe_init_thread(); - struct Op op = {close_op_code, {.close = {filedes, filedes, 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_close(filedes); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.close.ferrno = (ret == 0) ? 
(0) : (errno); - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int close_range(unsigned int lowfd, unsigned int maxfd, int flags) -{ - maybe_init_thread(); - if (flags != 0) - { - NOT_IMPLEMENTED("I don't know how to handle close_rnage flags yet"); - } - struct Op op = {close_op_code, {.close = {lowfd, maxfd, 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_close_range(lowfd, maxfd, flags); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.close.ferrno = (ret == 0) ? (0) : (errno); - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -void closefrom(int lowfd) -{ - maybe_init_thread(); - struct Op op = {close_op_code, {.close = {lowfd, INT_MAX, 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - unwrapped_closefrom(lowfd); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - prov_log_record(op); - } - errno = saved_errno; -} - -int dup(int old) -{ - maybe_init_thread(); - int ret = unwrapped_dup(old); - return ret; -} - -int dup2(int old, int new) -{ - maybe_init_thread(); - int ret = unwrapped_dup2(old, new); - return ret; -} - -int dup3(int old, int new, int flags) -{ - maybe_init_thread(); - int ret = unwrapped_dup3(old, new, flags); - return ret; -} - -int fcntl(int filedes, int command, ...) 
-{ - maybe_init_thread(); - bool int_arg = (((((((((command == F_DUPFD) || (command == F_DUPFD_CLOEXEC)) || (command == F_SETFD)) || (command == F_SETFL)) || (command == F_SETOWN)) || (command == F_SETSIG)) || (command == F_SETLEASE)) || (command == F_NOTIFY)) || (command == F_SETPIPE_SZ)) || (command == F_ADD_SEALS); - bool ptr_arg = ((((((((command == F_SETLK) || (command == F_SETLKW)) || (command == F_GETLK)) || (command == F_GETOWN_EX)) || (command == F_SETOWN_EX)) || (command == F_GET_RW_HINT)) || (command == F_SET_RW_HINT)) || (command == F_GET_FILE_RW_HINT)) || (command == F_SET_FILE_RW_HINT); - assert((!int_arg) || (!ptr_arg)); - size_t varargs_size = ((sizeof(filedes)) + (sizeof(command))) + ((int_arg) ? (sizeof(int)) : ((ptr_arg) ? (sizeof(void *)) : (0))); - int ret = *((int *) __builtin_apply((void (*)()) unwrapped_fcntl, __builtin_apply_args(), varargs_size)); - return ret; -} - -int chdir(const char *filename) -{ - maybe_init_thread(); - struct Op op = {chdir_op_code, {.chdir = {.path = create_path_lazy(AT_FDCWD, filename, 0), .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_chdir(filename); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.chdir.ferrno = (ret == 0) ? (0) : (errno); - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int fchdir(int filedes) -{ - maybe_init_thread(); - struct Op op = {chdir_op_code, {.chdir = {.path = create_path_lazy(filedes, "", AT_EMPTY_PATH), .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_fchdir(filedes); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.chdir.ferrno = (ret == 0) ? 
(0) : (errno); - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -DIR * opendir(const char *dirname) -{ - maybe_init_thread(); - struct Op op = {open_op_code, {.open = {.path = create_path_lazy(AT_FDCWD, dirname, 0), .flags = (O_RDONLY | O_DIRECTORY) | O_CLOEXEC, .mode = 0, .fd = -1, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - DIR * ret = unwrapped_opendir(dirname); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.open.ferrno = (ret == NULL) ? (errno) : (0); - op.data.open.fd = try_dirfd(ret); - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -DIR * fdopendir(int fd) -{ - maybe_init_thread(); - struct Op op = {open_op_code, {.open = {.path = create_path_lazy(fd, "", AT_EMPTY_PATH), .flags = (O_RDONLY | O_DIRECTORY) | O_CLOEXEC, .mode = 0, .fd = -1, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - DIR * ret = unwrapped_fdopendir(fd); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.open.ferrno = (ret == NULL) ? 
(errno) : (0); - op.data.open.fd = try_dirfd(ret); - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -struct dirent * readdir(DIR *dirstream) -{ - maybe_init_thread(); - int fd = try_dirfd(dirstream); - struct Op op = {readdir_op_code, {.readdir = {.dir = create_path_lazy(fd, "", AT_EMPTY_PATH), .child = NULL, .all_children = false, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - struct dirent * ret = unwrapped_readdir(dirstream); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret == NULL) - { - op.data.readdir.ferrno = saved_errno; - } - else - { - op.data.readdir.child = arena_strndup(get_data_arena(), ret->d_name, sizeof(ret->d_name)); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int readdir_r(DIR *dirstream, struct dirent *entry, struct dirent **result) -{ - maybe_init_thread(); - int fd = try_dirfd(dirstream); - struct Op op = {readdir_op_code, {.readdir = {.dir = create_path_lazy(fd, "", AT_EMPTY_PATH), .child = NULL, .all_children = false, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_readdir_r(dirstream, entry, result); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if ((*result) == NULL) - { - op.data.readdir.ferrno = saved_errno; - } - else - { - op.data.readdir.child = arena_strndup(get_data_arena(), entry->d_name, sizeof(entry->d_name)); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -struct dirent64 * readdir64(DIR *dirstream) -{ - maybe_init_thread(); - int fd = try_dirfd(dirstream); - struct Op op = {readdir_op_code, {.readdir = {.dir = create_path_lazy(fd, "", AT_EMPTY_PATH), .child = NULL, .all_children = false, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - struct dirent64 * ret = unwrapped_readdir64(dirstream); - int saved_errno = errno; - if 
(likely(prov_log_is_enabled())) - { - if (ret == NULL) - { - op.data.readdir.ferrno = saved_errno; - } - else - { - op.data.readdir.child = arena_strndup(get_data_arena(), ret->d_name, sizeof(ret->d_name)); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int readdir64_r(DIR *dirstream, struct dirent64 *entry, struct dirent64 **result) -{ - maybe_init_thread(); - int fd = try_dirfd(dirstream); - struct Op op = {readdir_op_code, {.readdir = {.dir = create_path_lazy(fd, "", AT_EMPTY_PATH), .child = NULL, .all_children = false, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_readdir64_r(dirstream, entry, result); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if ((*result) == NULL) - { - op.data.readdir.ferrno = saved_errno; - } - else - { - op.data.readdir.child = arena_strndup(get_data_arena(), entry->d_name, sizeof(entry->d_name)); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int closedir(DIR *dirstream) -{ - maybe_init_thread(); - int fd = try_dirfd(dirstream); - struct Op op = {close_op_code, {.close = {fd, fd, 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_closedir(dirstream); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.close.ferrno = (ret == 0) ? 
(0) : (errno); - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -void rewinddir(DIR *dirstream) -{ - maybe_init_thread(); - unwrapped_rewinddir(dirstream); -} - -long int telldir(DIR *dirstream) -{ - maybe_init_thread(); - long int ret = unwrapped_telldir(dirstream); - return ret; -} - -void seekdir(DIR *dirstream, long int pos) -{ - maybe_init_thread(); - unwrapped_seekdir(dirstream, pos); -} - -int scandir(const char *dir, struct dirent ***namelist, int (*selector)(const struct dirent *), int (*cmp)(const struct dirent **, const struct dirent **)) -{ - maybe_init_thread(); - struct Op op = {readdir_op_code, {.readdir = {.dir = create_path_lazy(AT_FDCWD, dir, 0), .child = NULL, .all_children = true}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_scandir(dir, namelist, selector, cmp); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int scandir64(const char *dir, struct dirent64 ***namelist, int (*selector)(const struct dirent64 *), int (*cmp)(const struct dirent64 **, const struct dirent64 **)) -{ - maybe_init_thread(); - struct Op op = {readdir_op_code, {.readdir = {.dir = create_path_lazy(AT_FDCWD, dir, 0), .child = NULL, .all_children = true}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_scandir64(dir, namelist, selector, cmp); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int scandirat(int dirfd, const char * restrict dirp, struct dirent *** restrict namelist, int (*filter)(const struct dirent *), int (*compar)(const struct dirent **, const struct dirent **)) -{ - maybe_init_thread(); - struct Op op = {readdir_op_code, {.readdir = 
{.dir = create_path_lazy(dirfd, dirp, 0), .child = NULL, .all_children = true}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_scandirat(dirfd, dirp, namelist, filter, compar); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -ssize_t getdents64(int fd, void *buffer, size_t length) -{ - maybe_init_thread(); - struct Op op = {readdir_op_code, {.readdir = {.dir = create_path_lazy(fd, "", AT_EMPTY_PATH), .child = NULL, .all_children = true}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - ssize_t ret = unwrapped_getdents64(fd, buffer, length); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (unlikely(ret == (-1))) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int ftw(const char *filename, __ftw_func_t func, int descriptors) -{ - maybe_init_thread(); - struct Op op = {readdir_op_code, {.readdir = {.dir = create_path_lazy(AT_FDCWD, filename, 0), .child = NULL, .all_children = true}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_ftw(filename, func, descriptors); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int ftw64(const char *filename, __ftw64_func_t func, int descriptors) -{ - maybe_init_thread(); - struct Op op = {readdir_op_code, {.readdir = {.dir = create_path_lazy(AT_FDCWD, filename, 0), .child = NULL, .all_children = true}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_ftw64(filename, func, descriptors); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if 
(ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int nftw(const char *filename, __nftw_func_t func, int descriptors, int flag) -{ - maybe_init_thread(); - struct Op op = {readdir_op_code, {.readdir = {.dir = create_path_lazy(AT_FDCWD, filename, 0), .child = NULL, .all_children = true}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_nftw(filename, func, descriptors, flag); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int nftw64(const char *filename, __nftw64_func_t func, int descriptors, int flag) -{ - maybe_init_thread(); - struct Op op = {readdir_op_code, {.readdir = {.dir = create_path_lazy(AT_FDCWD, filename, 0), .child = NULL, .all_children = true}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_nftw64(filename, func, descriptors, flag); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int link(const char *oldname, const char *newname) -{ - maybe_init_thread(); - int ret = unwrapped_link(oldname, newname); - return ret; -} - -int linkat(int oldfd, const char *oldname, int newfd, const char *newname, int flags) -{ - maybe_init_thread(); - int ret = unwrapped_linkat(oldfd, oldname, newfd, newname, flags); - return ret; -} - -int symlink(const char *oldname, const char *newname) -{ - maybe_init_thread(); - int ret = unwrapped_symlink(oldname, newname); - return ret; -} - -int symlinkat(const char *target, int newdirfd, const char *linkpath) -{ - maybe_init_thread(); - int ret = unwrapped_symlinkat(target, newdirfd, linkpath); - return ret; -} - -ssize_t readlink(const char 
*filename, char *buffer, size_t size) -{ - maybe_init_thread(); - ssize_t ret = unwrapped_readlink(filename, buffer, size); - return ret; -} - -ssize_t readlinkat(int dirfd, const char *filename, char *buffer, size_t size) -{ - maybe_init_thread(); - ssize_t ret = unwrapped_readlinkat(dirfd, filename, buffer, size); - return ret; -} - -char * canonicalize_file_name(const char *name) -{ - maybe_init_thread(); - char * ret = unwrapped_canonicalize_file_name(name); - return ret; -} - -char * realpath(const char * restrict name, char * restrict resolved) -{ - maybe_init_thread(); - char * ret = unwrapped_realpath(name, resolved); - return ret; -} - -int unlink(const char *filename) -{ - maybe_init_thread(); - int ret = unwrapped_unlink(filename); - return ret; -} - -int rmdir(const char *filename) -{ - maybe_init_thread(); - int ret = unwrapped_rmdir(filename); - return ret; -} - -int remove(const char *filename) -{ - maybe_init_thread(); - int ret = unwrapped_remove(filename); - return ret; -} - -int rename(const char *oldname, const char *newname) -{ - maybe_init_thread(); - int ret = unwrapped_rename(oldname, newname); - return ret; -} - -int mkdir(const char *filename, mode_t mode) -{ - maybe_init_thread(); - int ret = unwrapped_mkdir(filename, mode); - return ret; -} - -int mkdirat(int dirfd, const char *pathname, mode_t mode) -{ - maybe_init_thread(); - int ret = unwrapped_mkdirat(dirfd, pathname, mode); - return ret; -} - -int stat(const char *filename, struct stat *buf) -{ - maybe_init_thread(); - struct Op op = {stat_op_code, {.stat = {.path = create_path_lazy(AT_FDCWD, filename, 0), .flags = 0, .ferrno = 0, .stat_result = {0}}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_stat(filename, buf); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.stat.ferrno = saved_errno; - } - else - { - stat_result_from_stat(&op.data.stat.stat_result, buf); - } - 
prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int stat64(const char *filename, struct stat64 *buf) -{ - maybe_init_thread(); - struct Op op = {stat_op_code, {.stat = {.path = create_path_lazy(AT_FDCWD, filename, 0), .flags = 0, .stat_result = {0}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_stat64(filename, buf); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.stat.ferrno = saved_errno; - } - else - { - stat_result_from_stat64(&op.data.stat.stat_result, buf); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int fstat(int filedes, struct stat *buf) -{ - maybe_init_thread(); - struct Op op = {stat_op_code, {.stat = {.path = create_path_lazy(filedes, "", AT_EMPTY_PATH), .flags = 0, .stat_result = {0}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_fstat(filedes, buf); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.stat.ferrno = saved_errno; - } - else - { - stat_result_from_stat(&op.data.stat.stat_result, buf); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int fstat64(int filedes, struct stat64 * restrict buf) -{ - maybe_init_thread(); - struct Op op = {stat_op_code, {.stat = {.path = create_path_lazy(filedes, "", AT_EMPTY_PATH), .flags = 0, .stat_result = {0}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_fstat64(filedes, buf); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.stat.ferrno = saved_errno; - } - else - { - stat_result_from_stat64(&op.data.stat.stat_result, buf); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int lstat(const char *filename, struct stat *buf) -{ - maybe_init_thread(); - struct Op op = 
{stat_op_code, {.stat = {.path = create_path_lazy(AT_FDCWD, filename, AT_SYMLINK_NOFOLLOW), .flags = AT_SYMLINK_NOFOLLOW, .stat_result = {0}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_lstat(filename, buf); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.stat.ferrno = saved_errno; - } - else - { - stat_result_from_stat(&op.data.stat.stat_result, buf); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int lstat64(const char *filename, struct stat64 *buf) -{ - maybe_init_thread(); - struct Op op = {stat_op_code, {.stat = {.path = create_path_lazy(AT_FDCWD, filename, AT_SYMLINK_NOFOLLOW), .flags = AT_SYMLINK_NOFOLLOW, .stat_result = {0}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_lstat64(filename, buf); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - else - { - stat_result_from_stat64(&op.data.stat.stat_result, buf); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int statx(int dirfd, const char * restrict pathname, int flags, unsigned int mask, struct statx * restrict statxbuf) -{ - maybe_init_thread(); - struct Op op = {stat_op_code, {.stat = {.path = create_path_lazy(dirfd, pathname, flags), .flags = flags, .stat_result = {0}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_statx(dirfd, pathname, flags, mask, statxbuf); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.stat.ferrno = saved_errno; - } - else - { - stat_result_from_statx(&op.data.stat.stat_result, statxbuf); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int fstatat(int dirfd, const char * restrict pathname, struct stat * restrict buf, int 
flags) -{ - maybe_init_thread(); - struct Op op = {stat_op_code, {.stat = {.path = create_path_lazy(dirfd, pathname, flags), .flags = flags, .stat_result = {0}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_fstatat(dirfd, pathname, buf, flags); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.stat.ferrno = saved_errno; - } - else - { - stat_result_from_stat(&op.data.stat.stat_result, buf); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int fstatat64(int fd, const char * restrict file, struct stat64 * restrict buf, int flags) -{ - maybe_init_thread(); - struct Op op = {stat_op_code, {.stat = {.path = create_path_lazy(fd, file, flags), .flags = flags, .stat_result = {0}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_fstatat64(fd, file, buf, flags); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.stat.ferrno = saved_errno; - } - else - { - stat_result_from_stat64(&op.data.stat.stat_result, buf); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int chown(const char *filename, uid_t owner, gid_t group) -{ - maybe_init_thread(); - struct Op op = {update_metadata_op_code, {.update_metadata = {.path = create_path_lazy(AT_FDCWD, filename, 0), .flags = 0, .kind = MetadataOwnership, .value = {.ownership = {.uid = owner, .gid = group}}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_chown(filename, owner, group); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int fchown(int filedes, uid_t owner, gid_t group) -{ - maybe_init_thread(); - struct Op op = {update_metadata_op_code, 
{.update_metadata = {.path = create_path_lazy(filedes, "", AT_EMPTY_PATH), .flags = AT_EMPTY_PATH, .kind = MetadataOwnership, .value = {.ownership = {.uid = owner, .gid = group}}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_fchown(filedes, owner, group); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int lchown(const char *pathname, uid_t owner, gid_t group) -{ - maybe_init_thread(); - struct Op op = {update_metadata_op_code, {.update_metadata = {.path = create_path_lazy(AT_FDCWD, pathname, AT_SYMLINK_NOFOLLOW), .flags = AT_SYMLINK_NOFOLLOW, .kind = MetadataOwnership, .value = {.ownership = {.uid = owner, .gid = group}}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_lchown(pathname, owner, group); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int fchownat(int dirfd, const char *pathname, uid_t owner, gid_t group, int flags) -{ - maybe_init_thread(); - struct Op op = {update_metadata_op_code, {.update_metadata = {.path = create_path_lazy(dirfd, pathname, flags), .flags = flags, .kind = MetadataOwnership, .value = {.ownership = {.uid = owner, .gid = group}}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_fchownat(dirfd, pathname, owner, group, flags); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int chmod(const char *filename, mode_t mode) -{ - maybe_init_thread(); - struct Op op = {update_metadata_op_code, 
{.update_metadata = {.path = create_path_lazy(AT_FDCWD, filename, 0), .flags = 0, .kind = MetadataMode, .value = {.mode = mode}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_chmod(filename, mode); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int fchmod(int filedes, mode_t mode) -{ - maybe_init_thread(); - struct Op op = {update_metadata_op_code, {.update_metadata = {.path = create_path_lazy(filedes, "", AT_EMPTY_PATH), .flags = AT_EMPTY_PATH, .kind = MetadataMode, .value = {.mode = mode}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_fchmod(filedes, mode); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int fchmodat(int dirfd, const char *pathname, mode_t mode, int flags) -{ - maybe_init_thread(); - struct Op op = {update_metadata_op_code, {.update_metadata = {.path = create_path_lazy(dirfd, pathname, flags), .flags = flags, .kind = MetadataMode, .value = {.mode = mode}, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_fchmodat(dirfd, pathname, mode, flags); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int access(const char *filename, int how) -{ - maybe_init_thread(); - struct Op op = {access_op_code, {.access = {create_path_lazy(AT_FDCWD, filename, 0), how, 0, 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_access(filename, how); - int saved_errno = 
errno; - if (likely(prov_log_is_enabled())) - { - op.data.access.ferrno = (ret == 0) ? (0) : (errno); - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int faccessat(int dirfd, const char *pathname, int mode, int flags) -{ - maybe_init_thread(); - struct Op op = {access_op_code, {.access = {.path = create_path_lazy(dirfd, pathname, 0), .mode = mode, .flags = flags, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_faccessat(dirfd, pathname, mode, flags); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.access.ferrno = (ret == 0) ? (0) : (errno); - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int utime(const char *filename, const struct utimbuf *times) -{ - maybe_init_thread(); - struct Op op = {update_metadata_op_code, {.update_metadata = {.path = create_path_lazy(AT_FDCWD, filename, 0), .flags = 0, .kind = MetadataTimes, .value = {0}, .ferrno = 0}}, {0}, 0, 0}; - if (times) - { - op.data.update_metadata.value.times.is_null = false; - op.data.update_metadata.value.times.atime.tv_sec = times->actime; - op.data.update_metadata.value.times.mtime.tv_sec = times->modtime; - } - else - { - op.data.update_metadata.value.times.is_null = true; - } - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_utime(filename, times); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int utimes(const char *filename, const struct timeval tvp[2]) -{ - maybe_init_thread(); - struct Op op = {update_metadata_op_code, {.update_metadata = {.path = create_path_lazy(AT_FDCWD, filename, 0), .flags = 0, .kind = MetadataTimes, .value = {0}, .ferrno = 0}}, {0}, 0, 0}; - if (tvp) - { - op.data.update_metadata.value.times.is_null = false; - op.data.update_metadata.value.times.atime = 
tvp[0]; - op.data.update_metadata.value.times.mtime = tvp[1]; - } - else - { - op.data.update_metadata.value.times.is_null = true; - } - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_utimes(filename, tvp); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int lutimes(const char *filename, const struct timeval tvp[2]) -{ - maybe_init_thread(); - struct Op op = {update_metadata_op_code, {.update_metadata = {.path = create_path_lazy(AT_FDCWD, filename, AT_SYMLINK_NOFOLLOW), .flags = AT_SYMLINK_NOFOLLOW, .kind = MetadataTimes, .value = {0}, .ferrno = 0}}, {0}, 0, 0}; - if (tvp) - { - op.data.update_metadata.value.times.is_null = false; - op.data.update_metadata.value.times.atime = tvp[0]; - op.data.update_metadata.value.times.mtime = tvp[1]; - } - else - { - op.data.update_metadata.value.times.is_null = true; - } - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_lutimes(filename, tvp); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int futimes(int fd, const struct timeval tvp[2]) -{ - maybe_init_thread(); - struct Op op = {update_metadata_op_code, {.update_metadata = {.path = create_path_lazy(fd, "", AT_EMPTY_PATH), .flags = AT_EMPTY_PATH, .kind = MetadataTimes, .value = {0}, .ferrno = 0}}, {0}, 0, 0}; - if (tvp) - { - op.data.update_metadata.value.times.is_null = false; - op.data.update_metadata.value.times.atime = tvp[0]; - op.data.update_metadata.value.times.mtime = tvp[1]; - } - else - { - op.data.update_metadata.value.times.is_null = true; - } - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_futimes(fd, tvp); - int saved_errno = errno; - if 
(likely(prov_log_is_enabled())) - { - if (ret != 0) - { - op.data.readdir.ferrno = saved_errno; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int truncate(const char *filename, off_t length) -{ - maybe_init_thread(); - int ret = unwrapped_truncate(filename, length); - return ret; -} - -int truncate64(const char *name, off64_t length) -{ - maybe_init_thread(); - int ret = unwrapped_truncate64(name, length); - return ret; -} - -int ftruncate(int fd, off_t length) -{ - maybe_init_thread(); - int ret = unwrapped_ftruncate(fd, length); - return ret; -} - -int ftruncate64(int id, off64_t length) -{ - maybe_init_thread(); - int ret = unwrapped_ftruncate64(id, length); - return ret; -} - -int mknod(const char *filename, mode_t mode, dev_t dev) -{ - maybe_init_thread(); - int ret = unwrapped_mknod(filename, mode, dev); - return ret; -} - -FILE * tmpfile() -{ - maybe_init_thread(); - FILE * ret = unwrapped_tmpfile(); - return ret; -} - -FILE * tmpfile64() -{ - maybe_init_thread(); - FILE * ret = unwrapped_tmpfile64(); - return ret; -} - -char * tmpnam(char *result) -{ - maybe_init_thread(); - char * ret = unwrapped_tmpnam(result); - return ret; -} - -char * tmpnam_r(char *result) -{ - maybe_init_thread(); - char * ret = unwrapped_tmpnam_r(result); - return ret; -} - -char * tempnam(const char *dir, const char *prefix) -{ - maybe_init_thread(); - char * ret = unwrapped_tempnam(dir, prefix); - return ret; -} - -char * mktemp(char *template) -{ - maybe_init_thread(); - char * ret = unwrapped_mktemp(template); - return ret; -} - -int mkstemp(char *template) -{ - maybe_init_thread(); - int ret = unwrapped_mkstemp(template); - return ret; -} - -char * mkdtemp(char *template) -{ - maybe_init_thread(); - char * ret = unwrapped_mkdtemp(template); - return ret; -} - -int execv(const char *filename, char * const argv[]) -{ - maybe_init_thread(); - size_t argc = 0; - char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); - size_t envc = 
0; - char * const *updated_env = update_env_with_probe_vars(environ, &envc); - char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); - struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(0, filename, 0), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; - op.data.exec.argc = argc; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - prov_log_save(); - } - else - { - prov_log_save(); - } - int ret = unwrapped_execvpe(filename, argv, updated_env); - int saved_errno = errno; - free((char **) updated_env); - if (likely(prov_log_is_enabled())) - { - assert(errno > 0); - op.data.exec.ferrno = saved_errno; - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int execl(const char *filename, const char *arg0, ...) -{ - maybe_init_thread(); - size_t argc = COUNT_NONNULL_VARARGS(arg0); - char **argv = malloc((argc + 1) * (sizeof(char *))); - va_list ap; - va_start(ap, arg0); - for (size_t i = 0; i < argc; ++i) - { - argv[i] = va_arg(ap, __type_charp); - } - - va_end(ap); - argv[argc] = NULL; - char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); - size_t envc = 0; - char * const *updated_env = update_env_with_probe_vars(environ, &envc); - char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); - struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(0, filename, 0), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - prov_log_save(); - } - else - { - prov_log_save(); - } - int ret = unwrapped_execvpe(filename, argv, updated_env); - int saved_errno = errno; - free((char **) updated_env); - free((char **) argv); - if (likely(prov_log_is_enabled())) - { - assert(errno > 0); - op.data.exec.ferrno = saved_errno; - prov_log_record(op); - } - errno = saved_errno; - return ret; 
-} - -int execve(const char *filename, char * const argv[], char * const env[]) -{ - maybe_init_thread(); - size_t argc = 0; - char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); - size_t envc = 0; - char * const *updated_env = update_env_with_probe_vars(env, &envc); - char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); - struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(0, filename, 0), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - prov_log_save(); - } - else - { - prov_log_save(); - } - int ret = unwrapped_execvpe(filename, argv, updated_env); - int saved_errno = errno; - free((char **) updated_env); - if (likely(prov_log_is_enabled())) - { - assert(errno > 0); - op.data.exec.ferrno = saved_errno; - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int fexecve(int fd, char * const argv[], char * const env[]) -{ - maybe_init_thread(); - size_t argc = 0; - char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); - size_t envc = 0; - char * const *updated_env = update_env_with_probe_vars(env, &envc); - char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); - struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(fd, "", AT_EMPTY_PATH), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - prov_log_save(); - } - else - { - prov_log_save(); - } - int ret = unwrapped_fexecve(fd, argv, updated_env); - int saved_errno = errno; - free((char **) updated_env); - if (likely(prov_log_is_enabled())) - { - assert(errno > 0); - op.data.exec.ferrno = saved_errno; - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int execle(const char *filename, const char *arg0, ...) 
-{ - maybe_init_thread(); - size_t argc = COUNT_NONNULL_VARARGS(arg0) - 1; - char **argv = malloc((argc + 1) * (sizeof(char *))); - va_list ap; - va_start(ap, arg0); - for (size_t i = 0; i < argc; ++i) - { - argv[i] = va_arg(ap, __type_charp); - } - - argv[argc] = NULL; - char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); - char **env = va_arg(ap, __type_charpp); - va_end(ap); - size_t envc = 0; - char * const *updated_env = update_env_with_probe_vars(env, &envc); - char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); - struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(0, filename, 0), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - prov_log_save(); - } - else - { - prov_log_save(); - } - ERROR("Not implemented; I need to figure out how to update the environment."); - int ret = unwrapped_execvpe(filename, argv, updated_env); - int saved_errno = errno; - free((char **) updated_env); - free((char **) argv); - if (likely(prov_log_is_enabled())) - { - assert(errno > 0); - op.data.exec.ferrno = saved_errno; - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int execvp(const char *filename, char * const argv[]) -{ - maybe_init_thread(); - char *bin_path = arena_calloc(get_data_arena(), PATH_MAX + 1, sizeof(char)); - bool found = lookup_on_path(filename, bin_path); - size_t argc = 0; - char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); - size_t envc = 0; - char * const *updated_env = update_env_with_probe_vars(environ, &envc); - char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); - struct Op op = {exec_op_code, {.exec = {.path = (found) ? 
(create_path_lazy(0, bin_path, 0)) : (null_path), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - prov_log_save(); - } - else - { - prov_log_save(); - } - int ret = unwrapped_execvpe(filename, argv, updated_env); - int saved_errno = errno; - free((char **) updated_env); - if (likely(prov_log_is_enabled())) - { - assert(errno > 0); - op.data.exec.ferrno = saved_errno; - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int execlp(const char *filename, const char *arg0, ...) -{ - maybe_init_thread(); - char *bin_path = arena_calloc(get_data_arena(), PATH_MAX + 1, sizeof(char)); - bool found = lookup_on_path(filename, bin_path); - size_t argc = COUNT_NONNULL_VARARGS(arg0); - char **argv = malloc((argc + 1) * (sizeof(char *))); - va_list ap; - va_start(ap, arg0); - for (size_t i = 0; i < argc; ++i) - { - argv[i] = va_arg(ap, __type_charp); - } - - argv[argc] = NULL; - va_end(ap); - char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); - size_t envc = 0; - char * const *updated_env = update_env_with_probe_vars(environ, &envc); - char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); - struct Op op = {exec_op_code, {.exec = {.path = (found) ? 
(create_path_lazy(0, bin_path, 0)) : (null_path), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - prov_log_save(); - } - else - { - prov_log_save(); - } - int ret = unwrapped_execvpe(filename, argv, updated_env); - int saved_errno = errno; - free((char **) updated_env); - free((char **) argv); - if (likely(prov_log_is_enabled())) - { - assert(errno > 0); - op.data.exec.ferrno = saved_errno; - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int execvpe(const char *filename, char * const argv[], char * const envp[]) -{ - maybe_init_thread(); - char *bin_path = arena_calloc(get_data_arena(), PATH_MAX + 1, sizeof(char)); - bool found = lookup_on_path(filename, bin_path); - size_t argc = 0; - char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); - size_t envc = 0; - char * const *updated_env = update_env_with_probe_vars(envp, &envc); - char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); - struct Op op = {exec_op_code, {.exec = {.path = (found) ? 
(create_path_lazy(0, bin_path, 0)) : (null_path), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - prov_log_save(); - } - else - { - prov_log_save(); - } - int ret = unwrapped_execvpe(filename, argv, updated_env); - int saved_errno = errno; - free((char **) updated_env); - if (likely(prov_log_is_enabled())) - { - assert(errno > 0); - op.data.exec.ferrno = saved_errno; - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -pid_t fork() -{ - maybe_init_thread(); - struct Op op = {clone_op_code, {.clone = {.flags = 0, .run_pthread_atfork_handlers = true, .task_type = TASK_PID, .task_id = -1, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - prov_log_save(); - } - else - { - prov_log_save(); - } - pid_t ret = unwrapped_fork(); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (unlikely(ret == (-1))) - { - op.data.clone.ferrno = saved_errno; - prov_log_record(op); - } - else - if (ret == 0) - { - reinit_process(); - } - else - { - op.data.clone.task_id = ret; - prov_log_record(op); - } - } - errno = saved_errno; - return ret; -} - -pid_t _Fork() -{ - maybe_init_thread(); - struct Op op = {clone_op_code, {.clone = {.flags = 0, .run_pthread_atfork_handlers = false, .task_type = TASK_PID, .task_id = 0, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - prov_log_save(); - } - else - { - prov_log_save(); - } - pid_t ret = unwrapped__Fork(); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (unlikely(ret == (-1))) - { - op.data.clone.ferrno = saved_errno; - prov_log_record(op); - } - else - if (ret == 0) - { - reinit_process(); - } - else - { - op.data.clone.task_id = ret; - prov_log_record(op); - } - } - errno = saved_errno; - return ret; -} - -pid_t vfork() -{ - maybe_init_thread(); - struct Op op = {clone_op_code, 
{.clone = {.flags = 0, .run_pthread_atfork_handlers = true, .task_type = TASK_PID, .task_id = 0, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - prov_log_save(); - } - else - { - prov_log_save(); - } - int ret = unwrapped_fork(); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (unlikely(ret == (-1))) - { - op.data.clone.ferrno = saved_errno; - prov_log_record(op); - } - else - if (ret == 0) - { - reinit_process(); - } - else - { - op.data.clone.task_id = ret; - prov_log_record(op); - } - } - errno = saved_errno; - return ret; -} - -int clone(fn_ptr_int_void_ptr fn, void *stack, int flags, void *arg, ...) -{ - maybe_init_thread(); - (void) fn; - (void) stack; - (void) arg; - flags = flags & (~CLONE_VFORK); - struct Op op = {clone_op_code, {.clone = {.flags = flags, .run_pthread_atfork_handlers = false, .task_type = (flags & CLONE_THREAD) ? (TASK_TID) : (TASK_PID), .task_id = 0, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - prov_log_save(); - if ((flags & CLONE_THREAD) != (flags & CLONE_VM)) - { - NOT_IMPLEMENTED("I conflate cloning a new thread (resulting in a process with the same PID, new TID) with sharing the memory space. 
If CLONE_SIGHAND is set, then Linux asserts CLONE_THREAD == CLONE_VM; If it is not set and CLONE_THREAD != CLONE_VM, by a real application, I will consider disentangling the assumptions (required to support this combination)."); - } - } - else - { - prov_log_save(); - } - size_t varargs_size = ((((((sizeof(void *)) + (sizeof(void *))) + (sizeof(int))) + ((COUNT_NONNULL_VARARGS(arg) + 1) * (sizeof(void *)))) + (sizeof(pid_t *))) + (sizeof(void *))) + (sizeof(pid_t *)); - int ret = *((int *) __builtin_apply((void (*)()) unwrapped_clone, __builtin_apply_args(), varargs_size)); - int saved_errno = errno; - if (unlikely(ret == (-1))) - { - if (likely(prov_log_is_enabled())) - { - op.data.clone.ferrno = saved_errno; - prov_log_record(op); - } - } - else - if (ret == 0) - { - if (flags & CLONE_THREAD) - { - maybe_init_thread(); - } - else - { - reinit_process(); - } - } - else - { - if (likely(prov_log_is_enabled())) - { - op.data.clone.task_id = ret; - prov_log_record(op); - } - } - errno = saved_errno; - return ret; -} - -pid_t waitpid(pid_t pid, int *status_ptr, int options) -{ - maybe_init_thread(); - int status; - if (status_ptr == NULL) - { - status_ptr = &status; - } - struct Op op = {wait_op_code, {.wait = {.task_type = TASK_PID, .task_id = 0, .options = options, .status = 0, .ferrno = 0}}, {0}, 0, 0}; - prov_log_try(op); - pid_t ret = unwrapped_waitpid(pid, status_ptr, options); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (unlikely(ret == (-1))) - { - op.data.wait.ferrno = saved_errno; - } - else - { - op.data.wait.task_id = ret; - op.data.wait.status = *status_ptr; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -pid_t wait(int *status_ptr) -{ - maybe_init_thread(); - int status; - if (status_ptr == NULL) - { - status_ptr = &status; - } - struct Op op = {wait_op_code, {.wait = {.task_type = TASK_PID, .task_id = -1, .options = 0, .status = 0, .ferrno = 0}}, {0}, 0, 0}; - prov_log_try(op); - pid_t ret = 
unwrapped_wait(status_ptr); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (unlikely(ret == (-1))) - { - op.data.wait.ferrno = saved_errno; - } - else - { - op.data.wait.task_id = ret; - op.data.wait.status = *status_ptr; - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -pid_t wait4(pid_t pid, int *status_ptr, int options, struct rusage *usage) -{ - maybe_init_thread(); - struct Op wait_op = {wait_op_code, {.wait = {.task_type = TASK_TID, .task_id = 0, .options = options, .status = 0, .ferrno = 0}}, {0}, 0, 0}; - prov_log_try(wait_op); - struct Op getrusage_op = {getrusage_op_code, {.getrusage = {.waitpid_arg = pid, .getrusage_arg = 0, .usage = {{0}}, .ferrno = 0}}, {0}, 0, 0}; - if (usage) - { - prov_log_try(getrusage_op); - } - pid_t ret = unwrapped_wait4(pid, status_ptr, options, usage); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (unlikely(ret == (-1))) - { - wait_op.data.wait.ferrno = saved_errno; - if (usage) - { - getrusage_op.data.getrusage.ferrno = saved_errno; - } - } - else - { - wait_op.data.wait.task_id = ret; - wait_op.data.wait.status = *status_ptr; - if (usage) - { - memcpy(&getrusage_op.data.getrusage.usage, usage, sizeof(struct rusage)); - } - } - prov_log_record(wait_op); - if (usage) - { - prov_log_record(getrusage_op); - } - } - errno = saved_errno; - return ret; -} - -pid_t wait3(int *status_ptr, int options, struct rusage *usage) -{ - maybe_init_thread(); - struct Op wait_op = {wait_op_code, {.wait = {.task_type = TASK_PID, .task_id = 0, .options = options, .status = 0, .ferrno = 0}}, {0}, 0, 0}; - prov_log_try(wait_op); - struct Op getrusage_op = {getrusage_op_code, {.getrusage = {.waitpid_arg = -1, .getrusage_arg = 0, .usage = {{0}}, .ferrno = 0}}, {0}, 0, 0}; - if (usage) - { - prov_log_try(getrusage_op); - } - pid_t ret = unwrapped_wait3(status_ptr, options, usage); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (unlikely(ret == (-1))) 
- { - wait_op.data.wait.ferrno = saved_errno; - if (usage) - { - getrusage_op.data.getrusage.ferrno = saved_errno; - } - } - else - { - wait_op.data.wait.task_id = ret; - wait_op.data.wait.status = *status_ptr; - if (usage) - { - memcpy(&getrusage_op.data.getrusage.usage, usage, sizeof(struct rusage)); - } - } - prov_log_record(wait_op); - if (usage) - { - prov_log_record(getrusage_op); - } - } - errno = saved_errno; - return ret; -} - -int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options) -{ - maybe_init_thread(); - struct Op wait_op = {wait_op_code, {.wait = {.task_type = TASK_TID, .task_id = 0, .options = options, .status = 0, .ferrno = 0}}, {0}, 0, 0}; - prov_log_try(wait_op); - int ret = unwrapped_waitid(idtype, id, infop, options); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (unlikely(ret == (-1))) - { - wait_op.data.wait.ferrno = saved_errno; - } - else - { - wait_op.data.wait.task_id = infop->si_pid; - wait_op.data.wait.status = infop->si_status; - } - prov_log_record(wait_op); - } - errno = saved_errno; - return ret; -} - -int thrd_create(thrd_t *thr, thrd_start_t func, void *arg) -{ - maybe_init_thread(); - struct Op op = {clone_op_code, {.clone = {.flags = (((((CLONE_FILES | CLONE_FS) | CLONE_IO) | CLONE_PARENT) | CLONE_SIGHAND) | CLONE_THREAD) | CLONE_VM, .task_type = TASK_ISO_C_THREAD, .task_id = 0, .run_pthread_atfork_handlers = false, .ferrno = 0}}, {0}, 0, 0}; - int ret = unwrapped_thrd_create(thr, func, arg); - int saved_errno = errno; - if (unlikely(ret != thrd_success)) - { - if (likely(prov_log_is_enabled())) - { - op.data.clone.ferrno = saved_errno; - prov_log_record(op); - } - } - else - { - if (likely(prov_log_is_enabled())) - { - op.data.clone.task_id = ret; - prov_log_record(op); - } - } - errno = saved_errno; - return ret; -} - -int thrd_join(thrd_t thr, int *res) -{ - maybe_init_thread(); - struct Op op = {wait_op_code, {.wait = {.task_type = TASK_ISO_C_THREAD, .task_id = thr, .options = 0, 
.status = 0, .ferrno = 0}}, {0}, 0, 0}; - int ret = unwrapped_thrd_join(thr, res); - int saved_errno = errno; - if (unlikely(ret != thrd_success)) - { - if (likely(prov_log_is_enabled())) - { - op.data.clone.ferrno = saved_errno; - prov_log_record(op); - } - } - else - { - op.data.wait.status = *res; - if (likely(prov_log_is_enabled())) - { - prov_log_record(op); - } - } - errno = saved_errno; - return ret; -} - -int pthread_create(pthread_t * restrict thread, const pthread_attr_t * restrict attr, void *(*start_routine)(void *), void * restrict arg) -{ - maybe_init_thread(); - struct Op op = {clone_op_code, {.clone = {.flags = (((((CLONE_FILES | CLONE_FS) | CLONE_IO) | CLONE_PARENT) | CLONE_SIGHAND) | CLONE_THREAD) | CLONE_VM, .task_type = TASK_PTHREAD, .task_id = 0, .run_pthread_atfork_handlers = false, .ferrno = 0}}, {0}, 0, 0}; - int ret = unwrapped_pthread_create(thread, attr, start_routine, arg); - int saved_errno = errno; - if (unlikely(ret != 0)) - { - if (likely(prov_log_is_enabled())) - { - op.data.clone.ferrno = saved_errno; - prov_log_record(op); - } - } - else - { - if (likely(prov_log_is_enabled())) - { - op.data.clone.task_id = *thread; - prov_log_record(op); - } - } - errno = saved_errno; - return ret; -} - -int pthread_join(pthread_t thread, void **retval) -{ - maybe_init_thread(); - struct Op op = {wait_op_code, {.wait = {.task_type = TASK_PTHREAD, .task_id = thread, .options = 0, .status = 0, .ferrno = 0}}, {0}, 0, 0}; - int ret = unwrapped_pthread_join(thread, retval); - int saved_errno = errno; - if (unlikely(ret != 0)) - { - if (likely(prov_log_is_enabled())) - { - op.data.clone.ferrno = saved_errno; - prov_log_record(op); - } - } - else - { - if (likely(prov_log_is_enabled())) - { - prov_log_record(op); - } - } - errno = saved_errno; - return ret; -} - -FILE * fopen64(const char *filename, const char *opentype) -{ - maybe_init_thread(); - struct Op op = {open_op_code, {.open = {.path = create_path_lazy(AT_FDCWD, filename, 0), .flags = 
fopen_to_flags(opentype), .mode = 0, .fd = -1, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - FILE * ret = unwrapped_fopen64(filename, opentype); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret == NULL) - { - op.data.open.ferrno = saved_errno; - } - else - { - op.data.open.fd = fileno(ret); - } - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -FILE * freopen64(const char *filename, const char *opentype, FILE *stream) -{ - maybe_init_thread(); - int original_fd = fileno(stream); - struct Op open_op = {open_op_code, {.open = {.path = create_path_lazy(AT_FDCWD, filename, 0), .flags = fopen_to_flags(opentype), .mode = 0, .fd = -1, .ferrno = 0}}, {0}, 0, 0}; - struct Op close_op = {close_op_code, {.close = {original_fd, original_fd, 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(open_op); - prov_log_try(close_op); - } - FILE * ret = unwrapped_freopen64(filename, opentype, stream); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - if (ret == NULL) - { - open_op.data.open.ferrno = saved_errno; - close_op.data.close.ferrno = saved_errno; - } - else - { - open_op.data.open.fd = fileno(ret); - } - prov_log_record(open_op); - prov_log_record(close_op); - } - errno = saved_errno; - return ret; -} - -int openat64(int dirfd, const char *filename, int flags, ...) -{ - maybe_init_thread(); - bool has_mode_arg = ((flags & O_CREAT) != 0) || ((flags & __O_TMPFILE) == __O_TMPFILE); - struct Op op = {open_op_code, {.open = {.path = create_path_lazy(dirfd, filename, (flags & O_NOFOLLOW) ? 
(AT_SYMLINK_NOFOLLOW) : (0)), .flags = flags, .mode = 0, .fd = -1, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - if (has_mode_arg) - { - va_list ap; - va_start(ap, flags); - op.data.open.mode = va_arg(ap, __type_mode_t); - va_end(ap); - } - prov_log_try(op); - } - size_t varargs_size = (((sizeof(dirfd)) + (sizeof(filename))) + (sizeof(flags))) + ((has_mode_arg) ? (sizeof(mode_t)) : (0)); - int ret = *((int *) __builtin_apply((void (*)()) unwrapped_openat64, __builtin_apply_args(), varargs_size)); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.open.ferrno = (unlikely(ret == (-1))) ? (errno) : (0); - op.data.open.fd = ret; - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int open64(const char *filename, int flags, ...) -{ - maybe_init_thread(); - bool has_mode_arg = ((flags & O_CREAT) != 0) || ((flags & __O_TMPFILE) == __O_TMPFILE); - struct Op op = {open_op_code, {.open = {.path = create_path_lazy(AT_FDCWD, filename, (flags & O_NOFOLLOW) ? (AT_SYMLINK_NOFOLLOW) : (0)), .flags = flags, .mode = 0, .fd = -1, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - if (has_mode_arg) - { - va_list ap; - va_start(ap, flags); - op.data.open.mode = va_arg(ap, __type_mode_t); - va_end(ap); - } - prov_log_try(op); - } - size_t varargs_size = ((sizeof(filename)) + (sizeof(flags))) + ((has_mode_arg) ? (sizeof(mode_t)) : (0)); - int ret = *((int *) __builtin_apply((void (*)()) unwrapped_open64, __builtin_apply_args(), varargs_size)); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.open.ferrno = (unlikely(ret == (-1))) ? 
(errno) : (0); - op.data.open.fd = ret; - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - -int create64(const char *filename, mode_t mode) -{ - maybe_init_thread(); - struct Op op = {open_op_code, {.open = {.path = create_path_lazy(AT_FDCWD, filename, 0), .flags = (O_WRONLY | O_CREAT) | O_TRUNC, .mode = mode, .fd = -1, .ferrno = 0}}, {0}, 0, 0}; - if (likely(prov_log_is_enabled())) - { - prov_log_try(op); - } - int ret = unwrapped_create64(filename, mode); - int saved_errno = errno; - if (likely(prov_log_is_enabled())) - { - op.data.open.ferrno = (unlikely(ret == (-1))) ? (errno) : (0); - op.data.open.fd = ret; - prov_log_record(op); - } - errno = saved_errno; - return ret; -} - diff --git a/probe_src/libprobe/generated/libc_hooks.h b/probe_src/libprobe/generated/libc_hooks.h deleted file mode 100644 index c85c983f..00000000 --- a/probe_src/libprobe/generated/libc_hooks.h +++ /dev/null @@ -1,109 +0,0 @@ -static FILE * (*unwrapped_fopen)(const char *filename, const char *opentype); -static FILE * (*unwrapped_freopen)(const char *filename, const char *opentype, FILE *stream); -static int (*unwrapped_fclose)(FILE *stream); -static int (*unwrapped_fcloseall)(); -static int (*unwrapped_openat)(int dirfd, const char *filename, int flags, ...); -static int (*unwrapped_open)(const char *filename, int flags, ...); -static int (*unwrapped_creat)(const char *filename, mode_t mode); -static int (*unwrapped_close)(int filedes); -static int (*unwrapped_close_range)(unsigned int lowfd, unsigned int maxfd, int flags); -static void (*unwrapped_closefrom)(int lowfd); -static int (*unwrapped_dup)(int old); -static int (*unwrapped_dup2)(int old, int new); -static int (*unwrapped_dup3)(int old, int new, int flags); -static int (*unwrapped_fcntl)(int filedes, int command, ...); -static int (*unwrapped_chdir)(const char *filename); -static int (*unwrapped_fchdir)(int filedes); -static DIR * (*unwrapped_opendir)(const char *dirname); -static DIR * 
(*unwrapped_fdopendir)(int fd); -static struct dirent * (*unwrapped_readdir)(DIR *dirstream); -static int (*unwrapped_readdir_r)(DIR *dirstream, struct dirent *entry, struct dirent **result); -static struct dirent64 * (*unwrapped_readdir64)(DIR *dirstream); -static int (*unwrapped_readdir64_r)(DIR *dirstream, struct dirent64 *entry, struct dirent64 **result); -static int (*unwrapped_closedir)(DIR *dirstream); -static void (*unwrapped_rewinddir)(DIR *dirstream); -static long int (*unwrapped_telldir)(DIR *dirstream); -static void (*unwrapped_seekdir)(DIR *dirstream, long int pos); -static int (*unwrapped_scandir)(const char *dir, struct dirent ***namelist, int (*selector)(const struct dirent *), int (*cmp)(const struct dirent **, const struct dirent **)); -static int (*unwrapped_scandir64)(const char *dir, struct dirent64 ***namelist, int (*selector)(const struct dirent64 *), int (*cmp)(const struct dirent64 **, const struct dirent64 **)); -static int (*unwrapped_scandirat)(int dirfd, const char * restrict dirp, struct dirent *** restrict namelist, int (*filter)(const struct dirent *), int (*compar)(const struct dirent **, const struct dirent **)); -static ssize_t (*unwrapped_getdents64)(int fd, void *buffer, size_t length); -static int (*unwrapped_ftw)(const char *filename, __ftw_func_t func, int descriptors); -static int (*unwrapped_ftw64)(const char *filename, __ftw64_func_t func, int descriptors); -static int (*unwrapped_nftw)(const char *filename, __nftw_func_t func, int descriptors, int flag); -static int (*unwrapped_nftw64)(const char *filename, __nftw64_func_t func, int descriptors, int flag); -static int (*unwrapped_link)(const char *oldname, const char *newname); -static int (*unwrapped_linkat)(int oldfd, const char *oldname, int newfd, const char *newname, int flags); -static int (*unwrapped_symlink)(const char *oldname, const char *newname); -static int (*unwrapped_symlinkat)(const char *target, int newdirfd, const char *linkpath); -static ssize_t 
(*unwrapped_readlink)(const char *filename, char *buffer, size_t size); -static ssize_t (*unwrapped_readlinkat)(int dirfd, const char *filename, char *buffer, size_t size); -static char * (*unwrapped_canonicalize_file_name)(const char *name); -static char * (*unwrapped_realpath)(const char * restrict name, char * restrict resolved); -static int (*unwrapped_unlink)(const char *filename); -static int (*unwrapped_rmdir)(const char *filename); -static int (*unwrapped_remove)(const char *filename); -static int (*unwrapped_rename)(const char *oldname, const char *newname); -static int (*unwrapped_mkdir)(const char *filename, mode_t mode); -static int (*unwrapped_mkdirat)(int dirfd, const char *pathname, mode_t mode); -static int (*unwrapped_stat)(const char *filename, struct stat *buf); -static int (*unwrapped_stat64)(const char *filename, struct stat64 *buf); -static int (*unwrapped_fstat)(int filedes, struct stat *buf); -static int (*unwrapped_fstat64)(int filedes, struct stat64 * restrict buf); -static int (*unwrapped_lstat)(const char *filename, struct stat *buf); -static int (*unwrapped_lstat64)(const char *filename, struct stat64 *buf); -static int (*unwrapped_statx)(int dirfd, const char * restrict pathname, int flags, unsigned int mask, struct statx * restrict statxbuf); -static int (*unwrapped_fstatat)(int dirfd, const char * restrict pathname, struct stat * restrict buf, int flags); -static int (*unwrapped_fstatat64)(int fd, const char * restrict file, struct stat64 * restrict buf, int flags); -static int (*unwrapped_chown)(const char *filename, uid_t owner, gid_t group); -static int (*unwrapped_fchown)(int filedes, uid_t owner, gid_t group); -static int (*unwrapped_lchown)(const char *pathname, uid_t owner, gid_t group); -static int (*unwrapped_fchownat)(int dirfd, const char *pathname, uid_t owner, gid_t group, int flags); -static int (*unwrapped_chmod)(const char *filename, mode_t mode); -static int (*unwrapped_fchmod)(int filedes, mode_t mode); -static int 
(*unwrapped_fchmodat)(int dirfd, const char *pathname, mode_t mode, int flags); -static int (*unwrapped_access)(const char *filename, int how); -static int (*unwrapped_faccessat)(int dirfd, const char *pathname, int mode, int flags); -static int (*unwrapped_utime)(const char *filename, const struct utimbuf *times); -static int (*unwrapped_utimes)(const char *filename, const struct timeval tvp[2]); -static int (*unwrapped_lutimes)(const char *filename, const struct timeval tvp[2]); -static int (*unwrapped_futimes)(int fd, const struct timeval tvp[2]); -static int (*unwrapped_truncate)(const char *filename, off_t length); -static int (*unwrapped_truncate64)(const char *name, off64_t length); -static int (*unwrapped_ftruncate)(int fd, off_t length); -static int (*unwrapped_ftruncate64)(int id, off64_t length); -static int (*unwrapped_mknod)(const char *filename, mode_t mode, dev_t dev); -static FILE * (*unwrapped_tmpfile)(); -static FILE * (*unwrapped_tmpfile64)(); -static char * (*unwrapped_tmpnam)(char *result); -static char * (*unwrapped_tmpnam_r)(char *result); -static char * (*unwrapped_tempnam)(const char *dir, const char *prefix); -static char * (*unwrapped_mktemp)(char *template); -static int (*unwrapped_mkstemp)(char *template); -static char * (*unwrapped_mkdtemp)(char *template); -static int (*unwrapped_execv)(const char *filename, char * const argv[]); -static int (*unwrapped_execl)(const char *filename, const char *arg0, ...); -static int (*unwrapped_execve)(const char *filename, char * const argv[], char * const env[]); -static int (*unwrapped_fexecve)(int fd, char * const argv[], char * const env[]); -static int (*unwrapped_execle)(const char *filename, const char *arg0, ...); -static int (*unwrapped_execvp)(const char *filename, char * const argv[]); -static int (*unwrapped_execlp)(const char *filename, const char *arg0, ...); -static int (*unwrapped_execvpe)(const char *filename, char * const argv[], char * const envp[]); -static pid_t 
(*unwrapped_fork)(); -static pid_t (*unwrapped__Fork)(); -static pid_t (*unwrapped_vfork)(); -static int (*unwrapped_clone)(fn_ptr_int_void_ptr fn, void *stack, int flags, void *arg, ...); -static pid_t (*unwrapped_waitpid)(pid_t pid, int *status_ptr, int options); -static pid_t (*unwrapped_wait)(int *status_ptr); -static pid_t (*unwrapped_wait4)(pid_t pid, int *status_ptr, int options, struct rusage *usage); -static pid_t (*unwrapped_wait3)(int *status_ptr, int options, struct rusage *usage); -static int (*unwrapped_waitid)(idtype_t idtype, id_t id, siginfo_t *infop, int options); -static int (*unwrapped_thrd_create)(thrd_t *thr, thrd_start_t func, void *arg); -static int (*unwrapped_thrd_join)(thrd_t thr, int *res); -static int (*unwrapped_pthread_create)(pthread_t * restrict thread, const pthread_attr_t * restrict attr, void *(*start_routine)(void *), void * restrict arg); -static int (*unwrapped_pthread_join)(pthread_t thread, void **retval); -static FILE * (*unwrapped_fopen64)(const char *filename, const char *opentype); -static FILE * (*unwrapped_freopen64)(const char *filename, const char *opentype, FILE *stream); -static int (*unwrapped_openat64)(int dirfd, const char *filename, int flags, ...); -static int (*unwrapped_open64)(const char *filename, int flags, ...); -static int (*unwrapped_create64)(const char *filename, mode_t mode); diff --git a/probe_src/python/README.md b/probe_src/python/README.md deleted file mode 100644 index a68a2987..00000000 --- a/probe_src/python/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# probe_py - -probe_py is a package that implements CLI functionality of PROBE and Python library functionality of PROBE. 
- -Required reading: diff --git a/probe_src/python/probe_py/manual/graph_utils.py b/probe_src/python/probe_py/manual/graph_utils.py deleted file mode 100644 index 4d629dd7..00000000 --- a/probe_src/python/probe_py/manual/graph_utils.py +++ /dev/null @@ -1,13 +0,0 @@ -import pathlib -import networkx # type: ignore - - -def serialize_graph( - graph: networkx.Graph, - output: pathlib.Path, -) -> None: - pydot_graph = networkx.drawing.nx_pydot.to_pydot(graph) - if output.suffix == "dot": - pydot_graph.write_raw(output) - else: - pydot_graph.write_png(output) diff --git a/probe_src/tests/c/README.md b/probe_src/tests/c/README.md deleted file mode 100644 index e69de29b..00000000 diff --git a/reproducibility_tests/.gitignore b/reproducibility_tests/.gitignore deleted file mode 100644 index 9a1a6a2d..00000000 --- a/reproducibility_tests/.gitignore +++ /dev/null @@ -1 +0,0 @@ -test_files/ diff --git a/setup_devshell.sh b/setup_devshell.sh index 419703a4..7dea7d4b 100644 --- a/setup_devshell.sh +++ b/setup_devshell.sh @@ -4,34 +4,30 @@ red='\033[0;31m' clr='\033[0m' project_root="$(dirname "$(realpath "${BASH_SOURCE[0]}")")" +printf "project_root = %s\n" "$project_root" # Rust frontend uses CPATH to find libprobe headers -export CPATH="$project_root/probe_src/libprobe/include:$CPATH" +export CPATH="$project_root/libprobe/include:$CPATH" # Rust CLI uses __PROBE_LIB to find libprobe binary -export __PROBE_LIB="$project_root/probe_src/libprobe/build" +export __PROBE_LIB="$project_root/libprobe/build" # Ensure libprobe.so gets maked if [ ! -f "$__PROBE_LIB/libprobe.so" ]; then - echo -e "${red}Please run 'just compile-lib' to compile libprobe${clr}" + printf "%sPlease run 'just compile-lib' to compile libprobe%s\n" "$red" "$clr" fi # Rust code uses PYGEN_OUTFILE to determine where to write this file. -# TODO: Replace this with a static path, because it is never not this path. 
-export PYGEN_OUTFILE="$project_root/probe_src/frontend/python/probe_py/generated/ops.py" +export PYGEN_OUTFILE="$project_root/probe_py/probe_py/ops.py" # Ensure PROBE CLI gets built -if [ ! -f $project_root/probe_src/frontend/target/release/probe ]; then - echo -e "${red}Please run 'just compile-cli' to compile probe binary${clr}" +if [ ! -f "$project_root/cli-wrapper/target/release/probe" ]; then + printf "%sPlease run 'just compile-cli' to compile probe binary%s\n" "$red" "$clr" fi # Add PROBE CLI to path -export PATH="$project_root/probe_src/frontend/target/release:$PATH" +export PATH="$project_root/cli-wrapper/target/release:$PATH" -# Add probe_py.generated to the Python path -export PYTHONPATH="$project_root/probe_src/frontend/python:$PYTHONPATH" -export MYPYPATH="$project_root/probe_src/frontend/python:$MYPYPATH" - -# Add probe_py.manual to the Python path -export PYTHONPATH="$project_root/probe_src/python:$PYTHONPATH" -export MYPYPATH="$project_root/probe_src/python:$MYPYPATH" +# Add probe_py to the Python path +export PYTHONPATH="$project_root/probe_py/:$PYTHONPATH" +export MYPYPATH="$project_root/probe_py/mypy_stubs:$project_root/probe_py/:$MYPYPATH" diff --git a/probe_src/tests/.gitignore b/tests/.gitignore similarity index 100% rename from probe_src/tests/.gitignore rename to tests/.gitignore diff --git a/probe_src/tests/c/.gitignore b/tests/examples/.gitignore similarity index 100% rename from probe_src/tests/c/.gitignore rename to tests/examples/.gitignore diff --git a/probe_src/tests/c/Makefile b/tests/examples/Makefile similarity index 100% rename from probe_src/tests/c/Makefile rename to tests/examples/Makefile diff --git a/probe_src/tests/c/createFile.c b/tests/examples/createFile.c similarity index 98% rename from probe_src/tests/c/createFile.c rename to tests/examples/createFile.c index 7aa1d64c..b32f65f0 100644 --- a/probe_src/tests/c/createFile.c +++ b/tests/examples/createFile.c @@ -1,8 +1,8 @@ #include #include #include -#include -#include 
+#include +#include #include #define NUM_THREADS 3 @@ -123,4 +123,3 @@ int main() { pthread_exit(NULL); } - diff --git a/probe_src/tests/c/hello_world.c b/tests/examples/hello_world.c similarity index 99% rename from probe_src/tests/c/hello_world.c rename to tests/examples/hello_world.c index 43490cca..60b21937 100644 --- a/probe_src/tests/c/hello_world.c +++ b/tests/examples/hello_world.c @@ -35,4 +35,3 @@ int main() { printf("Main: program exiting.\n"); pthread_exit(NULL); } - diff --git a/probe_src/tests/c/mutex.c b/tests/examples/mutex.c similarity index 100% rename from probe_src/tests/c/mutex.c rename to tests/examples/mutex.c diff --git a/probe_src/tests/c/simple.c b/tests/examples/simple.c similarity index 100% rename from probe_src/tests/c/simple.c rename to tests/examples/simple.c diff --git a/reproducibility_tests/test_determinism.cxx b/tests/examples/test_determinism.cxx similarity index 100% rename from reproducibility_tests/test_determinism.cxx rename to tests/examples/test_determinism.cxx diff --git a/reproducibility_tests/test.sh b/tests/examples/test_determinism.sh similarity index 100% rename from reproducibility_tests/test.sh rename to tests/examples/test_determinism.sh diff --git a/tests/lightweight_env.sh b/tests/lightweight_env.sh new file mode 100755 index 00000000..19d9321b --- /dev/null +++ b/tests/lightweight_env.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +# nix develop brings in a ton of stuff to the env +# which complicates testing probe +# To simplify, use this script. 
+ +project_root="$(dirname "$(dirname "$(realpath "${BASH_SOURCE[0]}")")")" + +path="$project_root/cli-wrapper/target/release" + +env - __PROBE_LIB="$__PROBE_LIB" PATH="$path" "${@}" diff --git a/probe_src/performance_test.py b/tests/performance_test.py similarity index 88% rename from probe_src/performance_test.py rename to tests/performance_test.py index 9e4845c2..aacef1bd 100755 --- a/probe_src/performance_test.py +++ b/tests/performance_test.py @@ -6,12 +6,12 @@ import os import shutil import resource -from dataclasses import dataclass -from typing import Any +import dataclasses +import typing import errno -from pathlib import Path +import pathlib -@dataclass +@dataclasses.dataclass class Result: returncode: int stdout: str @@ -19,11 +19,11 @@ class Result: duration: float rusage: resource.struct_rusage -PROBE_LOG = Path("probe_log") -PROBE_RECORD_DIR = Path("probe_record") +PROBE_LOG = pathlib.Path("probe_log") +PROBE_RECORD_DIR = pathlib.Path("probe_record") -class ResourcePopen(subprocess.Popen): - def _try_wait(self, wait_flags): +class ResourcePopen(subprocess.Popen[bytes]): + def _try_wait(self, wait_flags: int) -> tuple[int, int]: try: (pid, sts, res) = os.wait4(self.pid, wait_flags) except OSError as e: @@ -36,11 +36,10 @@ def _try_wait(self, wait_flags): return (pid, sts) def resource_call( - *popenargs: Any, - timeout: int | None = None, - **kwargs: Any, + popenargs: typing.Sequence[str], + timeout: float | None = None, ) -> Result: - with ResourcePopen(*popenargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False, **kwargs) as p: + with ResourcePopen(popenargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p: start = datetime.datetime.now() try: stdout, stderr = p.communicate(timeout=timeout) @@ -53,7 +52,7 @@ def resource_call( DELAY = 0.0 -def cleanup(): +def cleanup() -> None: if PROBE_LOG.exists(): PROBE_LOG.unlink() if PROBE_RECORD_DIR.exists(): @@ -92,7 +91,12 @@ def benchmark_command(command: list[str], warmup_iterations: int, 
benchmark_iter return results -def write_results_to_csv(writer, command_to_run, phase, results): +def write_results_to_csv( + writer: csv.DictWriter[str], + command_to_run: str, + phase: str, + results: list[Result], +) -> None: for idx, result in enumerate(results, start=1): rusage = result.rusage writer.writerow({ @@ -118,7 +122,7 @@ def write_results_to_csv(writer, command_to_run, phase, results): 'ru_nivcsw': rusage.ru_nivcsw }) -def benchmark_with_transcription(commands_to_run: list[list[str]], warmup_count: int, benchmark_count: int): +def benchmark_with_transcription(commands_to_run: list[list[str]], warmup_count: int, benchmark_count: int) -> None: with open('benchmark_results.csv', mode='w', newline='') as csv_file: fieldnames = [ 'Command', 'Phase', 'Return Code', 'Duration', @@ -167,5 +171,5 @@ def benchmark_with_transcription(commands_to_run: list[list[str]], warmup_count: ["uptime"], ] - os.chdir(Path(__file__).resolve().parent.parent) + os.chdir(pathlib.Path(__file__).resolve().parent.parent) benchmark_with_transcription(commands, warmup_count=1, benchmark_count=4) diff --git a/test/ssh_wrapper_test/openssh-server.py b/tests/ssh_wrapper_test/openssh-server.py similarity index 92% rename from test/ssh_wrapper_test/openssh-server.py rename to tests/ssh_wrapper_test/openssh-server.py index 2e48c9e9..6b39253b 100644 --- a/test/ssh_wrapper_test/openssh-server.py +++ b/tests/ssh_wrapper_test/openssh-server.py @@ -2,7 +2,7 @@ import threading import os -def run_openssh_server(): +def run_openssh_server() -> None: os.chdir("./openssh-server/") process = subprocess.Popen("docker compose up -d", shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) @@ -13,7 +13,7 @@ def run_openssh_server(): Press "q" and Enter to stop the server. 
''') - def wait_for_input(): + def wait_for_input() -> None: while True: user_input = input() if user_input.strip().lower() == 'q': diff --git a/test/ssh_wrapper_test/openssh-server/docker-compose.yml b/tests/ssh_wrapper_test/openssh-server/docker-compose.yml similarity index 100% rename from test/ssh_wrapper_test/openssh-server/docker-compose.yml rename to tests/ssh_wrapper_test/openssh-server/docker-compose.yml diff --git a/probe_src/tests/docker_os_matrix.py b/tests/test_docker_os_matrix.py similarity index 94% rename from probe_src/tests/docker_os_matrix.py rename to tests/test_docker_os_matrix.py index 24f9edb4..0f70231a 100644 --- a/probe_src/tests/docker_os_matrix.py +++ b/tests/test_docker_os_matrix.py @@ -6,6 +6,7 @@ import shlex import pathlib import asyncio +import pytest project_root = pathlib.Path(__file__).resolve().parent.parent.parent @@ -129,7 +130,8 @@ async def run_in_docker( ] -async def main(max_concurrency: int, capture_output: bool) -> bool: +@pytest.mark.skip("This test takes a long time") +async def test_docker(max_concurrency: int = 1, capture_output: bool = True) -> None: results = as_completed_with_concurrency(max_concurrency, [ run_in_docker( f"probe-{image}-{tag}", @@ -143,7 +145,6 @@ async def main(max_concurrency: int, capture_output: bool) -> bool: for image, tags, pre_script in images for tag in tags ]) - any_failed = False for result in results: image, success, stdout, stderr = await result if not success: @@ -151,11 +152,4 @@ async def main(max_concurrency: int, capture_output: bool) -> bool: sys.stdout.buffer.write(stdout) sys.stderr.buffer.write(stderr) print("\n") - any_failed = True - return any_failed - -if asyncio.run(main( - max_concurrency=1, - capture_output=False, -)): - sys.exit(1) + assert success, f"{image} failed" diff --git a/tests/test_handoff_to_python.py b/tests/test_handoff_to_python.py new file mode 100644 index 00000000..cece5cfd --- /dev/null +++ b/tests/test_handoff_to_python.py @@ -0,0 +1,8 @@ +import 
shlex +import subprocess + + +def test_handoff() -> None: + cmd = ["probe", "validate", "--help"] + print(shlex.join(cmd)) + subprocess.run(cmd, check=True) diff --git a/probe_src/tests/test_path_stuff.py b/tests/test_path_stuff.py similarity index 85% rename from probe_src/tests/test_path_stuff.py rename to tests/test_path_stuff.py index 484f6c9d..7f462af6 100644 --- a/probe_src/tests/test_path_stuff.py +++ b/tests/test_path_stuff.py @@ -1,7 +1,4 @@ import shutil -import pytest -import pathlib -import shlex import subprocess @@ -9,7 +6,7 @@ nonexistent_command = "eugrhuerhuliaflsd" -def test_probe_nonexistent_command(): +def test_probe_nonexistent_command() -> None: assert shutil.which(nonexistent_command) is None, "please choose a nonexistent_command" proc = subprocess.run( ["probe", "record", "-f", nonexistent_command], @@ -20,7 +17,7 @@ def test_probe_nonexistent_command(): assert b"SIGSEGV" not in proc.stderr -def test_probe_empty_path(): +def test_probe_empty_path() -> None: proc = subprocess.run( ["probe", "record", "-f", "env", "PATH=", nonexistent_command], capture_output=True, diff --git a/probe_src/tests/test_integration.py b/tests/test_record.py similarity index 94% rename from probe_src/tests/test_integration.py rename to tests/test_record.py index 188994f7..f2e88c7a 100644 --- a/probe_src/tests/test_integration.py +++ b/tests/test_record.py @@ -15,7 +15,7 @@ def bash(*cmds: str) -> list[str]: commands = [ ["echo", "hi"], - ["head", "../../../flake.nix"], + ["head", "../../flake.nix"], bash( "echo", "#include \n#include \nint main() {open(\".\", 0); printf(\"hello world\\n\"); return 0; }", @@ -66,6 +66,10 @@ def test_cmds(mode: list[str], command: list[str]) -> None: # GCC creates many threads and processes, so this stuff is pretty slow. 
return + cmd = ["probe", "export", "debug-text"] + print(shlex.join(cmd)) + subprocess.run(cmd, check=True, cwd=tmpdir) + cmd = ["probe", "export", "ops-graph", "test.png"] print(shlex.join(cmd)) subprocess.run(cmd, check=True, cwd=tmpdir)