diff --git a/README.md b/README.md index 80de5a50..76271f2d 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ *Capsula*, a Latin word meaning *box*, is a Python package designed to help researchers and developers easily capture and reproduce their command execution context. The primary aim of Capsula is to tackle the reproducibility problem by providing a way to capture the execution context at any point in time, preserving it for future use. This ensures that you can reproduce the exact conditions of past command execution, fostering reproducibility and consistency over time. -## Usage +Features: 1. **Context Capture:** Capsula logs the details of the execution context for future reference and reproduction. The context includes, but is not limited to, the Python version, system environment variables, and the Git commit hash of the current working directory. @@ -18,63 +18,288 @@ 3. **Context Diffing (to be implemented):** Capsula can compare the current context with the context captured at a previous point in time. This is useful for identifying changes or for reproducing the exact conditions of a past execution. -See the following Python script: +## Usage + +Prepare a `capsula.toml` file in the root directory of your project. An example of the `capsula.toml` file is as follows: + +```toml +[pre-run] +# Contexts to be captured before the execution of the decorated function/CLI command. +context = [ + { type = "CommandContext", command = "poetry check --lock" }, + { type = "CommandContext", command = "pip freeze --exclude-editable > requirements.txt" }, + { type = "FileContext", path = "pyproject.toml", copy = true }, + { type = "FileContext", path = "requirements.txt", move = true }, + { type = "GitRepositoryContext", name = "capsula" }, + { type = "CwdContext" }, + { type = "CpuContext" }, +] +# Reporter to be used to report the captured contexts. 
+reporter = [{ type = "JsonDumpReporter" }] + +[in-run] +# Watchers to be used during the execution of the decorated function/CLI command. +watcher = [{ type = "UncaughtExceptionWatcher" }, { type = "TimeWatcher" }] +# Reporter to be used to report the execution status. +reporter = [{ type = "JsonDumpReporter" }] + +[post-run] +# Contexts to be captured after the execution of the decorated function/CLI command. +context = [{ type = "FileContext", path = "examples/pi.txt", move = true }] +# Reporter to be used to report the captured contexts. +reporter = [{ type = "JsonDumpReporter" }] +``` + +Then, all you need to do is to decorate your Python function with the `@capsula.run` decorator and specify the `load_from_config` argument as `True`. The following is an example of a Python script that estimates the value of π using the Monte Carlo method: ```python -import logging import random from pathlib import Path import capsula -logger = logging.getLogger(__name__) - - -@capsula.run() -@capsula.reporter(capsula.JsonDumpReporter.default(), mode="all") -@capsula.context(capsula.FileContext.default(Path(__file__).parent / "pi.txt", move=True), mode="post") -@capsula.watcher(capsula.UncaughtExceptionWatcher("Exception")) -@capsula.watcher(capsula.TimeWatcher("calculation_time")) -@capsula.context(capsula.FileContext.default(Path(__file__).parents[1] / "pyproject.toml", copy=True), mode="pre") -@capsula.context(capsula.FileContext.default(Path(__file__).parents[1] / "poetry.lock", copy=True), mode="pre") -@capsula.context(capsula.FileContext.default(Path(__file__).parents[1] / "requirements.txt", move=True), mode="pre") -@capsula.context(capsula.GitRepositoryContext.default(), mode="pre") -@capsula.context(capsula.CommandContext("poetry check --lock"), mode="pre") -@capsula.context(capsula.CommandContext("pip freeze --exclude-editable > requirements.txt"), mode="pre") -@capsula.context(capsula.EnvVarContext("HOME"), mode="pre") 
-@capsula.context(capsula.EnvVarContext("PATH"), mode="pre") -@capsula.context(capsula.CwdContext(), mode="pre") -@capsula.context(capsula.CpuContext(), mode="pre") -@capsula.pass_pre_run_capsule -def calculate_pi(pre_run_capsule: capsula.Capsule, *, n_samples: int = 1_000, seed: int = 42) -> None: - logger.info(f"Calculating pi with {n_samples} samples.") - logger.debug(f"Seed: {seed}") +@capsula.run(load_from_config=True) +def calculate_pi(n_samples: int = 1_000, seed: int = 42) -> None: random.seed(seed) - xs = (random.random() for _ in range(n_samples)) # noqa: S311 - ys = (random.random() for _ in range(n_samples)) # noqa: S311 + xs = (random.random() for _ in range(n_samples)) + ys = (random.random() for _ in range(n_samples)) inside = sum(x * x + y * y <= 1.0 for x, y in zip(xs, ys)) + # You can record values to the capsule using the `record` method. capsula.record("inside", inside) pi_estimate = (4.0 * inside) / n_samples - logger.info(f"Pi estimate: {pi_estimate}") + print(f"Pi estimate: {pi_estimate}") capsula.record("pi_estimate", pi_estimate) - logger.info(pre_run_capsule.data) - logger.info(capsula.current_run_name()) + print(f"Run name: {capsula.current_run_name()}") with (Path(__file__).parent / "pi.txt").open("w") as output_file: - output_file.write(f"Pi estimate: {pi_estimate}. Git SHA: {pre_run_capsule.data[('git', 'capsula')]['sha']}") - + output_file.write(f"Pi estimate: {pi_estimate}.") if __name__ == "__main__": calculate_pi(n_samples=1_000) ``` -- `@capsula.run` decorator specifies the directory where the execution context is stored. The directory is automatically created if it does not exist. -- `@capsula.context` decorator specifies the context to be captured. The context is captured before the execution of the decorated function if `mode` is `"pre"`, after the execution if `mode` is `"post"`, or both if `mode` is `"all"`. -- `@capsula.reporter` decorator specifies the reporter to be used. 
The reporter is called after the execution of the decorated function if `mode` is `"post"`, or both before and after the execution if `mode` is `"all"`. -- `@capsula.watcher` decorator specifies the watcher to be used during the execution of the decorated function. -- `capsula.record` function records the value of the specified variable. The value is stored in the execution context. +
+Example of output <code>pre-run-report.json</code>: +
{
+  "command": {
+    "poetry check --lock": {
+      "command": "poetry check --lock",
+      "cwd": null,
+      "returncode": 0,
+      "stdout": "All set!\n",
+      "stderr": ""
+    },
+    "pip freeze --exclude-editable > requirements.txt": {
+      "command": "pip freeze --exclude-editable > requirements.txt",
+      "cwd": null,
+      "returncode": 0,
+      "stdout": "",
+      "stderr": ""
+    }
+  },
+  "file": {
+    "pyproject.toml": {
+      "copied_to": [
+        "vault/calculate_pi_20240225_221901_M7b3/pyproject.toml"
+      ],
+      "moved_to": null,
+      "hash": {
+        "algorithm": "sha256",
+        "digest": "6c59362587bf43411461b69675980ea338d83a468acddbc8f6cac4f2c17f7605"
+      }
+    },
+    "requirements.txt": {
+      "copied_to": [],
+      "moved_to": "vault/calculate_pi_20240225_221901_M7b3",
+      "hash": {
+        "algorithm": "sha256",
+        "digest": "99d0dbddd7f27aa25bd2d7ce3e2f4a555cdb48b039d73a6cf01fc5fa33f527e1"
+      }
+    }
+  },
+  "git": {
+    "capsula": {
+      "working_dir": "/home/nomura/ghq/github.com/shunichironomura/capsula",
+      "sha": "ff51cb6245e43253d036fcaa0b2af09c0089b783",
+      "remotes": {
+        "origin": "ssh://git@github.com/shunichironomura/capsula.git"
+      },
+      "branch": "improve-example",
+      "is_dirty": true
+    }
+  },
+  "cwd": "/home/nomura/ghq/github.com/shunichironomura/capsula",
+  "cpu": {
+    "python_version": "3.8.17.final.0 (64 bit)",
+    "cpuinfo_version": [
+      9,
+      0,
+      0
+    ],
+    "cpuinfo_version_string": "9.0.0",
+    "arch": "X86_64",
+    "bits": 64,
+    "count": 12,
+    "arch_string_raw": "x86_64",
+    "vendor_id_raw": "GenuineIntel",
+    "brand_raw": "Intel(R) Core(TM) i5-10400 CPU @ 2.90GHz",
+    "hz_advertised_friendly": "2.9000 GHz",
+    "hz_actual_friendly": "2.9040 GHz",
+    "hz_advertised": [
+      2900000000,
+      0
+    ],
+    "hz_actual": [
+      2904010000,
+      0
+    ],
+    "stepping": 5,
+    "model": 165,
+    "family": 6,
+    "flags": [
+      "3dnowprefetch",
+      "abm",
+      "adx",
+      "aes",
+      "apic",
+      "arch_capabilities",
+      "arch_perfmon",
+      "avx",
+      "avx2",
+      "bmi1",
+      "bmi2",
+      "clflush",
+      "clflushopt",
+      "cmov",
+      "constant_tsc",
+      "cpuid",
+      "cx16",
+      "cx8",
+      "de",
+      "ept",
+      "ept_ad",
+      "erms",
+      "f16c",
+      "flush_l1d",
+      "fma",
+      "fpu",
+      "fsgsbase",
+      "fxsr",
+      "ht",
+      "hypervisor",
+      "ibpb",
+      "ibrs",
+      "ibrs_enhanced",
+      "invpcid",
+      "invpcid_single",
+      "lahf_lm",
+      "lm",
+      "mca",
+      "mce",
+      "mmx",
+      "movbe",
+      "msr",
+      "mtrr",
+      "nopl",
+      "nx",
+      "osxsave",
+      "pae",
+      "pat",
+      "pcid",
+      "pclmulqdq",
+      "pdcm",
+      "pdpe1gb",
+      "pge",
+      "pni",
+      "popcnt",
+      "pse",
+      "pse36",
+      "rdrand",
+      "rdrnd",
+      "rdseed",
+      "rdtscp",
+      "rep_good",
+      "sep",
+      "smap",
+      "smep",
+      "ss",
+      "ssbd",
+      "sse",
+      "sse2",
+      "sse4_1",
+      "sse4_2",
+      "ssse3",
+      "stibp",
+      "syscall",
+      "tpr_shadow",
+      "tsc",
+      "vme",
+      "vmx",
+      "vnmi",
+      "vpid",
+      "x2apic",
+      "xgetbv1",
+      "xsave",
+      "xsavec",
+      "xsaveopt",
+      "xsaves",
+      "xtopology"
+    ],
+    "l3_cache_size": 12582912,
+    "l2_cache_size": "1.5 MiB",
+    "l1_data_cache_size": 196608,
+    "l1_instruction_cache_size": 196608,
+    "l2_cache_line_size": 256,
+    "l2_cache_associativity": 6
+  }
+}
+
+ +
+Example of output <code>in-run-report.json</code>: +
{
+  "function": {
+    "calculate_pi": {
+      "file_path": "examples/simple_decorator.py",
+      "first_line_no": 10,
+      "args": [],
+      "kwargs": {
+        "n_samples": 1000
+      }
+    }
+  },
+  "inside": 782,
+  "pi_estimate": 3.128,
+  "exception": {
+    "exception": {
+      "exc_type": null,
+      "exc_value": null,
+      "traceback": null
+    }
+  },
+  "time": {
+    "execution_time": "0:00:00.000658"
+  }
+}
+
+ +
+Example of output <code>post-run-report.json</code>: +
{
+  "file": {
+    "examples/pi.txt": {
+      "copied_to": [],
+      "moved_to": "vault/calculate_pi_20240225_221901_M7b3",
+      "hash": {
+        "algorithm": "sha256",
+        "digest": "a64c761cb6b6f9ef1bc1f6afa6ba44d796c5c51d14df0bdc9d3ab9ced7982a74"
+      }
+    }
+  }
+}
+
## Installation diff --git a/capsula.toml b/capsula.toml index f7cefe9e..ef6e8e41 100644 --- a/capsula.toml +++ b/capsula.toml @@ -1,73 +1,19 @@ -[[pre-run.context]] -type = "CommandContext" -command = "poetry check --lock" - -[[pre-run.context]] -type = "CommandContext" -command = "pip freeze --exclude-editable > requirements.txt" - -[[pre-run.context]] -type = "FileContext" -path = "pyproject.toml" -compute_hash = true -hash_algorithm = "sha256" -copy = true -move = false - -[[pre-run.context]] -type = "FileContext" -path = "poetry.lock" -compute_hash = true -hash_algorithm = "sha256" -copy = true -move = false - -[[pre-run.context]] -type = "FileContext" -path = "requirements.txt" -compute_hash = true -hash_algorithm = "sha256" -copy = false -move = true - -[[pre-run.context]] -type = "GitRepositoryContext" -name = "capsula" -allow_dirty = true - -[[pre-run.context]] -type = "EnvVarContext" -name = "HOME" - -[[pre-run.context]] -type = "CwdContext" - -[[pre-run.context]] -type = "CpuContext" - -[[pre-run.reporter]] -type = "JsonDumpReporter" - -[[in-run.watcher]] -type = "UncaughtExceptionWatcher" -name = "Exception" -reraise = false -# TODO: configure the base exception class - -[[in-run.watcher]] -type = "TimeWatcher" -name = "calculation_time" - -[[in-run.reporter]] -type = "JsonDumpReporter" - -[[post-run.context]] -type = "FileContext" -path = "examples/pi.txt" -compute_hash = true -hash_algorithm = "sha256" -copy = false -move = true - -[[post-run.reporter]] -type = "JsonDumpReporter" +[pre-run] +context = [ + { type = "CommandContext", command = "poetry check --lock" }, + { type = "CommandContext", command = "pip freeze --exclude-editable > requirements.txt" }, + { type = "FileContext", path = "pyproject.toml", copy = true }, + { type = "FileContext", path = "requirements.txt", move = true }, + { type = "GitRepositoryContext", name = "capsula" }, + { type = "CwdContext" }, + { type = "CpuContext" }, +] +reporter = [{ type = "JsonDumpReporter" }] + 
+[in-run] +watcher = [{ type = "UncaughtExceptionWatcher" }, { type = "TimeWatcher" }] +reporter = [{ type = "JsonDumpReporter" }] + +[post-run] +context = [{ type = "FileContext", path = "examples/pi.txt", move = true }] +reporter = [{ type = "JsonDumpReporter" }] diff --git a/capsula/_context/_command.py b/capsula/_context/_command.py index 0a8bc3db..01368de3 100644 --- a/capsula/_context/_command.py +++ b/capsula/_context/_command.py @@ -21,7 +21,7 @@ class _CommandContextData(TypedDict): class CommandContext(ContextBase): - def __init__(self, command: str, *, cwd: Path | None = None, check: bool = False) -> None: + def __init__(self, command: str, *, cwd: Path | None = None, check: bool = True) -> None: self.command = command self.cwd = cwd self.check = check diff --git a/capsula/_context/_git.py b/capsula/_context/_git.py index fc7a6e4f..a4ec0e9f 100644 --- a/capsula/_context/_git.py +++ b/capsula/_context/_git.py @@ -42,7 +42,7 @@ def __init__( path: Path | str, diff_file: Path | str | None = None, search_parent_directories: bool = False, - allow_dirty: bool = False, + allow_dirty: bool = True, ) -> None: self.name = name self.path = Path(path) diff --git a/capsula/_watcher/_exception.py b/capsula/_watcher/_exception.py index 19a13ed0..a2b7befd 100644 --- a/capsula/_watcher/_exception.py +++ b/capsula/_watcher/_exception.py @@ -15,7 +15,13 @@ class UncaughtExceptionWatcher(WatcherBase): - def __init__(self, name: str, *, base: type[BaseException] = Exception, reraise: bool = False) -> None: + def __init__( + self, + name: str = "exception", + *, + base: type[BaseException] = Exception, + reraise: bool = False, + ) -> None: self.name = name self.base = base self.reraise = reraise diff --git a/capsula/_watcher/_time.py b/capsula/_watcher/_time.py index c55e726d..f84aa383 100644 --- a/capsula/_watcher/_time.py +++ b/capsula/_watcher/_time.py @@ -15,7 +15,7 @@ class TimeWatcher(WatcherBase): - def __init__(self, name: str) -> None: + def __init__(self, name: str 
= "execution_time") -> None: self.name = name self.duration: timedelta | None = None diff --git a/capsula/encapsulator.py b/capsula/encapsulator.py index 7b7870d7..fbc07cbb 100644 --- a/capsula/encapsulator.py +++ b/capsula/encapsulator.py @@ -83,7 +83,7 @@ def add_watcher(self, watcher: WatcherBase, key: _CapsuleItemKey | None = None) raise KeyConflictError(key) self.watchers[key] = watcher - def encapsulate(self, *, abort_on_error: bool = False) -> Capsule: + def encapsulate(self, *, abort_on_error: bool = True) -> Capsule: data = {} fails = {} for key, capsule_item in chain(self.contexts.items(), self.watchers.items()): diff --git a/examples/simple_decorator.py b/examples/simple_decorator.py index d52d13ea..ec03e72e 100644 --- a/examples/simple_decorator.py +++ b/examples/simple_decorator.py @@ -6,29 +6,24 @@ import capsula -logger = logging.getLogger(__name__) - @capsula.run(load_from_config=True) -@capsula.pass_pre_run_capsule -def calculate_pi(pre_run_capsule: capsula.Capsule, *, n_samples: int = 1_000, seed: int = 42) -> None: - logger.info(f"Calculating pi with {n_samples} samples.") - logger.debug(f"Seed: {seed}") +def calculate_pi(n_samples: int = 1_000, seed: int = 42) -> None: random.seed(seed) xs = (random.random() for _ in range(n_samples)) # noqa: S311 ys = (random.random() for _ in range(n_samples)) # noqa: S311 inside = sum(x * x + y * y <= 1.0 for x, y in zip(xs, ys)) + # You can record values to the capsule using the `record` method. capsula.record("inside", inside) pi_estimate = (4.0 * inside) / n_samples - logger.info(f"Pi estimate: {pi_estimate}") + print(f"Pi estimate: {pi_estimate}") capsula.record("pi_estimate", pi_estimate) - # raise CapsulaError("This is a test error.") - logger.info(f"Run name: {capsula.current_run_name()}") + print(f"Run name: {capsula.current_run_name()}") with (Path(__file__).parent / "pi.txt").open("w") as output_file: - output_file.write(f"Pi estimate: {pi_estimate}. 
Git SHA: {pre_run_capsule.data[('git', 'capsula')]['sha']}") + output_file.write(f"Pi estimate: {pi_estimate}.") if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index afccafa7..c9e64a07 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,6 +117,7 @@ keep-runtime-typing = true [tool.ruff.lint.per-file-ignores] "examples/*" = [ "INP001", # implicit-namespace-package + "T201", # print statement ] "capsula/_backport.py" = [ "ANN202", # Missing return type annotation