Merge pull request #302 from lsst/tickets/DM-41605

DM-41605: Command-line aggregator for pipetask report
lsst · Oct 16, 2024 · 3abcffb · 3abcffb
2 parents 28b775e + 46f434f
commit 3abcffb
Show file tree

Hide file tree

Showing 6 changed files with 242 additions and 287 deletions.
diff --git a/doc/changes/DM-41605.feature.md b/doc/changes/DM-41605.feature.md
@@ -0,0 +1,6 @@
+Aggregate multiple `pipetask report` outputs into one holistic `Summary`.
+
+While the `QuantumProvenanceGraph` was designed to resolve processing over
+dataquery-identified groups, `pipetask aggregate-reports` is designed to
+combine multiple group-level reports into one which totals the successes,
+issues and failures over the same section of pipeline.
diff --git a/python/lsst/ctrl/mpexec/cli/cmd/__init__.py b/python/lsst/ctrl/mpexec/cli/cmd/__init__.py
@@ -26,6 +26,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 __all__ = [
+    "aggregate_reports",
     "build",
     "cleanup",
     "pre_exec_init_qbb",
@@ -38,4 +39,15 @@
 ]
 
 
-from .commands import build, cleanup, pre_exec_init_qbb, purge, qgraph, report, run, run_qbb, update_graph_run
+from .commands import (
+    aggregate_reports,
+    build,
+    cleanup,
+    pre_exec_init_qbb,
+    purge,
+    qgraph,
+    report,
+    run,
+    run_qbb,
+    update_graph_run,
+)
diff --git a/python/lsst/ctrl/mpexec/cli/cmd/commands.py b/python/lsst/ctrl/mpexec/cli/cmd/commands.py
@@ -416,3 +416,37 @@ def report(
     else:
         assert len(qgraphs) == 1, "Cannot make a report without a quantum graph."
         script.report(repo, qgraphs[0], full_output_filename, logs, brief)
+
+
+# Command-line entry point for aggregating report files: collects the
+# positional FILENAMES and the two output options, then forwards them
+# unchanged to `script.aggregate_reports`.
+@click.command(cls=PipetaskCommand)
+# nargs=-1 makes FILENAMES a variadic positional argument.
+@click.argument("filenames", nargs=-1)
+@click.option(
+    "--full-output-filename",
+    # NOTE(review): the default is an empty string, so when the option is
+    # omitted the function receives "" rather than None, even though the
+    # parameter below is annotated `str | None` -- confirm the downstream
+    # code treats "" as "no output file".
+    default="",
+    help="Output report as a file with this name (json).",
+)
+@click.option(
+    "--brief",
+    default=False,
+    is_flag=True,
+    help="Only show counts in report (a brief summary). Note that counts are"
+    " also printed to the screen when using the --full-output-filename option.",
+)
+def aggregate_reports(
+    filenames: Sequence[str], full_output_filename: str | None, brief: bool = False
+) -> None:
+    """Aggregate pipetask report output on disjoint data-id groups into one
+    Summary over common tasks and datasets. Intended for use when the same
+    pipeline has been run over all groups (i.e., to aggregate all reports
+    for a given step). This functionality is only compatible with reports
+    from the `QuantumProvenanceGraph`, so the reports must be run over multiple
+    groups or with the `--force-v2` option.
+
+    Save the report as a file (`--full-output-filename`) or print it to stdout
+    (default). If the terminal is overwhelmed with data_ids from failures try
+    the `--brief` option.
+
+    FILENAMES are the space-separated paths to json file output created by
+    pipetask report.
+    """
+    # NOTE(review): the docstring above is presumably what click shows as the
+    # command's help text, so it is user-facing -- confirm before rewording.
+    # All real work happens in the script layer; this function only delegates.
+    script.aggregate_reports(filenames, full_output_filename, brief)
diff --git a/python/lsst/ctrl/mpexec/cli/script/__init__.py b/python/lsst/ctrl/mpexec/cli/script/__init__.py
@@ -31,7 +31,7 @@
 from .pre_exec_init_qbb import pre_exec_init_qbb
 from .purge import PurgeResult, purge
 from .qgraph import qgraph
-from .report import report, report_v2
+from .report import aggregate_reports, report, report_v2
 from .run import run
 from .run_qbb import run_qbb
 from .update_graph_run import update_graph_run
diff --git a/python/lsst/ctrl/mpexec/cli/script/report.py b/python/lsst/ctrl/mpexec/cli/script/report.py
@@ -194,6 +194,39 @@ def report_v2(
     print_summary(summary, full_output_filename, brief)
 
 
+def aggregate_reports(
+    filenames: Sequence[str], full_output_filename: str | None, brief: bool = False
+) -> None:
+    """Aggregate multiple `QuantumProvenanceGraph` summaries into one report.
+
+    Each input file is read, validated as a `Summary`, and the resulting
+    summaries are combined with `Summary.aggregate`. The combined summary is
+    then handed to `print_summary`. This is intended for reports over the
+    same tasks in the same pipeline, produced for separate
+    dataquery-identified groups after `pipetask report` has been resolved
+    over all graphs associated with each group.
+
+    Parameters
+    ----------
+    filenames : `Sequence[str]`
+        The paths to the JSON files produced by `pipetask report` (note: this
+        is only compatible with the multi-graph or `--force-v2` option). Each
+        file holds the `QuantumProvenanceGraph.Summary` produced for one
+        group.
+    full_output_filename : `str | None`
+        The name of the JSON file in which to store the aggregate report, if
+        passed. Forwarded as-is to `print_summary`.
+    brief : `bool = False`
+        Only display short (counts-only) summary on stdout. This includes
+        counts and not error messages or data_ids (similar to BPS report).
+        This option will still report all `cursed` datasets and `wonky`
+        quanta. Forwarded as-is to `print_summary`.
+    """
+    loaded: list[Summary] = []
+    for path in filenames:
+        # Read the raw JSON first so the file handle is released before the
+        # text is parsed into a Summary.
+        with open(path) as stream:
+            text = stream.read()
+        loaded.append(Summary.model_validate_json(text))
+    print_summary(Summary.aggregate(loaded), full_output_filename, brief)
+
+
 def print_summary(summary: Summary, full_output_filename: str | None, brief: bool = False) -> None:
     """Take a `QuantumProvenanceGraph.Summary` object and write it to a file
     and/or the screen.