Skip to content

Commit

Permalink
Merge pull request #302 from lsst/tickets/DM-41605
Browse files Browse the repository at this point in the history
DM-41605: Command-line aggregator for pipetask report
  • Loading branch information
eigerx authored Oct 16, 2024
2 parents 28b775e + 46f434f commit 3abcffb
Show file tree
Hide file tree
Showing 6 changed files with 242 additions and 287 deletions.
6 changes: 6 additions & 0 deletions doc/changes/DM-41605.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Aggregate multiple `pipetask report` outputs into one holistic `Summary`.

While the `QuantumProvenanceGraph` was designed to resolve processing over
dataquery-identified groups, `pipetask aggregate-reports` is designed to
combine multiple group-level reports into one which totals the successes,
issues and failures over the same section of pipeline.
14 changes: 13 additions & 1 deletion python/lsst/ctrl/mpexec/cli/cmd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = [
"aggregate_reports",
"build",
"cleanup",
"pre_exec_init_qbb",
Expand All @@ -38,4 +39,15 @@
]


from .commands import build, cleanup, pre_exec_init_qbb, purge, qgraph, report, run, run_qbb, update_graph_run
from .commands import (
aggregate_reports,
build,
cleanup,
pre_exec_init_qbb,
purge,
qgraph,
report,
run,
run_qbb,
update_graph_run,
)
34 changes: 34 additions & 0 deletions python/lsst/ctrl/mpexec/cli/cmd/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,3 +416,37 @@ def report(
else:
assert len(qgraphs) == 1, "Cannot make a report without a quantum graph."
script.report(repo, qgraphs[0], full_output_filename, logs, brief)


# Command-line entry point for aggregating reports: a thin wrapper that
# forwards its parsed arguments unchanged to ``script.aggregate_reports``.
@click.command(cls=PipetaskCommand)
# nargs=-1 makes FILENAMES a variadic positional argument (any number of
# paths may be given on the command line).
@click.argument("filenames", nargs=-1)
@click.option(
    "--full-output-filename",
    # When the option is omitted the value is the empty string, not None.
    # NOTE(review): presumably the script layer treats "" as "do not write a
    # file" -- confirm against print_summary.
    default="",
    help="Output report as a file with this name (json).",
)
@click.option(
    "--brief",
    default=False,
    is_flag=True,
    help="Only show counts in report (a brief summary). Note that counts are"
    " also printed to the screen when using the --full-output-filename option.",
)
def aggregate_reports(
    filenames: Sequence[str], full_output_filename: str | None, brief: bool = False
) -> None:
    # The docstring below is rendered by click as this command's help text,
    # so it is user-facing; keep its wording in sync with the options above.
    """Aggregate pipetask report output on disjoint data-id groups into one
    Summary over common tasks and datasets. Intended for use when the same
    pipeline has been run over all groups (i.e., to aggregate all reports
    for a given step). This functionality is only compatible with reports
    from the `QuantumProvenanceGraph`, so the reports must be run over multiple
    groups or with the `--force-v2` option.
    Save the report as a file (`--full-output-filename`) or print it to stdout
    (default). If the terminal is overwhelmed with data_ids from failures try
    the `--brief` option.
    FILENAMES are the space-separated paths to json file output created by
    pipetask report.
    """
    # All real work (reading, merging, printing) happens in the script layer.
    script.aggregate_reports(filenames, full_output_filename, brief)
2 changes: 1 addition & 1 deletion python/lsst/ctrl/mpexec/cli/script/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from .pre_exec_init_qbb import pre_exec_init_qbb
from .purge import PurgeResult, purge
from .qgraph import qgraph
from .report import report, report_v2
from .report import aggregate_reports, report, report_v2
from .run import run
from .run_qbb import run_qbb
from .update_graph_run import update_graph_run
33 changes: 33 additions & 0 deletions python/lsst/ctrl/mpexec/cli/script/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,39 @@ def report_v2(
print_summary(summary, full_output_filename, brief)


def aggregate_reports(
    filenames: Sequence[str], full_output_filename: str | None, brief: bool = False
) -> None:
    """Combine several `pipetask report` summaries into one holistic report.

    Every input file is parsed as a `QuantumProvenanceGraph` `Summary`; the
    parsed summaries are merged with `Summary.aggregate` and the merged
    result is handed to `print_summary`. This is intended for reports over
    the same tasks in the same pipeline, one per dataquery-identified group,
    after `pipetask report` has been resolved over all graphs associated
    with each group.

    Parameters
    ----------
    filenames : `Sequence[str]`
        The paths to the JSON files produced by `pipetask report` (note:
        only the multi-graph or `--force-v2` output is compatible). Each
        file corresponds to the `QuantumProvenanceGraph.Summary` object
        produced for one group.
    full_output_filename : `str | None`
        The name of the JSON file in which to store the aggregate report,
        if passed. Forwarded unchanged to `print_summary`.
    brief : `bool`, optional
        Only display a short (counts-only) summary on stdout: counts but
        not error messages or data_ids (similar to BPS report). All
        `cursed` datasets and `wonky` quanta are still reported. Forwarded
        unchanged to `print_summary`.
    """
    loaded: list[Summary] = []
    for path in filenames:
        # Read each report fully, then let the Summary model validate it.
        with open(path) as stream:
            loaded.append(Summary.model_validate_json(stream.read()))
    print_summary(Summary.aggregate(loaded), full_output_filename, brief)


def print_summary(summary: Summary, full_output_filename: str | None, brief: bool = False) -> None:
"""Take a `QuantumProvenanceGraph.Summary` object and write it to a file
and/or the screen.
Expand Down
Loading

0 comments on commit 3abcffb

Please sign in to comment.