Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-41605: Command-line aggregator for pipetask report #302

Merged
merged 3 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/changes/DM-41605.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Aggregate multiple `pipetask report` outputs into one holistic `Summary`.

While the `QuantumProvenanceGraph` was designed to resolve processing over
dataquery-identified groups, `pipetask aggregate-reports` is designed to
combine multiple group-level reports into one which totals the successes,
issues and failures over the same section of pipeline.
14 changes: 13 additions & 1 deletion python/lsst/ctrl/mpexec/cli/cmd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = [
"aggregate_reports",
"build",
"cleanup",
"pre_exec_init_qbb",
Expand All @@ -38,4 +39,15 @@
]


from .commands import build, cleanup, pre_exec_init_qbb, purge, qgraph, report, run, run_qbb, update_graph_run
from .commands import (
aggregate_reports,
build,
cleanup,
pre_exec_init_qbb,
purge,
qgraph,
report,
run,
run_qbb,
update_graph_run,
)
34 changes: 34 additions & 0 deletions python/lsst/ctrl/mpexec/cli/cmd/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,3 +416,37 @@ def report(
else:
assert len(qgraphs) == 1, "Cannot make a report without a quantum graph."
script.report(repo, qgraphs[0], full_output_filename, logs, brief)


@click.command(cls=PipetaskCommand)
@click.argument("filenames", nargs=-1)
@click.option(
    "--full-output-filename",
    default="",
    help="Output report as a file with this name (json).",
)
@click.option(
    "--brief",
    default=False,
    is_flag=True,
    help="Only show counts in report (a brief summary). Note that counts are"
    " also printed to the screen when using the --full-output-filename option.",
)
def aggregate_reports(
    filenames: Sequence[str], full_output_filename: str | None, brief: bool = False
) -> None:
    """Aggregate pipetask report output on disjoint data-id groups into one
    Summary over common tasks and datasets. Intended for use when the same
    pipeline has been run over all groups (i.e., to aggregate all reports
    for a given step). This functionality is only compatible with reports
    from the `QuantumProvenanceGraph`, so the reports must be run over multiple
    groups or with the `--force-v2` option.

    Save the report as a file (`--full-output-filename`) or print it to stdout
    (default). If the terminal is overwhelmed with data_ids from failures try
    the `--brief` option.

    FILENAMES are the space-separated paths to json file output created by
    pipetask report.
    """
    # NOTE: click renders the docstring above as this command's help text, so
    # it is user-facing and deliberately left untouched here.
    #
    # The command itself holds no logic: it forwards the parsed CLI values to
    # the script-layer implementation, named explicitly so the mapping between
    # CLI options and script parameters is visible at the call site.
    script.aggregate_reports(
        filenames=filenames,
        full_output_filename=full_output_filename,
        brief=brief,
    )
2 changes: 1 addition & 1 deletion python/lsst/ctrl/mpexec/cli/script/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from .pre_exec_init_qbb import pre_exec_init_qbb
from .purge import PurgeResult, purge
from .qgraph import qgraph
from .report import report, report_v2
from .report import aggregate_reports, report, report_v2
from .run import run
from .run_qbb import run_qbb
from .update_graph_run import update_graph_run
33 changes: 33 additions & 0 deletions python/lsst/ctrl/mpexec/cli/script/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,39 @@ def report_v2(
print_summary(summary, full_output_filename, brief)


def aggregate_reports(
    filenames: Sequence[str], full_output_filename: str | None, brief: bool = False
) -> None:
    """Aggregate multiple `QuantumProvenanceGraph` summaries on separate
    dataquery-identified groups into one holistic report.

    This is intended for reports over the same tasks in the same pipeline,
    after `pipetask report` has been resolved over all graphs associated with
    each group.

    Parameters
    ----------
    filenames : `Sequence` [`str`]
        The paths to the JSON files produced by `pipetask report` (note: this
        is only compatible with the multi-graph or `--force-v2` option). These
        files correspond to the `QuantumProvenanceGraph.Summary` objects which
        are produced for each group.
    full_output_filename : `str` or `None`
        The name of the JSON file in which to store the aggregate report, if
        passed. This is passed to `print_summary` at the end of this function.
    brief : `bool`, optional
        Only display short (counts-only) summary on stdout. This includes
        counts and not error messages or data_ids (similar to BPS report).
        This option will still report all `cursed` datasets and `wonky`
        quanta. This is passed to `print_summary` at the end of this function.
    """
    # Each input file is parsed and validated as one per-group ``Summary``;
    # a missing file or invalid JSON propagates as an exception to the caller.
    summaries: list[Summary] = []
    for filename in filenames:
        with open(filename) as f:
            summaries.append(Summary.model_validate_json(f.read()))
    # NOTE(review): with no filenames this aggregates an empty list; the
    # behavior of ``Summary.aggregate`` in that case is not visible here.
    result = Summary.aggregate(summaries)
    print_summary(result, full_output_filename, brief)


def print_summary(summary: Summary, full_output_filename: str | None, brief: bool = False) -> None:
"""Take a `QuantumProvenanceGraph.Summary` object and write it to a file
and/or the screen.
Expand Down
Loading
Loading