diff --git a/.github/workflows/build_docs.yaml b/.github/workflows/build_docs.yaml
index b61071e33..c3c9dc7d6 100644
--- a/.github/workflows/build_docs.yaml
+++ b/.github/workflows/build_docs.yaml
@@ -25,25 +25,21 @@ jobs:
       - name: Install graphviz
         run: sudo apt-get install graphviz
 
-      - name: Set the VIRTUAL_ENV variable for uv to work
-        run: |
-          echo "VIRTUAL_ENV=${Python_ROOT_DIR}" >> $GITHUB_ENV
-
       - name: Update pip/wheel infrastructure and install uv
         run: |
           python -m pip install --upgrade pip
           pip install uv
-          uv pip install wheel
+          uv pip install --system wheel
 
       - name: Install documenteer
-        run: uv pip install 'documenteer[pipelines]==0.8.2'
+        run: uv pip install --system 'documenteer[pipelines]==0.8.2'
 
       - name: Install dependencies
         run: |
-          uv pip install -r requirements.txt
+          uv pip install --system -r requirements.txt
 
       - name: Build and install
-        run: uv pip install --no-deps -v -e .
+        run: uv pip install --system --no-deps -v -e .
 
       - name: Build documentation
         working-directory: ./doc
diff --git a/doc/changes/DM-44583.feature.md b/doc/changes/DM-44583.feature.md
new file mode 100644
index 000000000..caa00fc9a
--- /dev/null
+++ b/doc/changes/DM-44583.feature.md
@@ -0,0 +1 @@
+Add mocking support for tasks that write regular datasets with config, log, or metadata storage classes.
diff --git a/python/lsst/pipe/base/tests/mocks/_pipeline_task.py b/python/lsst/pipe/base/tests/mocks/_pipeline_task.py
index eb469517d..f0f8e7248 100644
--- a/python/lsst/pipe/base/tests/mocks/_pipeline_task.py
+++ b/python/lsst/pipe/base/tests/mocks/_pipeline_task.py
@@ -50,14 +50,19 @@
 from lsst.utils.introspection import get_full_type_name
 from lsst.utils.iteration import ensure_iterable
 
-from ... import automatic_connection_constants as acc
 from ... import connectionTypes as cT
 from ...config import PipelineTaskConfig
 from ...connections import InputQuantizedConnection, OutputQuantizedConnection, PipelineTaskConnections
 from ...pipeline_graph import PipelineGraph
 from ...pipelineTask import PipelineTask
 from ._data_id_match import DataIdMatch
-from ._storage_class import MockDataset, MockDatasetQuantum, MockStorageClass, get_mock_name
+from ._storage_class import (
+    ConvertedUnmockedDataset,
+    MockDataset,
+    MockDatasetQuantum,
+    MockStorageClass,
+    get_mock_name,
+)
 
 _LOG = logging.getLogger(__name__)
 
@@ -107,7 +112,8 @@ def mock_pipeline_graph(
         Original tasks and configuration to mock.
     unmocked_dataset_types : `~collections.abc.Iterable` [ `str` ], optional
         Names of overall-input dataset types that should not be replaced with
-        mocks.
+        mocks. "Automatic" datasets written by the execution framework, such
+        as configs, logs, and metadata, are implicitly included.
     force_failures : `~collections.abc.Mapping` [ `str`, `ForcedFailure` ]
         Mapping from original task label to information about an exception
         that one or more quanta for this task should raise.
@@ -118,10 +124,15 @@ def mock_pipeline_graph(
         Pipeline graph using `MockPipelineTask` configurations that target
         the original tasks. Never resolved.
     """
-    unmocked_dataset_types = tuple(unmocked_dataset_types)
+    unmocked_dataset_types = list(unmocked_dataset_types)
     if force_failures is None:
         force_failures = {}
     result = PipelineGraph(description=original_graph.description)
+    for task_node in original_graph.tasks.values():
+        unmocked_dataset_types.append(task_node.init.config_output.dataset_type_name)
+        if task_node.log_output is not None:
+            unmocked_dataset_types.append(task_node.log_output.dataset_type_name)
+        unmocked_dataset_types.append(task_node.metadata_output.dataset_type_name)
     for original_task_node in original_graph.tasks.values():
         config = MockPipelineTaskConfig()
         config.original.retarget(original_task_node.task_class)
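A quick usage sketch (illustrative, not part of the patch), assuming `mock_pipeline_graph` is re-exported from `lsst.pipe.base.tests.mocks`; `original_graph` stands in for an already-built `PipelineGraph`, and "raw" is a hypothetical overall-input dataset type:

    from lsst.pipe.base.tests.mocks import mock_pipeline_graph

    # Only true overall inputs need to be listed now: the loop above appends
    # each task's config, log, and metadata dataset type names to
    # unmocked_dataset_types internally.
    mocked = mock_pipeline_graph(original_graph, unmocked_dataset_types=["raw"])
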
""" - unmocked_dataset_types = tuple(unmocked_dataset_types) + unmocked_dataset_types = list(unmocked_dataset_types) if force_failures is None: force_failures = {} result = PipelineGraph(description=original_graph.description) + for task_node in original_graph.tasks.values(): + unmocked_dataset_types.append(task_node.init.config_output.dataset_type_name) + if task_node.log_output is not None: + unmocked_dataset_types.append(task_node.log_output.dataset_type_name) + unmocked_dataset_types.append(task_node.metadata_output.dataset_type_name) for original_task_node in original_graph.tasks.values(): config = MockPipelineTaskConfig() config.original.retarget(original_task_node.task_class) @@ -306,14 +317,15 @@ def runQuantum( input_dataset = butlerQC.get(ref) if isinstance(input_dataset, DeferredDatasetHandle): input_dataset = input_dataset.get() - if not isinstance(input_dataset, MockDataset): + if isinstance(input_dataset, MockDataset): + # To avoid very deep provenance we trim inputs to a + # single level. + input_dataset.quantum = None + elif not isinstance(input_dataset, ConvertedUnmockedDataset): raise TypeError( - f"Expected MockDataset instance for {ref}; " + f"Expected MockDataset or ConvertedUnmockedDataset instance for {ref}; " f"got {input_dataset!r} of type {type(input_dataset)!r}." ) - # To avoid very deep provenance we trim inputs to a single - # level. - input_dataset.quantum = None else: input_dataset = MockDataset( dataset_id=ref.id, @@ -381,22 +393,12 @@ def __init__(self, *, config: MockPipelineTaskConfig): self.unmocked_dataset_types = frozenset(config.unmocked_dataset_types) for name, connection in self.original.allConnections.items(): if connection.name not in self.unmocked_dataset_types: - if connection.storageClass in ( - acc.CONFIG_INIT_OUTPUT_STORAGE_CLASS, - acc.METADATA_OUTPUT_STORAGE_CLASS, - acc.LOG_OUTPUT_STORAGE_CLASS, - ): - # We don't mock the automatic output connections, so if - # they're used as an input in any other connection, we - # can't mock them there either. - storage_class_name = connection.storageClass - else: - # We register the mock storage class with the global - # singleton here, but can only put its name in the - # connection. That means the same global singleton (or one - # that also has these registrations) has to be available - # whenever this dataset type is used. - storage_class_name = MockStorageClass.get_or_register_mock(connection.storageClass).name + # We register the mock storage class with the global + # singleton here, but can only put its name in the + # connection. That means the same global singleton (or one + # that also has these registrations) has to be available + # whenever this dataset type is used. 
diff --git a/python/lsst/pipe/base/tests/mocks/_storage_class.py b/python/lsst/pipe/base/tests/mocks/_storage_class.py
index 33eaa3e90..e1381b89e 100644
--- a/python/lsst/pipe/base/tests/mocks/_storage_class.py
+++ b/python/lsst/pipe/base/tests/mocks/_storage_class.py
@@ -28,6 +28,7 @@
 from __future__ import annotations
 
 __all__ = (
+    "ConvertedUnmockedDataset",
     "MockDataset",
     "MockStorageClass",
     "MockDatasetQuantum",
@@ -229,6 +230,40 @@ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
         return super().model_json_schema(*args, **kwargs)
 
 
+class ConvertedUnmockedDataset(pydantic.BaseModel):
+    """A marker class that represents a conversion from a regular in-memory
+    dataset to a mock storage class.
+    """
+
+    original_type: str
+    """The full Python type of the original unmocked in-memory dataset."""
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring
+    # formatting, when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+
 class MockDatasetQuantum(pydantic.BaseModel):
     """Description of the quantum that produced a mock dataset.
 
@@ -242,7 +277,7 @@ class MockDatasetQuantum(pydantic.BaseModel):
     data_id: dict[str, DataIdValue]
     """Data ID for the quantum."""
 
-    inputs: dict[str, list[MockDataset]]
+    inputs: dict[str, list[MockDataset | ConvertedUnmockedDataset]]
     """Mock datasets provided as input to the quantum.
 
     Keys are task-internal connection names, not dataset type names.
@@ -410,16 +445,17 @@ def derivedComponents(self) -> Mapping[str, MockStorageClass]:
     def can_convert(self, other: StorageClass) -> bool:
         # Docstring inherited.
         if not isinstance(other, MockStorageClass):
-            return False
+            # Allow conversions from an original type (and others compatible
+            # with it) to a mock, to allow for cases where an upstream task
+            # did not use a mock to write something but the downstream one is
+            # trying to use a mock to read it.
+            return self.original.can_convert(other)
         return self.original.can_convert(other.original)
 
     def coerce_type(self, incorrect: Any) -> Any:
         # Docstring inherited.
         if not isinstance(incorrect, MockDataset):
-            raise TypeError(
-                f"Mock storage class {self.name!r} can only convert in-memory datasets "
-                f"corresponding to other mock storage classes, not {incorrect!r}."
-            )
+            return ConvertedUnmockedDataset(original_type=get_full_type_name(incorrect))
         factory = StorageClassFactory()
         other_storage_class = factory.getStorageClass(incorrect.storage_class)
         assert isinstance(other_storage_class, MockStorageClass), "Should not get a MockDataset otherwise."
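A sketch of the new coercion behavior (illustrative, not part of the patch), assuming `ConvertedUnmockedDataset` and `MockStorageClass` are re-exported from `lsst.pipe.base.tests.mocks`:

    from lsst.pipe.base.tests.mocks import ConvertedUnmockedDataset, MockStorageClass

    mock_sc = MockStorageClass.get_or_register_mock("StructuredDataDict")
    # Coercing a regular in-memory object (e.g. a dict written by an
    # unmocked upstream task) no longer raises TypeError; it is wrapped in a
    # marker that records the original type.
    converted = mock_sc.coerce_type({"a": 1})
    assert isinstance(converted, ConvertedUnmockedDataset)
    assert converted.original_type == "dict"  # via get_full_type_name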