DM-44583: support manual config outputs in mocking system #420

Merged: 5 commits, May 30, 2024
12 changes: 4 additions & 8 deletions .github/workflows/build_docs.yaml
@@ -25,25 +25,21 @@ jobs:
       - name: Install graphviz
        run: sudo apt-get install graphviz
 
-      - name: Set the VIRTUAL_ENV variable for uv to work
-        run: |
-          echo "VIRTUAL_ENV=${Python_ROOT_DIR}" >> $GITHUB_ENV
-
       - name: Update pip/wheel infrastructure and install uv
         run: |
           python -m pip install --upgrade pip
           pip install uv
-          uv pip install wheel
+          uv pip install --system wheel
 
       - name: Install documenteer
-        run: uv pip install 'documenteer[pipelines]==0.8.2'
+        run: uv pip install --system 'documenteer[pipelines]==0.8.2'
 
       - name: Install dependencies
         run: |
-          uv pip install -r requirements.txt
+          uv pip install --system -r requirements.txt
 
       - name: Build and install
-        run: uv pip install --no-deps -v -e .
+        run: uv pip install --system --no-deps -v -e .
 
       - name: Build documentation
         working-directory: ./doc
1 change: 1 addition & 0 deletions doc/changes/DM-44583.feature.md
@@ -0,0 +1 @@
+Add mocking support for tasks that write regular datasets with config, log, or metadata storage classes.
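As a hedged illustration of what this enables (the connections class, connection name, and dataset type name below are invented for this example, not taken from the PR): a task may now declare an ordinary output whose storage class is one of the framework's "automatic" ones, such as TaskMetadata, and still run under the mocking system.

```python
from lsst.pipe.base import PipelineTaskConnections
from lsst.pipe.base import connectionTypes as cT


class ExampleConnections(PipelineTaskConnections, dimensions=("instrument",)):
    """Hypothetical connections class with a manually written output whose
    storage class is normally reserved for framework-written datasets.
    """

    summary_metadata = cT.Output(
        name="example_manual_metadata",  # invented dataset type name
        storageClass="TaskMetadata",
        doc="A regular output that reuses the metadata storage class.",
        dimensions=("instrument",),
    )
```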
52 changes: 27 additions & 25 deletions python/lsst/pipe/base/tests/mocks/_pipeline_task.py
@@ -50,14 +50,19 @@
 from lsst.utils.introspection import get_full_type_name
 from lsst.utils.iteration import ensure_iterable
 
-from ... import automatic_connection_constants as acc
 from ... import connectionTypes as cT
 from ...config import PipelineTaskConfig
 from ...connections import InputQuantizedConnection, OutputQuantizedConnection, PipelineTaskConnections
 from ...pipeline_graph import PipelineGraph
 from ...pipelineTask import PipelineTask
 from ._data_id_match import DataIdMatch
-from ._storage_class import MockDataset, MockDatasetQuantum, MockStorageClass, get_mock_name
+from ._storage_class import (
+    ConvertedUnmockedDataset,
+    MockDataset,
+    MockDatasetQuantum,
+    MockStorageClass,
+    get_mock_name,
+)
 
 _LOG = logging.getLogger(__name__)
 
@@ -107,7 +112,8 @@
         Original tasks and configuration to mock.
     unmocked_dataset_types : `~collections.abc.Iterable` [ `str` ], optional
         Names of overall-input dataset types that should not be replaced with
-        mocks.
+        mocks. "Automatic" datasets written by the execution framework such
+        as configs, logs, and metadata are implicitly included.
     force_failures : `~collections.abc.Mapping` [ `str`, `ForcedFailure` ]
         Mapping from original task label to information about an exception one
         or more quanta for this task should raise.
@@ -118,10 +124,15 @@
         Pipeline graph using `MockPipelineTask` configurations that target the
         original tasks. Never resolved.
     """
-    unmocked_dataset_types = tuple(unmocked_dataset_types)
+    unmocked_dataset_types = list(unmocked_dataset_types)
     if force_failures is None:
         force_failures = {}
     result = PipelineGraph(description=original_graph.description)
+    for task_node in original_graph.tasks.values():
+        unmocked_dataset_types.append(task_node.init.config_output.dataset_type_name)
+        if task_node.log_output is not None:
+            unmocked_dataset_types.append(task_node.log_output.dataset_type_name)
+        unmocked_dataset_types.append(task_node.metadata_output.dataset_type_name)
     for original_task_node in original_graph.tasks.values():
         config = MockPipelineTaskConfig()
         config.original.retarget(original_task_node.task_class)
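The net effect of the loop added above can be paraphrased as a small standalone helper (hypothetical, not part of the PR; it only relies on the `PipelineGraph` task-node attributes visible in this diff):

```python
def _implicit_unmocked_dataset_types(original_graph) -> list[str]:
    """Collect the framework-written dataset type names for every task.

    ``original_graph`` is assumed to be an
    ``lsst.pipe.base.pipeline_graph.PipelineGraph``.
    """
    names: list[str] = []
    for task_node in original_graph.tasks.values():
        # Config outputs always exist; the log output is optional.
        names.append(task_node.init.config_output.dataset_type_name)
        if task_node.log_output is not None:
            names.append(task_node.log_output.dataset_type_name)
        names.append(task_node.metadata_output.dataset_type_name)
    return names
```

These names are appended to `unmocked_dataset_types`, so config, log, and metadata datasets keep their real storage classes even when another connection consumes them.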
@@ -306,14 +317,15 @@
                 input_dataset = butlerQC.get(ref)
                 if isinstance(input_dataset, DeferredDatasetHandle):
                     input_dataset = input_dataset.get()
-                if not isinstance(input_dataset, MockDataset):
+                if isinstance(input_dataset, MockDataset):
+                    # To avoid very deep provenance we trim inputs to a
+                    # single level.
+                    input_dataset.quantum = None
+                elif not isinstance(input_dataset, ConvertedUnmockedDataset):
                     raise TypeError(
-                        f"Expected MockDataset instance for {ref}; "
+                        f"Expected MockDataset or ConvertedUnmockedDataset instance for {ref}; "
                         f"got {input_dataset!r} of type {type(input_dataset)!r}."
                     )
-                # To avoid very deep provenance we trim inputs to a single
-                # level.
-                input_dataset.quantum = None
             else:
                 input_dataset = MockDataset(
                     dataset_id=ref.id,
@@ -381,22 +393,12 @@
         self.unmocked_dataset_types = frozenset(config.unmocked_dataset_types)
         for name, connection in self.original.allConnections.items():
             if connection.name not in self.unmocked_dataset_types:
-                if connection.storageClass in (
-                    acc.CONFIG_INIT_OUTPUT_STORAGE_CLASS,
-                    acc.METADATA_OUTPUT_STORAGE_CLASS,
-                    acc.LOG_OUTPUT_STORAGE_CLASS,
-                ):
-                    # We don't mock the automatic output connections, so if
-                    # they're used as an input in any other connection, we
-                    # can't mock them there either.
-                    storage_class_name = connection.storageClass
-                else:
-                    # We register the mock storage class with the global
-                    # singleton here, but can only put its name in the
-                    # connection. That means the same global singleton (or one
-                    # that also has these registrations) has to be available
-                    # whenever this dataset type is used.
-                    storage_class_name = MockStorageClass.get_or_register_mock(connection.storageClass).name
+                # We register the mock storage class with the global
+                # singleton here, but can only put its name in the
+                # connection. That means the same global singleton (or one
+                # that also has these registrations) has to be available
+                # whenever this dataset type is used.
+                storage_class_name = MockStorageClass.get_or_register_mock(connection.storageClass).name
                 kwargs: dict[str, Any] = {}
                 if hasattr(connection, "dimensions"):
                     connection_dimensions = set(connection.dimensions)
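With config, log, and metadata dataset types now excluded by name in the graph-building step above, every remaining connection simply gets a mock storage class. A minimal usage sketch of the registration call kept here (the `StructuredDataDict` storage class is an assumed example, and the import path mirrors this file rather than a documented public API):

```python
from lsst.pipe.base.tests.mocks._storage_class import MockStorageClass

# Register (or look up) the mock counterpart of an ordinary storage class;
# only its *name* is stored in the rewritten connection.
mock_storage_class = MockStorageClass.get_or_register_mock("StructuredDataDict")
print(mock_storage_class.name)  # a mock-specific name derived from the original

# As the comment in the diff notes, the same storage-class singleton must have
# this mock registered wherever the rewritten dataset type is read or written.
```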
48 changes: 42 additions & 6 deletions python/lsst/pipe/base/tests/mocks/_storage_class.py
@@ -28,6 +28,7 @@
 from __future__ import annotations
 
 __all__ = (
+    "ConvertedUnmockedDataset",
     "MockDataset",
     "MockStorageClass",
     "MockDatasetQuantum",
@@ -229,6 +230,40 @@
             return super().model_json_schema(*args, **kwargs)
 
 
+class ConvertedUnmockedDataset(pydantic.BaseModel):
+    """A marker class that represents a conversion from a regular in-memory
+    dataset to a mock storage class.
+    """
+
+    original_type: str
+    """The full Python type of the original unmocked in-memory dataset."""
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+
 class MockDatasetQuantum(pydantic.BaseModel):
     """Description of the quantum that produced a mock dataset.
 
@@ -242,7 +277,7 @@
     data_id: dict[str, DataIdValue]
     """Data ID for the quantum."""
 
-    inputs: dict[str, list[MockDataset]]
+    inputs: dict[str, list[MockDataset | ConvertedUnmockedDataset]]
     """Mock datasets provided as input to the quantum.
 
     Keys are task-internal connection names, not dataset type names.
@@ -410,16 +445,17 @@
     def can_convert(self, other: StorageClass) -> bool:
         # Docstring inherited.
         if not isinstance(other, MockStorageClass):
-            return False
+            # Allow conversions from an original type (and others compatible
+            # with it) to a mock, to allow for cases where an upstream task
+            # did not use a mock to write something but the downstream one is
+            # trying to use a mock to read it.
+            return self.original.can_convert(other)
         return self.original.can_convert(other.original)
 
     def coerce_type(self, incorrect: Any) -> Any:
         # Docstring inherited.
         if not isinstance(incorrect, MockDataset):
-            raise TypeError(
-                f"Mock storage class {self.name!r} can only convert in-memory datasets "
-                f"corresponding to other mock storage classes, not {incorrect!r}."
-            )
+            return ConvertedUnmockedDataset(original_type=get_full_type_name(incorrect))
         factory = StorageClassFactory()
         other_storage_class = factory.getStorageClass(incorrect.storage_class)
         assert isinstance(other_storage_class, MockStorageClass), "Should not get a MockDataset otherwise."
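A hedged sketch of the new coercion path (the dict payload and the `StructuredDataDict` storage class are illustrative choices, not taken from this PR):

```python
from lsst.pipe.base.tests.mocks._storage_class import (
    ConvertedUnmockedDataset,
    MockStorageClass,
)

# Stand-in for a dataset that an upstream task wrote without the mock system.
unmocked_payload = {"detector": 42}

# Previously this raised TypeError; now the mock storage class records only
# the original Python type in a lightweight marker model.
mock_sc = MockStorageClass.get_or_register_mock("StructuredDataDict")
marker = mock_sc.coerce_type(unmocked_payload)
assert isinstance(marker, ConvertedUnmockedDataset)
print(marker.original_type)  # fully-qualified type name of the payload
```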