update default display engines (#16)

* replaced the default display engines and renamed them to Panel * replaced the randomly generated tmp dir for datasets with a hard-coded one, to save download time * requirements.txt: install the local package instead of the released pip package
guybuk · Sep 9, 2024 · fbc9dde · fbc9dde
1 parent c4b7762
commit fbc9dde
Show file tree

Hide file tree

Showing 15 changed files with 59 additions and 147 deletions.
diff --git a/bridge/display/text.py b/bridge/display/text.py
@@ -1,17 +1,19 @@
+from __future__ import annotations
+
 from typing import TYPE_CHECKING, Any, Dict
 
 import pandas as pd
 import panel as pn
 
+from bridge.display import DisplayEngine
+from bridge.primitives.dataset import SingularDataset
 from bridge.primitives.sample.singular_sample import SingularSample
 
 if TYPE_CHECKING:
-    from bridge.display import DisplayEngine
-    from bridge.primitives.dataset import SingularDataset
     from bridge.primitives.element.element import Element
 
 
-class TextClassification(DisplayEngine[SingularDataset, SingularSample]):
+class Panel(DisplayEngine[SingularDataset, SingularSample]):
     def show_element(self, element: Element, element_plot_kwargs: Dict[str, Any] | None = None):
         if element.etype == "class_label":
             return pn.pane.Markdown(element.to_pd_series().to_frame().T.to_markdown())
@@ -45,49 +47,3 @@ def plot_sample_by_widget(sample_id):
             return self.show_sample(dataset.get(sample_id), element_plot_kwargs, sample_plot_kwargs)
 
         return pn.Column(sample_ids_wig, plot_sample_by_widget)
-
-
-# class Panel(DisplayEngine):
-#     def show_element(self, element: Element, element_plot_kwargs: Dict[str, Any] | None = None):
-#         if element.etype == "class_label":
-#             return self._show_class_label(element, element_plot_kwargs)
-#         elif element.etype == "text":
-#             return self._show_text(element, element_plot_kwargs)
-#         else:
-#             raise NotImplementedError()
-#
-#     def show_sample(
-#         self,
-#         sample: Sample,
-#         element_plot_kwargs: Dict[str, Any] | None = None,
-#         sample_plot_kwargs: Dict[str, Any] | None = None,
-#     ):
-#         annotations_md = pd.DataFrame(
-#             [ann.to_pd_series() for ann in sample.elements["class_label"]]
-#         ).to_markdown()
-#         text_display = self.show_element(sample.elements["text"][0])
-#         return pn.Column("# Sample Text:", text_display, "# Annotations Data:", annotations_md)
-#
-#     def show_dataset(
-#         self,
-#         dataset: Dataset,
-#         element_plot_kwargs: Dict[str, Any] | None = None,
-#         sample_plot_kwargs: Dict[str, Any] | None = None,
-#         dataset_plot_kwargs: Dict[str, Any] | None = None,
-#     ):
-#         sample_ids = dataset.sample_ids
-#         sample_ids_wig = pn.widgets.DiscreteSlider(name="Sample ID", options=sample_ids, value=sample_ids[0])
-#
-#         @pn.depends(sample_ids_wig.param.value)
-#         def plot_sample_by_widget(sample_id):
-#             return self.show_sample(dataset.get(sample_id), element_plot_kwargs, sample_plot_kwargs)
-#
-#         return pn.Column(sample_ids_wig, plot_sample_by_widget)
-#
-#     @staticmethod
-#     def _show_class_label(element: Element, element_plot_kwargs: Dict[str, Any] | None):
-#         return pn.pane.Markdown(element.to_pd_series().to_frame().T.to_markdown())
-#
-#     @staticmethod
-#     def _show_text(element: Element, element_plot_kwargs: Dict[str, Any] | None):
-#         return pn.pane.Markdown(element.data)
diff --git a/bridge/display/vision.py b/bridge/display/vision.py
@@ -15,13 +15,13 @@
     from bridge.utils.data_objects import BoundingBox
 
 
-class Holoviews(DisplayEngine):
+class Panel(DisplayEngine):
     def __init__(self, bbox_format: str = "xyxy") -> None:
         assert bbox_format in ["xyxy", "xywh", "cxcywh"]
         self._bbox_format = bbox_format
-        self._validate_dependencies()
 
     def show_element(self, element: Element, element_plot_kwargs: Dict[str, Any] | None = None):
+        self._validate_dependencies()
         etype = element.etype
         if etype == "image":
             plot = self._plot_single_image(element)
@@ -39,6 +39,7 @@ def show_sample(
         element_plot_kwargs: Dict[str, Any] | None = None,
         sample_plot_kwargs: Dict[str, Any] | None = None,
     ):
+        self._validate_dependencies()
         import holoviews as hv
 
         imgs = [self._plot_single_image(element) for element in sample.elements["image"]]
@@ -61,6 +62,7 @@ def show_dataset(
         sample_plot_kwargs: Dict[str, Any] | None = None,
         dataset_plot_kwargs: Dict[str, Any] | None = None,
     ):
+        self._validate_dependencies()
         import panel as pn
 
         sample_ids = dataset.sample_ids

diff --git a/bridge/providers/text.py b/bridge/providers/text.py
@@ -4,7 +4,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Dict
 
-from bridge.display.basic import SimplePrints
+from bridge.display.text import Panel
 from bridge.primitives.dataset.singular_dataset import SingularDataset
 from bridge.primitives.element.data.load_mechanism import LoadMechanism
 from bridge.primitives.element.element import Element
@@ -22,7 +22,7 @@ class LargeMovieReviewDataset(DatasetProvider[SingularDataset, SingularSample]):
     dataset_url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
 
     def __init__(self, root: str | os.PathLike, split: str = "train", download: bool = False):
-        root = Path(root)
+        root = Path(root).expanduser()
 
         if download:
             if (root / "aclImdb_v1.tar.gz").exists():
@@ -33,7 +33,7 @@ def __init__(self, root: str | os.PathLike, split: str = "train", download: bool
 
     def build_dataset(
         self,
-        display_engine: DisplayEngine[SingularDataset, SingularSample] = SimplePrints(),
+        display_engine: DisplayEngine[SingularDataset, SingularSample] = Panel(),
         cache_mechanisms: Dict[str, CacheMechanism] = None,
     ) -> SingularDataset:
         samples = []

diff --git a/bridge/providers/vision.py b/bridge/providers/vision.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 
-from bridge.display.basic import SimplePrints
+from bridge.display.vision import Panel
 from bridge.primitives.dataset import SingularDataset
 from bridge.primitives.element.data.load_mechanism import LoadMechanism
 from bridge.primitives.element.element import Element
@@ -24,7 +24,9 @@ class ImageFolder(DatasetProvider[SingularDataset, SingularSample]):
     def __init__(self, root: str | os.PathLike):
         self._root = root
 
-    def build_dataset(self, display_engine: DisplayEngine = None, cache_mechanisms: Dict[str, CacheMechanism] = None):
+    def build_dataset(
+        self, display_engine: DisplayEngine = Panel(), cache_mechanisms: Dict[str, CacheMechanism] = None
+    ):
         images = []
         classes = []
         for i, class_dir in enumerate(sorted(Path(self._root).iterdir())):
@@ -90,7 +92,7 @@ def __init__(self, root: str | os.PathLike, split: str = "train", img_source: st
 
     def build_dataset(
         self,
-        display_engine: DisplayEngine = SimplePrints(),
+        display_engine: DisplayEngine = Panel(bbox_format="xywh"),
         cache_mechanisms: Dict[str, CacheMechanism] = None,
     ):
         img_id_list = list(sorted(self._coco.imgs.keys()))
@@ -144,7 +146,7 @@ def __init__(self, root: str | os.PathLike, train: bool = True, download: bool =
 
     def build_dataset(
         self,
-        display_engine: DisplayEngine = SimplePrints(),
+        display_engine: DisplayEngine = Panel(),
         cache_mechanisms: Dict[str, CacheMechanism | None] | None = None,
     ):
         sample_list = []

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,3 +1,3 @@
-bridge-ds[dev,vision]
+.[dev,vision]
 pycocotools
 torch
diff --git a/docs/source/notebooks/vision/custom_data/dataset_provider.ipynb b/docs/source/notebooks/vision/custom_data/dataset_provider.ipynb
@@ -24,10 +24,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import tempfile\n",
     "from pathlib import Path\n",
     "\n",
-    "TMP_NOTEBOOK_ROOT = Path(tempfile.mkdtemp()) / \"custom_data\" / \"dataset_provider\""
+    "TMP_NOTEBOOK_ROOT = Path(\"/tmp/bridge-ds/tutorials\")"
    ]
   },
   {
@@ -403,7 +402,7 @@
     "4. Elements are the low-level object which contains raw data, by using a **LoadMechanism**.\n",
     "\n",
     "## Up Next\n",
-    "In this tutorial, we've used a primitive DisplayEngine called **SimplePrints**. If you would prefer a more sophisticated one like the Holoviews one in previous tutorials, continue to the next tutorial where we learn how to create our own **DisplayEngine** for a text dataset. "
+    "In this tutorial, we've used a primitive DisplayEngine called **SimplePrints**. If you would prefer a more sophisticated one like the Panel one in previous tutorials, continue to the next tutorial where we learn how to create our own **DisplayEngine** for a text dataset. "
    ]
   }
  ],

diff --git a/docs/source/notebooks/vision/custom_data/display_engine.ipynb b/docs/source/notebooks/vision/custom_data/display_engine.ipynb
@@ -23,10 +23,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import tempfile\n",
     "from pathlib import Path\n",
     "\n",
-    "TMP_NOTEBOOK_ROOT = Path(tempfile.mkdtemp()) / \"custom_data\" / \"display_engine\""
+    "TMP_NOTEBOOK_ROOT = Path(\"/tmp/bridge-ds/tutorials\")"
    ]
   },
   {

diff --git a/docs/source/notebooks/vision/custom_data/load_mechanism.ipynb b/docs/source/notebooks/vision/custom_data/load_mechanism.ipynb
@@ -23,19 +23,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import tempfile\n",
     "from pathlib import Path\n",
     "\n",
     "import holoviews as hv\n",
     "import panel as pn\n",
     "\n",
-    "from bridge.display.vision import Holoviews\n",
     "from bridge.providers.vision import Coco2017Detection\n",
     "\n",
     "hv.extension(\"bokeh\")\n",
     "pn.extension()\n",
     "\n",
-    "TMP_NOTEBOOK_ROOT = Path(tempfile.mkdtemp()) / \"custom_data\" / \"load_mechanism\""
+    "TMP_NOTEBOOK_ROOT = Path(\"/tmp/bridge-ds/tutorials\")"
    ]
   },
   {
@@ -56,7 +54,7 @@
     "root_dir = TMP_NOTEBOOK_ROOT / \"coco\"\n",
     "\n",
     "provider = Coco2017Detection(root_dir, split=\"val\", img_source=\"stream\")\n",
-    "ds = provider.build_dataset(display_engine=Holoviews(bbox_format=\"xywh\"))\n",
+    "ds = provider.build_dataset()\n",
     "ds"
    ]
   },

diff --git a/docs/source/notebooks/vision/fundamentals/coco_eda_demo.ipynb b/docs/source/notebooks/vision/fundamentals/coco_eda_demo.ipynb
@@ -23,7 +23,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import tempfile\n",
     "from pathlib import Path\n",
     "\n",
     "import holoviews as hv\n",
@@ -45,7 +44,7 @@
     "except ModuleNotFoundError:\n",
     "    pass\n",
     "\n",
-    "TMP_NOTEBOOK_ROOT = Path(tempfile.mkdtemp()) / \"basics\" / \"coco_eda_demo\""
+    "TMP_NOTEBOOK_ROOT = Path(\"/tmp/bridge-ds/tutorials\")"
    ]
   },
   {
@@ -71,7 +70,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from bridge.display.vision import Holoviews\n",
     "from bridge.providers.vision import Coco2017Detection\n",
     "\n",
     "root_dir = TMP_NOTEBOOK_ROOT / \"coco\"\n",
@@ -333,9 +331,7 @@
    "id": "27",
    "metadata": {},
    "source": [
-    "To gain a deeper understanding of the image and the size of the dining table annotation in question, we introduce DisplayEngines. These objects are injected into Datasets  Samples, and Elements, enabling us to manipulate the behavior of the `ds.show() / sample.show / element.show()` methods.\n",
-    "\n",
-    "By default, the **SimplePrints** engine is used. Let's switch to the **Holoviews** engine for enhanced visualization:"
+    "You can also call `ds.show()` to visualize the entire dataset instead of a single sample. You can freely scroll through using the slider and visualize different samples from the COCO dataset, right in your notebook."
    ]
   },
   {
@@ -345,47 +341,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Datasets are immutable, so we'll build a new dataset from the existing provider\n",
-    "# with a new rendering engine, and then re-run the assignments we made.\n",
-    "\n",
-    "ds = provider.build_dataset(display_engine=Holoviews(bbox_format=\"xywh\"))\n",
-    "ds = ds.assign_annotations(\n",
-    "    data=lambda samples, anns: anns.data.apply(lambda bbox: map_bbox_class_names(bbox, classnames))\n",
-    ")\n",
-    "ds = ds.assign_samples(date_captured=lambda samples, anns: pd.to_datetime(samples.date_captured))\n",
-    "\n",
     "ds.show()"
    ]
   },
   {
    "cell_type": "markdown",
    "id": "29",
    "metadata": {},
-   "source": [
-    "Now, we have a more user-friendly way to observe our data. You can freely scroll through using the slider and visualize different samples from the COCO, right in your notebook.\n",
-    "\n",
-    "Next up, let's visualize the specific sample (400410) that piqued our interest:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "30",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ds.get(sample_id).show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "31",
-   "metadata": {},
    "source": [
     "## Sorting COCO dataset by bbox sizes\n",
-    "Upon inspection, it's evident that the `dining table` annotation encompasses the entire image.\n",
     "\n",
-    "To assess the frequency of such occurrences, let's render the samples in our dataset in descending order of annotation size.\n",
+    "As we've seen in the previous section, it's evident that the `dining table` annotation covers the entire image.\n",
+    "\n",
+    "To assess the frequency of such occurrences, let's display the samples in our dataset in descending order of annotation size.\n",
     "\n",
     "To achieve this:\n",
     "1. Assign a new column to `ds.samples` representing the area value of its largest annotation.\n",
@@ -396,7 +364,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "32",
+   "id": "30",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -419,7 +387,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "33",
+   "id": "31",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -428,15 +396,15 @@
   },
   {
    "cell_type": "markdown",
-   "id": "34",
+   "id": "32",
    "metadata": {},
    "source": [
     "By scrolling the slider, we observe images with very large annotations on the left, followed by images with very small annotations, and then images without annotations on the right."
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "35",
+   "id": "33",
    "metadata": {},
    "source": [
     "## Filtering out images with large bboxes\n",
@@ -446,7 +414,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "36",
+   "id": "34",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -458,7 +426,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "37",
+   "id": "35",
    "metadata": {},
    "source": [
     "For completeness, let's plot the KDE from before on `ds_smaller`:"
@@ -467,7 +435,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "38",
+   "id": "36",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -481,7 +449,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "39",
+   "id": "37",
    "metadata": {},
    "source": [
     "As we can see, there's still a leftward squeezing - although significantly less than before. We've gained some insight into the distribution of our bbox sizes, but there's always more to do. Feel free to change the bbox area threshold to something even smaller, or plot this KDE for individual classes (rather than all of them), etc."