Skip to content

Commit

Permalink
update default display engines (#16)
Browse files Browse the repository at this point in the history
* replaced the default display engines and renamed them to `Panel`

* replaced the randomly generated tmp dir for datasets with a hard-coded one, to save download time

* requirements.txt: install the local package (`.[dev,vision]`) instead of the published `bridge-ds` package
  • Loading branch information
guybuk authored Sep 9, 2024
1 parent c4b7762 commit fbc9dde
Show file tree
Hide file tree
Showing 15 changed files with 59 additions and 147 deletions.
54 changes: 5 additions & 49 deletions bridge/display/text.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Dict

import pandas as pd
import panel as pn

from bridge.display import DisplayEngine
from bridge.primitives.dataset import SingularDataset
from bridge.primitives.sample.singular_sample import SingularSample

if TYPE_CHECKING:
from bridge.display import DisplayEngine
from bridge.primitives.dataset import SingularDataset
from bridge.primitives.element.element import Element


class TextClassification(DisplayEngine[SingularDataset, SingularSample]):
class Panel(DisplayEngine[SingularDataset, SingularSample]):
def show_element(self, element: Element, element_plot_kwargs: Dict[str, Any] | None = None):
if element.etype == "class_label":
return pn.pane.Markdown(element.to_pd_series().to_frame().T.to_markdown())
Expand Down Expand Up @@ -45,49 +47,3 @@ def plot_sample_by_widget(sample_id):
return self.show_sample(dataset.get(sample_id), element_plot_kwargs, sample_plot_kwargs)

return pn.Column(sample_ids_wig, plot_sample_by_widget)


# class Panel(DisplayEngine):
# def show_element(self, element: Element, element_plot_kwargs: Dict[str, Any] | None = None):
# if element.etype == "class_label":
# return self._show_class_label(element, element_plot_kwargs)
# elif element.etype == "text":
# return self._show_text(element, element_plot_kwargs)
# else:
# raise NotImplementedError()
#
# def show_sample(
# self,
# sample: Sample,
# element_plot_kwargs: Dict[str, Any] | None = None,
# sample_plot_kwargs: Dict[str, Any] | None = None,
# ):
# annotations_md = pd.DataFrame(
# [ann.to_pd_series() for ann in sample.elements["class_label"]]
# ).to_markdown()
# text_display = self.show_element(sample.elements["text"][0])
# return pn.Column("# Sample Text:", text_display, "# Annotations Data:", annotations_md)
#
# def show_dataset(
# self,
# dataset: Dataset,
# element_plot_kwargs: Dict[str, Any] | None = None,
# sample_plot_kwargs: Dict[str, Any] | None = None,
# dataset_plot_kwargs: Dict[str, Any] | None = None,
# ):
# sample_ids = dataset.sample_ids
# sample_ids_wig = pn.widgets.DiscreteSlider(name="Sample ID", options=sample_ids, value=sample_ids[0])
#
# @pn.depends(sample_ids_wig.param.value)
# def plot_sample_by_widget(sample_id):
# return self.show_sample(dataset.get(sample_id), element_plot_kwargs, sample_plot_kwargs)
#
# return pn.Column(sample_ids_wig, plot_sample_by_widget)
#
# @staticmethod
# def _show_class_label(element: Element, element_plot_kwargs: Dict[str, Any] | None):
# return pn.pane.Markdown(element.to_pd_series().to_frame().T.to_markdown())
#
# @staticmethod
# def _show_text(element: Element, element_plot_kwargs: Dict[str, Any] | None):
# return pn.pane.Markdown(element.data)
6 changes: 4 additions & 2 deletions bridge/display/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
from bridge.utils.data_objects import BoundingBox


class Holoviews(DisplayEngine):
class Panel(DisplayEngine):
def __init__(self, bbox_format: str = "xyxy") -> None:
assert bbox_format in ["xyxy", "xywh", "cxcywh"]
self._bbox_format = bbox_format
self._validate_dependencies()

def show_element(self, element: Element, element_plot_kwargs: Dict[str, Any] | None = None):
self._validate_dependencies()
etype = element.etype
if etype == "image":
plot = self._plot_single_image(element)
Expand All @@ -39,6 +39,7 @@ def show_sample(
element_plot_kwargs: Dict[str, Any] | None = None,
sample_plot_kwargs: Dict[str, Any] | None = None,
):
self._validate_dependencies()
import holoviews as hv

imgs = [self._plot_single_image(element) for element in sample.elements["image"]]
Expand All @@ -61,6 +62,7 @@ def show_dataset(
sample_plot_kwargs: Dict[str, Any] | None = None,
dataset_plot_kwargs: Dict[str, Any] | None = None,
):
self._validate_dependencies()
import panel as pn

sample_ids = dataset.sample_ids
Expand Down
6 changes: 3 additions & 3 deletions bridge/providers/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pathlib import Path
from typing import TYPE_CHECKING, Dict

from bridge.display.basic import SimplePrints
from bridge.display.text import Panel
from bridge.primitives.dataset.singular_dataset import SingularDataset
from bridge.primitives.element.data.load_mechanism import LoadMechanism
from bridge.primitives.element.element import Element
Expand All @@ -22,7 +22,7 @@ class LargeMovieReviewDataset(DatasetProvider[SingularDataset, SingularSample]):
dataset_url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"

def __init__(self, root: str | os.PathLike, split: str = "train", download: bool = False):
root = Path(root)
root = Path(root).expanduser()

if download:
if (root / "aclImdb_v1.tar.gz").exists():
Expand All @@ -33,7 +33,7 @@ def __init__(self, root: str | os.PathLike, split: str = "train", download: bool

def build_dataset(
self,
display_engine: DisplayEngine[SingularDataset, SingularSample] = SimplePrints(),
display_engine: DisplayEngine[SingularDataset, SingularSample] = Panel(),
cache_mechanisms: Dict[str, CacheMechanism] = None,
) -> SingularDataset:
samples = []
Expand Down
10 changes: 6 additions & 4 deletions bridge/providers/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import numpy as np

from bridge.display.basic import SimplePrints
from bridge.display.vision import Panel
from bridge.primitives.dataset import SingularDataset
from bridge.primitives.element.data.load_mechanism import LoadMechanism
from bridge.primitives.element.element import Element
Expand All @@ -24,7 +24,9 @@ class ImageFolder(DatasetProvider[SingularDataset, SingularSample]):
def __init__(self, root: str | os.PathLike):
self._root = root

def build_dataset(self, display_engine: DisplayEngine = None, cache_mechanisms: Dict[str, CacheMechanism] = None):
def build_dataset(
self, display_engine: DisplayEngine = Panel(), cache_mechanisms: Dict[str, CacheMechanism] = None
):
images = []
classes = []
for i, class_dir in enumerate(sorted(Path(self._root).iterdir())):
Expand Down Expand Up @@ -90,7 +92,7 @@ def __init__(self, root: str | os.PathLike, split: str = "train", img_source: st

def build_dataset(
self,
display_engine: DisplayEngine = SimplePrints(),
display_engine: DisplayEngine = Panel(bbox_format="xywh"),
cache_mechanisms: Dict[str, CacheMechanism] = None,
):
img_id_list = list(sorted(self._coco.imgs.keys()))
Expand Down Expand Up @@ -144,7 +146,7 @@ def __init__(self, root: str | os.PathLike, train: bool = True, download: bool =

def build_dataset(
self,
display_engine: DisplayEngine = SimplePrints(),
display_engine: DisplayEngine = Panel(),
cache_mechanisms: Dict[str, CacheMechanism | None] | None = None,
):
sample_list = []
Expand Down
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
bridge-ds[dev,vision]
.[dev,vision]
pycocotools
torch
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,9 @@
"metadata": {},
"outputs": [],
"source": [
"import tempfile\n",
"from pathlib import Path\n",
"\n",
"TMP_NOTEBOOK_ROOT = Path(tempfile.mkdtemp()) / \"custom_data\" / \"dataset_provider\""
"TMP_NOTEBOOK_ROOT = Path(\"/tmp/bridge-ds/tutorials\")"
]
},
{
Expand Down Expand Up @@ -403,7 +402,7 @@
"4. Elements are the low-level object which contains raw data, by using a **LoadMechanism**.\n",
"\n",
"## Up Next\n",
"In this tutorial, we've used a primitive DisplayEngine called **SimplePrints**. If you would prefer a more sophisticated one like the Holoviews one in previous tutorials, continue to the next tutorial where we learn how to create our own **DisplayEngine** for a text dataset. "
"In this tutorial, we've used a primitive DisplayEngine called **SimplePrints**. If you would prefer a more sophisticated one like the Panel one in previous tutorials, continue to the next tutorial where we learn how to create our own **DisplayEngine** for a text dataset. "
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@
"metadata": {},
"outputs": [],
"source": [
"import tempfile\n",
"from pathlib import Path\n",
"\n",
"TMP_NOTEBOOK_ROOT = Path(tempfile.mkdtemp()) / \"custom_data\" / \"display_engine\""
"TMP_NOTEBOOK_ROOT = Path(\"/tmp/bridge-ds/tutorials\")"
]
},
{
Expand Down
6 changes: 2 additions & 4 deletions docs/source/notebooks/vision/custom_data/load_mechanism.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,17 @@
"metadata": {},
"outputs": [],
"source": [
"import tempfile\n",
"from pathlib import Path\n",
"\n",
"import holoviews as hv\n",
"import panel as pn\n",
"\n",
"from bridge.display.vision import Holoviews\n",
"from bridge.providers.vision import Coco2017Detection\n",
"\n",
"hv.extension(\"bokeh\")\n",
"pn.extension()\n",
"\n",
"TMP_NOTEBOOK_ROOT = Path(tempfile.mkdtemp()) / \"custom_data\" / \"load_mechanism\""
"TMP_NOTEBOOK_ROOT = Path(\"/tmp/bridge-ds/tutorials\")"
]
},
{
Expand All @@ -56,7 +54,7 @@
"root_dir = TMP_NOTEBOOK_ROOT / \"coco\"\n",
"\n",
"provider = Coco2017Detection(root_dir, split=\"val\", img_source=\"stream\")\n",
"ds = provider.build_dataset(display_engine=Holoviews(bbox_format=\"xywh\"))\n",
"ds = provider.build_dataset()\n",
"ds"
]
},
Expand Down
58 changes: 13 additions & 45 deletions docs/source/notebooks/vision/fundamentals/coco_eda_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
"metadata": {},
"outputs": [],
"source": [
"import tempfile\n",
"from pathlib import Path\n",
"\n",
"import holoviews as hv\n",
Expand All @@ -45,7 +44,7 @@
"except ModuleNotFoundError:\n",
" pass\n",
"\n",
"TMP_NOTEBOOK_ROOT = Path(tempfile.mkdtemp()) / \"basics\" / \"coco_eda_demo\""
"TMP_NOTEBOOK_ROOT = Path(\"/tmp/bridge-ds/tutorials\")"
]
},
{
Expand All @@ -71,7 +70,6 @@
"metadata": {},
"outputs": [],
"source": [
"from bridge.display.vision import Holoviews\n",
"from bridge.providers.vision import Coco2017Detection\n",
"\n",
"root_dir = TMP_NOTEBOOK_ROOT / \"coco\"\n",
Expand Down Expand Up @@ -333,9 +331,7 @@
"id": "27",
"metadata": {},
"source": [
"To gain a deeper understanding of the image and the size of the dining table annotation in question, we introduce DisplayEngines. These objects are injected into Datasets Samples, and Elements, enabling us to manipulate the behavior of the `ds.show() / sample.show / element.show()` methods.\n",
"\n",
"By default, the **SimplePrints** engine is used. Let's switch to the **Holoviews** engine for enhanced visualization:"
"You can also call `ds.show()` to visualize the entire dataset instead of a single sample. You can freely scroll through it using the slider and visualize different samples from COCO, right in your notebook."
]
},
{
Expand All @@ -345,47 +341,19 @@
"metadata": {},
"outputs": [],
"source": [
"# Datasets are immutable, so we'll build a new dataset from the existing provider\n",
"# with a new rendering engine, and then re-run the assignments we made.\n",
"\n",
"ds = provider.build_dataset(display_engine=Holoviews(bbox_format=\"xywh\"))\n",
"ds = ds.assign_annotations(\n",
" data=lambda samples, anns: anns.data.apply(lambda bbox: map_bbox_class_names(bbox, classnames))\n",
")\n",
"ds = ds.assign_samples(date_captured=lambda samples, anns: pd.to_datetime(samples.date_captured))\n",
"\n",
"ds.show()"
]
},
{
"cell_type": "markdown",
"id": "29",
"metadata": {},
"source": [
"Now, we have a more user-friendly way to observe our data. You can freely scroll through using the slider and visualize different samples from the COCO, right in your notebook.\n",
"\n",
"Next up, let's visualize the specific sample (400410) that piqued our interest:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "30",
"metadata": {},
"outputs": [],
"source": [
"ds.get(sample_id).show()"
]
},
{
"cell_type": "markdown",
"id": "31",
"metadata": {},
"source": [
"## Sorting COCO dataset by bbox sizes\n",
"Upon inspection, it's evident that the `dining table` annotation encompasses the entire image.\n",
"\n",
"To assess the frequency of such occurrences, let's render the samples in our dataset in descending order of annotation size.\n",
"As we saw in the previous section, the `dining table` annotation covers the entire image.\n",
"\n",
"To assess the frequency of such occurrences, let's display the samples in our dataset in descending order of annotation size.\n",
"\n",
"To achieve this:\n",
"1. Assign a new column to `ds.samples` representing the area value of its largest annotation.\n",
Expand All @@ -396,7 +364,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "32",
"id": "30",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -419,7 +387,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "33",
"id": "31",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -428,15 +396,15 @@
},
{
"cell_type": "markdown",
"id": "34",
"id": "32",
"metadata": {},
"source": [
"By scrolling the slider, we observe images with very large annotations on the left, followed by images with very small annotations, and then images without annotations on the right."
]
},
{
"cell_type": "markdown",
"id": "35",
"id": "33",
"metadata": {},
"source": [
"## Filtering out images with large bboxes\n",
Expand All @@ -446,7 +414,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "36",
"id": "34",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -458,7 +426,7 @@
},
{
"cell_type": "markdown",
"id": "37",
"id": "35",
"metadata": {},
"source": [
"For completeness, let's plot the KDE from before on `ds_smaller`:"
Expand All @@ -467,7 +435,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "38",
"id": "36",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -481,7 +449,7 @@
},
{
"cell_type": "markdown",
"id": "39",
"id": "37",
"metadata": {},
"source": [
"As we can see, there's still a leftward squeezing - although significantly less than before. We've gained some insight into the distribution of our bbox sizes, but there's always more to do. Feel free to change the bbox area threshold to something even smaller, or plot this KDE for individual classes (rather than all of them), etc."
Expand Down
Loading

0 comments on commit fbc9dde

Please sign in to comment.