Fixed up the OAK-T support. Survives multiple viewer restarts, supports the Thermal person detection model
zrezke committed Feb 27, 2024
1 parent 4091d69 commit 1740a1c
Showing 6 changed files with 151 additions and 56 deletions.
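
In outline, the change wires the OAK-T's raw thermal stream and a single-class YOLO person detector into the backend pipeline. A minimal standalone sketch of the same wiring (a sketch only: it assumes depthai 2.x, and the blob filename is illustrative):

import depthai as dai

pipeline = dai.Pipeline()

# Thermal sensor on the OAK-T (CAM_E); 256x192 must match the NN input size.
thermal_cam = pipeline.create(dai.node.Camera)
thermal_cam.setSize(256, 192)
thermal_cam.setBoardSocket(dai.CameraBoardSocket.CAM_E)

# Stream raw thermal frames to the host.
xout_thermal = pipeline.create(dai.node.XLinkOut)
xout_thermal.setStreamName("thermal_cam")
thermal_cam.raw.link(xout_thermal.input)

# Single-class (person) YOLO detector fed by the same raw output.
yolo = pipeline.createYoloDetectionNetwork()
yolo.setBlobPath("yolov6n_thermal_people_256x192_openvino_2022.1_6shave.blob")  # illustrative
yolo.setConfidenceThreshold(0.5)
yolo.setNumClasses(1)
yolo.setCoordinateSize(4)
thermal_cam.raw.link(yolo.input)

xout_yolo = pipeline.create(dai.node.XLinkOut)
xout_yolo.setStreamName("yolo")
yolo.out.link(xout_yolo.input)
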
7 changes: 6 additions & 1 deletion crates/re_viewer/src/depthai/depthai.rs
@@ -587,7 +587,7 @@ pub struct State {
pub backend_comms: BackendCommChannel,
#[serde(skip)]
poll_instant: Option<Instant>,
#[serde(default = "default_neural_networks")]
#[serde(skip, default = "default_neural_networks")]
pub neural_networks: Vec<Option<AiModel>>,
#[serde(skip)]
update_timeout_timer: Option<Instant>,
@@ -625,6 +625,11 @@ fn default_neural_networks() -> Vec<Option<AiModel>> {
display_name: String::from("Age gender recognition"),
camera: CameraBoardSocket::CAM_A,
}),
Some(AiModel {
path: String::from("yolov6n_thermal_people_256x192"),
display_name: String::from("Thermal Person Detection"),
camera: CameraBoardSocket::CAM_E,
}),
]
}

78 changes: 57 additions & 21 deletions rerun_py/depthai_viewer/_backend/device.py
@@ -3,6 +3,7 @@
from queue import Empty as QueueEmpty
from queue import Queue
from typing import Dict, List, Optional, Tuple
import os

import depthai as dai
import numpy as np
@@ -14,6 +15,7 @@
)
from depthai_sdk.components.tof_component import Component
from numpy.typing import NDArray
from depthai_viewer.install_requirements import model_dir

import depthai_viewer as viewer
from depthai_viewer._backend.device_configuration import (
@@ -39,7 +41,7 @@
Message,
WarningMessage,
)
from depthai_viewer._backend.packet_handler import PacketHandler
from depthai_viewer._backend.packet_handler import PacketHandler, PacketHandlerContext, DetectionContext
from depthai_viewer._backend.store import Store


@@ -76,14 +78,13 @@ class Device:

_packet_handler: PacketHandler
_oak: Optional[OakCamera] = None
_cameras: List[CameraComponent] = []
_stereo: StereoComponent = None
_nnet: NNComponent = None
_xlink_statistics: Optional[XlinkStatistics] = None
_sys_info_q: Optional[Queue] = None # type: ignore[type-arg]
_pipeline_start_t: Optional[float] = None
_queues: List[Tuple[Component, ComponentOutput]] = []
_dai_queues: List[Tuple[dai.Node, dai.DataOutputQueue]] = []
_dai_queues: List[Tuple[dai.Node, dai.DataOutputQueue, Optional[PacketHandlerContext]]] = []

# _profiler = cProfile.Profile()

@@ -272,7 +273,11 @@ def reconnect_to_oak(self) -> Message:
return ErrorMessage("Failed to create oak camera")

def _get_component_by_socket(self, socket: dai.CameraBoardSocket) -> Optional[CameraComponent]:
component = list(filter(lambda c: c.node.getBoardSocket() == socket, self._cameras))
component = list(
filter(
lambda c: isinstance(c, CameraComponent) and c.node.getBoardSocket() == socket, self._oak._components
)
)
if not component:
return None
return component[0]
@@ -381,8 +386,7 @@ def update_pipeline(self, runtime_only: bool) -> Message:
else:
self._create_auto_pipeline_config(config)

create_dai_queues_after_start: Dict[str, dai.Node] = {}
self._cameras = []
create_dai_queues_after_start: Dict[str, Tuple[dai.Node, Optional[PacketHandlerContext]]] = {}
self._stereo = None
self._packet_handler.reset()
self._sys_info_q = None
@@ -447,11 +451,13 @@ def update_pipeline(self, runtime_only: bool) -> Message:
self._queues.append((sdk_cam, self._oak.queue(sdk_cam.out.main)))
elif dai.CameraSensorType.THERMAL in camera_features.supportedTypes:
thermal_cam = self._oak.pipeline.create(dai.node.Camera)
# Hardcoded for OAK-T. The correct size is needed for correct detection parsing
thermal_cam.setSize(256, 192)
thermal_cam.setBoardSocket(cam.board_socket)
xout_thermal = self._oak.pipeline.create(dai.node.XLinkOut)
xout_thermal.setStreamName("thermal_cam")
thermal_cam.raw.link(xout_thermal.input)
create_dai_queues_after_start["thermal_cam"] = thermal_cam
create_dai_queues_after_start["thermal_cam"] = (thermal_cam, None)
elif sensor_resolution is not None:
sdk_cam = self._oak.create_camera(
cam.board_socket,
@@ -466,7 +472,6 @@
(smallest_supported_resolution.width, smallest_supported_resolution.height), res_x
)
)
self._cameras.append(sdk_cam)
self._queues.append((sdk_cam, self._oak.queue(sdk_cam.out.main)))
else:
print("Skipped creating camera:", cam.board_socket, "because no valid sensor resolution was found.")
@@ -524,16 +529,51 @@ def update_pipeline(self, runtime_only: bool) -> Message:

if config.ai_model and config.ai_model.path:
cam_component = self._get_component_by_socket(config.ai_model.camera)

if not cam_component:
dai_camnode = [
node
for node, _ in create_dai_queues_after_start.values()
if isinstance(node, dai.node.Camera) and node.getBoardSocket() == config.ai_model.camera
]
model_path = config.ai_model.path
if len(dai_camnode) > 0:
model_path = os.path.join(
model_dir,
config.ai_model.path
+ "_openvino_"
+ dai.OpenVINO.getVersionName(dai.OpenVINO.DEFAULT_VERSION)
+ "_6shave"
+ ".blob",
)
cam_node = dai_camnode[0]
if "yolo" in config.ai_model.path:
yolo = self._oak.pipeline.createYoloDetectionNetwork()
yolo.setBlobPath(model_path)
yolo.setConfidenceThreshold(0.5)
if "yolov6n_thermal_people_256x192" == config.ai_model.path:
yolo.setNumClasses(1)
yolo.setCoordinateSize(4)
cam_node.raw.link(yolo.input)
xlink_out_yolo = self._oak.pipeline.createXLinkOut()
xlink_out_yolo.setStreamName("yolo")
yolo.out.link(xlink_out_yolo.input)
create_dai_queues_after_start["yolo"] = (
yolo,
DetectionContext(
labels=["person"],
frame_width=cam_node.getWidth(),
frame_height=cam_node.getHeight(),
board_socket=config.ai_model.camera,
),
)
elif not cam_component:
self.store.send_message_to_frontend(
WarningMessage(f"{config.ai_model.camera} is not configured, won't create NNET.")
)
elif config.ai_model.path == "age-gender-recognition-retail-0013":
face_detection = self._oak.create_nn("face-detection-retail-0004", cam_component)
self._nnet = self._oak.create_nn("age-gender-recognition-retail-0013", input=face_detection)
face_detection = self._oak.create_nn("face-detection-retail-0004", cam_component)
self._nnet = self._oak.create_nn(model_path, input=face_detection)
else:
self._nnet = self._oak.create_nn(config.ai_model.path, cam_component)
self._nnet = self._oak.create_nn(model_path, cam_component)
if self._nnet:
self._queues.append((self._nnet, self._oak.queue(self._nnet.out.main)))
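
The blob lookup above follows the "<model>_openvino_<version>_6shave.blob" naming convention; worked out with depthai's version helper (the model_dir value is a placeholder for the directory that install_requirements exports):

import os
import depthai as dai

model_dir = "/path/to/models"  # placeholder
blob_path = os.path.join(
    model_dir,
    "yolov6n_thermal_people_256x192"
    + "_openvino_"
    + dai.OpenVINO.getVersionName(dai.OpenVINO.DEFAULT_VERSION)
    + "_6shave.blob",
)
# e.g. ".../yolov6n_thermal_people_256x192_openvino_2022.1_6shave.blob"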

@@ -545,19 +585,15 @@ def update_pipeline(self, runtime_only: bool) -> Message:

try:
print("Starting pipeline")
print(self._oak.pipeline.serializeToJson())
import json

json.dump(self._oak.pipeline.serializeToJson(), open("pipeline.json", "w"))
self._oak.start(blocking=False)
except RuntimeError as e:
print("Couldn't start pipeline: ", e)
return ErrorMessage("Couldn't start pipeline")

running = self._oak.running()
if running:
for q_name, node in create_dai_queues_after_start.items():
self._dai_queues.append((node, self._oak.device.getOutputQueue(q_name, 2, False)))
for q_name, (node, context) in create_dai_queues_after_start.items():
self._dai_queues.append((node, self._oak.device.getOutputQueue(q_name, 2, False), context))
self._pipeline_start_t = time.time()
self._sys_info_q = self._oak.device.getOutputQueue("sys_logger", 1, False)
# We might have modified the config, so store it
@@ -584,10 +620,10 @@ def update(self) -> None:
except QueueEmpty:
continue

for dai_node, queue in self._dai_queues:
for dai_node, queue, context in self._dai_queues:
packet = queue.tryGet()
if packet is not None:
self._packet_handler.log_dai_packet(dai_node, packet)
self._packet_handler.log_dai_packet(dai_node, packet, context)

if self._xlink_statistics is not None:
self._xlink_statistics.update()
15 changes: 9 additions & 6 deletions rerun_py/depthai_viewer/_backend/device_configuration.py
@@ -56,9 +56,7 @@ def to_runtime_controls(self) -> Dict[str, Any]:
"align": (
"RECTIFIED_LEFT"
if self.align == dai.CameraBoardSocket.LEFT
else "RECTIFIED_RIGHT"
if self.align == dai.CameraBoardSocket.RIGHT
else "CENTER"
else "RECTIFIED_RIGHT" if self.align == dai.CameraBoardSocket.RIGHT else "CENTER"
),
"lr_check": self.lr_check,
"lrc_check_threshold": self.lrc_threshold,
@@ -130,6 +128,11 @@ def dict(self, *args, **kwargs):  # type: ignore[no-untyped-def]
display_name="Age gender recognition",
camera=dai.CameraBoardSocket.CAM_A,
),
AiModelConfiguration(
path="yolov6n_thermal_people_256x192",
display_name="Thermal Person Detection",
camera=dai.CameraBoardSocket.CAM_E,
),
]


@@ -272,9 +275,9 @@ class DeviceProperties(BaseModel):  # type: ignore[misc]
id: str
cameras: List[CameraFeatures] = []
imu: Optional[ImuKind]
stereo_pairs: List[
Tuple[dai.CameraBoardSocket, dai.CameraBoardSocket]
] = [] # Which cameras can be paired for stereo
stereo_pairs: List[Tuple[dai.CameraBoardSocket, dai.CameraBoardSocket]] = (
[]
) # Which cameras can be paired for stereo
default_stereo_pair: Optional[Tuple[dai.CameraBoardSocket, dai.CameraBoardSocket]] = None
info: DeviceInfo = DeviceInfo()

@@ -3,6 +3,7 @@
CameraConfiguration,
CameraSensorResolution,
PipelineConfiguration,
AiModelConfiguration,
)

config = PipelineConfiguration(
@@ -22,5 +23,9 @@
),
],
depth=None,
ai_model=None,
ai_model=AiModelConfiguration(
display_name="Thermal Person Detection",
path="yolov6n_thermal_people_256x192",
camera=dai.CameraBoardSocket.CAM_E,
),
)
65 changes: 57 additions & 8 deletions rerun_py/depthai_viewer/_backend/packet_handler.py
@@ -22,6 +22,20 @@
from depthai_viewer._backend.store import Store
from depthai_viewer._backend.topic import Topic
from depthai_viewer.components.rect2d import RectFormat
from pydantic import BaseModel


class PacketHandlerContext(BaseModel):
    class Config:
        arbitrary_types_allowed = True


class DetectionContext(PacketHandlerContext):
    labels: List[str]
    frame_width: int
    frame_height: int
    board_socket: dai.CameraBoardSocket


class PacketHandler:
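
A context object travels with each raw DepthAI queue so the handler can scale and label detections without an SDK component; for the thermal detector, device.py builds the equivalent of (values mirror the 256x192 thermal setup):

ctx = DetectionContext(
    labels=["person"],
    frame_width=256,
    frame_height=192,
    board_socket=dai.CameraBoardSocket.CAM_E,
)
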
@@ -50,7 +64,7 @@ def set_camera_intrinsics_getter(
# type: ignore[assignment, misc]
self._get_camera_intrinsics = camera_intrinsics_getter

def log_dai_packet(self, node: dai.Node, packet: dai.Buffer) -> None:
def log_dai_packet(self, node: dai.Node, packet: dai.Buffer, context: Optional[PacketHandlerContext]) -> None:
if isinstance(packet, dai.ImgFrame):
board_socket = None
if isinstance(node, dai.node.ColorCamera):
@@ -63,9 +77,42 @@ def log_dai_packet(self, node: dai.Node, packet: dai.Buffer) -> None:
self._on_camera_frame(FramePacket("", packet), board_socket)
else:
print("Unknown node type:", type(node), "for packet:", type(packet))
elif isinstance(packet, dai.ImgDetections):
    if not isinstance(context, DetectionContext):
        print("Invalid context for detections packet", context)
        return
    self._on_dai_detections(packet, context)
else:
print("Unknown dai packet type:", type(packet))

def _dai_detections_to_rects_colors_labels(
self, packet: dai.ImgDetections, context: DetectionContext
) -> Tuple[List[List[int]], List[List[int]], List[str]]:
rects = []
colors = []
labels = []
for detection in packet.detections:
    rects.append(self._rect_from_detection(detection, context.frame_height, context.frame_width))
    colors.append([0, 255, 0])
    label = ""
    # Open model zoo models output label index
    if context.labels is not None:
        label += context.labels[detection.label]
    label += ", " + str(int(detection.confidence * 100)) + "%"
    labels.append(label)
return rects, colors, labels

def _on_dai_detections(self, packet: dai.ImgDetections, context: DetectionContext) -> None:
rects, colors, labels = self._dai_detections_to_rects_colors_labels(packet, context)
viewer.log_rects(
f"{context.board_socket.name}/transform/color_cam/Detections",
rects,
rect_format=RectFormat.XYXY,
colors=colors,
labels=labels,
)
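
Worked through for one detection with label index 0 and confidence 0.87 under the thermal context, the label helper yields:

labels = ["person"]
label = labels[0] + ", " + str(int(0.87 * 100)) + "%"  # -> "person, 87%"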

def log_packet(
self,
component: Component,
Expand Down Expand Up @@ -195,7 +242,9 @@ def _detections_to_rects_colors_labels(
colors = []
labels = []
for detection in packet.detections:
rects.append(self._rect_from_detection(detection, packet.frame.shape[0], packet.frame.shape[1]))
rects.append(
self._rect_from_detection(detection.img_detection, packet.frame.shape[0], packet.frame.shape[1])
)
colors.append([0, 255, 0])
label: str = detection.label_str
# Open model zoo models output label index
@@ -217,18 +266,18 @@ def _on_age_gender_packet(self, packet: TwoStagePacket, component: NNComponent)
cam = "color_cam" if component._get_camera_comp().is_color() else "mono_cam"
viewer.log_rect(
f"{component._get_camera_comp()._socket.name}/transform/{cam}/Detection",
self._rect_from_detection(det, packet.frame.shape[0], packet.frame.shape[1]),
self._rect_from_detection(det.img_detection, packet.frame.shape[0], packet.frame.shape[1]),
rect_format=RectFormat.XYXY,
color=color,
label=label,
)

def _rect_from_detection(self, detection: Detection, max_height: int, max_width: int) -> List[int]:
def _rect_from_detection(self, detection: dai.ImgDetection, max_height: int, max_width: int) -> List[int]:
return [
max(min(detection.bottom_right[0], max_width), 0) * max_width,
max(min(detection.bottom_right[1], max_height), 0) * max_height,
max(min(detection.top_left[0], max_width), 0) * max_width,
max(min(detection.top_left[1], max_height), 0) * max_height,
# detection coords are normalized to [0, 1]; keep the previous (bottom-right, top-left) pair order
max(min(detection.xmax, 1), 0) * max_width,
max(min(detection.ymax, 1), 0) * max_height,
max(min(detection.xmin, 1), 0) * max_width,
max(min(detection.ymin, 1), 0) * max_height,
]
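
With normalized detector coordinates on a 256x192 thermal frame the mapping works out as follows (the rect keeps the bottom-right/top-left pair order used before this change):

# xmin=0.25, ymin=0.5, xmax=0.75, ymax=0.9 on a 256x192 frame:
# [0.75 * 256, 0.9 * 192, 0.25 * 256, 0.5 * 192] -> [192.0, 172.8, 64.0, 96.0]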


