diff --git a/crates/re_viewer/src/depthai/depthai.rs b/crates/re_viewer/src/depthai/depthai.rs
index 988d7e023e27..a1827714b99a 100644
--- a/crates/re_viewer/src/depthai/depthai.rs
+++ b/crates/re_viewer/src/depthai/depthai.rs
@@ -587,7 +587,7 @@ pub struct State {
     pub backend_comms: BackendCommChannel,
     #[serde(skip)]
    poll_instant: Option<Instant>,
-    #[serde(default = "default_neural_networks")]
+    #[serde(skip, default = "default_neural_networks")]
     pub neural_networks: Vec<Option<AiModel>>,
     #[serde(skip)]
     update_timeout_timer: Option<Instant>,
@@ -625,6 +625,11 @@ fn default_neural_networks() -> Vec<Option<AiModel>> {
             display_name: String::from("Age gender recognition"),
             camera: CameraBoardSocket::CAM_A,
         }),
+        Some(AiModel {
+            path: String::from("yolov6n_thermal_people_256x192"),
+            display_name: String::from("Thermal Person Detection"),
+            camera: CameraBoardSocket::CAM_E,
+        }),
     ]
 }
diff --git a/rerun_py/depthai_viewer/_backend/device.py b/rerun_py/depthai_viewer/_backend/device.py
index 1815193cf8ee..3864e6bd7894 100644
--- a/rerun_py/depthai_viewer/_backend/device.py
+++ b/rerun_py/depthai_viewer/_backend/device.py
@@ -3,6 +3,7 @@
 from queue import Empty as QueueEmpty
 from queue import Queue
 from typing import Dict, List, Optional, Tuple
+import os

 import depthai as dai
 import numpy as np
@@ -14,6 +15,7 @@
 )
 from depthai_sdk.components.tof_component import Component
 from numpy.typing import NDArray
+from depthai_viewer.install_requirements import model_dir

 import depthai_viewer as viewer
 from depthai_viewer._backend.device_configuration import (
@@ -39,7 +41,7 @@
     Message,
     WarningMessage,
 )
-from depthai_viewer._backend.packet_handler import PacketHandler
+from depthai_viewer._backend.packet_handler import PacketHandler, PacketHandlerContext, DetectionContext
 from depthai_viewer._backend.store import Store
@@ -76,14 +78,13 @@ class Device:
     _packet_handler: PacketHandler
     _oak: Optional[OakCamera] = None
-    _cameras: List[CameraComponent] = []
     _stereo: StereoComponent = None
     _nnet: NNComponent = None
     _xlink_statistics: Optional[XlinkStatistics] = None
     _sys_info_q: Optional[Queue] = None  # type: ignore[type-arg]
     _pipeline_start_t: Optional[float] = None
     _queues: List[Tuple[Component, ComponentOutput]] = []
-    _dai_queues: List[Tuple[dai.Node, dai.DataOutputQueue]] = []
+    _dai_queues: List[Tuple[dai.Node, dai.DataOutputQueue, Optional[PacketHandlerContext]]] = []

     # _profiler = cProfile.Profile()
@@ -272,7 +273,11 @@ def reconnect_to_oak(self) -> Message:
         return ErrorMessage("Failed to create oak camera")

     def _get_component_by_socket(self, socket: dai.CameraBoardSocket) -> Optional[CameraComponent]:
-        component = list(filter(lambda c: c.node.getBoardSocket() == socket, self._cameras))
+        component = list(
+            filter(
+                lambda c: isinstance(c, CameraComponent) and c.node.getBoardSocket() == socket, self._oak._components
+            )
+        )
         if not component:
             return None
         return component[0]
@@ -381,8 +386,7 @@ def update_pipeline(self, runtime_only: bool) -> Message:
         else:
             self._create_auto_pipeline_config(config)

-        create_dai_queues_after_start: Dict[str, dai.Node] = {}
-        self._cameras = []
+        create_dai_queues_after_start: Dict[str, Tuple[dai.Node, Optional[PacketHandlerContext]]] = {}
         self._stereo = None
         self._packet_handler.reset()
         self._sys_info_q = None
@@ -447,11 +451,14 @@ def update_pipeline(self, runtime_only: bool) -> Message:
                     self._queues.append((sdk_cam, self._oak.queue(sdk_cam.out.main)))
                 elif dai.CameraSensorType.THERMAL in camera_features.supportedTypes:
                     thermal_cam = self._oak.pipeline.create(dai.node.Camera)
+                    # Hardcoded for OAK-T. The correct size is needed for correct detection parsing.
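+                    # The thermal frame is later linked straight into the YOLO input, so it must match the model's 256x192 input size.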
+                    thermal_cam.setSize(256, 192)
                     thermal_cam.setBoardSocket(cam.board_socket)
                     xout_thermal = self._oak.pipeline.create(dai.node.XLinkOut)
                     xout_thermal.setStreamName("thermal_cam")
                     thermal_cam.raw.link(xout_thermal.input)
-                    create_dai_queues_after_start["thermal_cam"] = thermal_cam
+                    create_dai_queues_after_start["thermal_cam"] = (thermal_cam, None)
                 elif sensor_resolution is not None:
                     sdk_cam = self._oak.create_camera(
                         cam.board_socket,
@@ -466,7 +472,6 @@ def update_pipeline(self, runtime_only: bool) -> Message:
                             (smallest_supported_resolution.width, smallest_supported_resolution.height), res_x
                         )
                     )
-                    self._cameras.append(sdk_cam)
                     self._queues.append((sdk_cam, self._oak.queue(sdk_cam.out.main)))
                 else:
                     print("Skipped creating camera:", cam.board_socket, "because no valid sensor resolution was found.")
@@ -524,16 +529,51 @@ def update_pipeline(self, runtime_only: bool) -> Message:

         if config.ai_model and config.ai_model.path:
             cam_component = self._get_component_by_socket(config.ai_model.camera)
-
-            if not cam_component:
+            dai_camnode = [
+                node
+                for node, _ in create_dai_queues_after_start.values()
+                if isinstance(node, dai.node.Camera) and node.getBoardSocket() == config.ai_model.camera
+            ]
+            model_path = config.ai_model.path
+            if len(dai_camnode) > 0:
+                model_path = os.path.join(
+                    model_dir,
+                    config.ai_model.path
+                    + "_openvino_"
+                    + dai.OpenVINO.getVersionName(dai.OpenVINO.DEFAULT_VERSION)
+                    + "_6shave"
+                    + ".blob",
+                )
+                cam_node = dai_camnode[0]
+                if "yolo" in config.ai_model.path:
+                    yolo = self._oak.pipeline.createYoloDetectionNetwork()
+                    yolo.setBlobPath(model_path)
+                    yolo.setConfidenceThreshold(0.5)
+                    if "yolov6n_thermal_people_256x192" == config.ai_model.path:
+                        yolo.setNumClasses(1)
+                    yolo.setCoordinateSize(4)
+                    cam_node.raw.link(yolo.input)
+                    xlink_out_yolo = self._oak.pipeline.createXLinkOut()
+                    xlink_out_yolo.setStreamName("yolo")
+                    yolo.out.link(xlink_out_yolo.input)
+                    create_dai_queues_after_start["yolo"] = (
+                        yolo,
+                        DetectionContext(
+                            labels=["person"],
+                            frame_width=cam_node.getWidth(),
+                            frame_height=cam_node.getHeight(),
+                            board_socket=config.ai_model.camera,
+                        ),
+                    )
+            elif not cam_component:
                 self.store.send_message_to_frontend(
                     WarningMessage(f"{config.ai_model.camera} is not configured, won't create NNET.")
                 )
             elif config.ai_model.path == "age-gender-recognition-retail-0013":
                 face_detection = self._oak.create_nn("face-detection-retail-0004", cam_component)
-                self._nnet = self._oak.create_nn("age-gender-recognition-retail-0013", input=face_detection)
+                self._nnet = self._oak.create_nn(model_path, input=face_detection)
             else:
-                self._nnet = self._oak.create_nn(config.ai_model.path, cam_component)
+                self._nnet = self._oak.create_nn(model_path, cam_component)

             if self._nnet:
                 self._queues.append((self._nnet, self._oak.queue(self._nnet.out.main)))
@@ -545,10 +585,6 @@ def update_pipeline(self, runtime_only: bool) -> Message:

         try:
             print("Starting pipeline")
-            print(self._oak.pipeline.serializeToJson())
-            import json
-
-            json.dump(self._oak.pipeline.serializeToJson(), open("pipeline.json", "w"))
             self._oak.start(blocking=False)
         except RuntimeError as e:
             print("Couldn't start pipeline: ", e)
@@ -556,8 +592,9 @@ def update_pipeline(self, runtime_only: bool) -> Message:

         running = self._oak.running()
         if running:
-            for q_name, node in create_dai_queues_after_start.items():
-                self._dai_queues.append((node, self._oak.device.getOutputQueue(q_name, 2, False)))
+            for q_name, (node, context) in create_dai_queues_after_start.items():
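+                # Keep the packet-parsing context (None for plain streams) next to the opened queue for update().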
+                self._dai_queues.append((node, self._oak.device.getOutputQueue(q_name, 2, False), context))
             self._pipeline_start_t = time.time()
             self._sys_info_q = self._oak.device.getOutputQueue("sys_logger", 1, False)
             # We might have modified the config, so store it
@@ -584,10 +620,10 @@ def update(self) -> None:
             except QueueEmpty:
                 continue

-        for dai_node, queue in self._dai_queues:
+        for dai_node, queue, context in self._dai_queues:
             packet = queue.tryGet()
             if packet is not None:
-                self._packet_handler.log_dai_packet(dai_node, packet)
+                self._packet_handler.log_dai_packet(dai_node, packet, context)

         if self._xlink_statistics is not None:
             self._xlink_statistics.update()
diff --git a/rerun_py/depthai_viewer/_backend/device_configuration.py b/rerun_py/depthai_viewer/_backend/device_configuration.py
index 7178d082728e..c4a42c843e65 100644
--- a/rerun_py/depthai_viewer/_backend/device_configuration.py
+++ b/rerun_py/depthai_viewer/_backend/device_configuration.py
@@ -56,9 +56,7 @@ def to_runtime_controls(self) -> Dict[str, Any]:
             "align": (
                 "RECTIFIED_LEFT"
                 if self.align == dai.CameraBoardSocket.LEFT
-                else "RECTIFIED_RIGHT"
-                if self.align == dai.CameraBoardSocket.RIGHT
-                else "CENTER"
+                else "RECTIFIED_RIGHT" if self.align == dai.CameraBoardSocket.RIGHT else "CENTER"
             ),
             "lr_check": self.lr_check,
             "lrc_check_threshold": self.lrc_threshold,
@@ -130,6 +128,11 @@ def dict(self, *args, **kwargs):  # type: ignore[no-untyped-def]
         display_name="Age gender recognition",
         camera=dai.CameraBoardSocket.CAM_A,
     ),
+    AiModelConfiguration(
+        path="yolov6n_thermal_people_256x192",
+        display_name="Thermal Person Detection",
+        camera=dai.CameraBoardSocket.CAM_E,
+    ),
 ]
@@ -272,9 +275,9 @@ class DeviceProperties(BaseModel):  # type: ignore[misc]
     id: str
     cameras: List[CameraFeatures] = []
     imu: Optional[ImuKind]
-    stereo_pairs: List[
-        Tuple[dai.CameraBoardSocket, dai.CameraBoardSocket]
-    ] = []  # Which cameras can be paired for stereo
+    stereo_pairs: List[Tuple[dai.CameraBoardSocket, dai.CameraBoardSocket]] = (
+        []
+    )  # Which cameras can be paired for stereo
     default_stereo_pair: Optional[Tuple[dai.CameraBoardSocket, dai.CameraBoardSocket]] = None
     info: DeviceInfo = DeviceInfo()
diff --git a/rerun_py/depthai_viewer/_backend/device_defaults/oak_t_default.py b/rerun_py/depthai_viewer/_backend/device_defaults/oak_t_default.py
index 47ff3bbe18e3..a990530906ef 100644
--- a/rerun_py/depthai_viewer/_backend/device_defaults/oak_t_default.py
+++ b/rerun_py/depthai_viewer/_backend/device_defaults/oak_t_default.py
@@ -3,6 +3,7 @@
     CameraConfiguration,
     CameraSensorResolution,
     PipelineConfiguration,
+    AiModelConfiguration,
 )

 config = PipelineConfiguration(
@@ -22,5 +23,9 @@
         ),
     ],
     depth=None,
-    ai_model=None,
+    ai_model=AiModelConfiguration(
+        display_name="Thermal Person Detection",
+        path="yolov6n_thermal_people_256x192",
+        camera=dai.CameraBoardSocket.CAM_E,
+    ),
 )
diff --git a/rerun_py/depthai_viewer/_backend/packet_handler.py b/rerun_py/depthai_viewer/_backend/packet_handler.py
index ce8362dd4c78..5e805bb12f23 100644
--- a/rerun_py/depthai_viewer/_backend/packet_handler.py
+++ b/rerun_py/depthai_viewer/_backend/packet_handler.py
@@ -22,6 +22,20 @@
 from depthai_viewer._backend.store import Store
 from depthai_viewer._backend.topic import Topic
 from depthai_viewer.components.rect2d import RectFormat
+from pydantic import BaseModel
+
+
+class PacketHandlerContext(BaseModel):
+    class Config:
+        arbitrary_types_allowed = True
+
+
+class DetectionContext(PacketHandlerContext):
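+    # Context needed to scale, label and attribute raw dai.ImgDetections to a camera stream.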
+    labels: List[str]
+    frame_width: int
+    frame_height: int
+    board_socket: dai.CameraBoardSocket


 class PacketHandler:
@@ -50,7 +64,7 @@ def set_camera_intrinsics_getter(  # type: ignore[assignment, misc]
         self._get_camera_intrinsics = camera_intrinsics_getter

-    def log_dai_packet(self, node: dai.Node, packet: dai.Buffer) -> None:
+    def log_dai_packet(self, node: dai.Node, packet: dai.Buffer, context: Optional[PacketHandlerContext]) -> None:
         if isinstance(packet, dai.ImgFrame):
             board_socket = None
             if isinstance(node, dai.node.ColorCamera):
@@ -63,9 +77,41 @@ def log_dai_packet(self, node: dai.Node, packet: dai.Buffer) -> None:
                 self._on_camera_frame(FramePacket("", packet), board_socket)
             else:
                 print("Unknown node type:", type(node), "for packet:", type(packet))
+        elif isinstance(packet, dai.ImgDetections):
+            if not isinstance(context, DetectionContext):
+                print("Invalid context for detections packet", context)
+                return
+            self._on_dai_detections(packet, context)
         else:
             print("Unknown dai packet type:", type(packet))

+    def _dai_detections_to_rects_colors_labels(
+        self, packet: dai.ImgDetections, context: DetectionContext
+    ) -> Tuple[List[List[int]], List[List[int]], List[str]]:
+        rects = []
+        colors = []
+        labels = []
+        for detection in packet.detections:
+            rects.append(self._rect_from_detection(detection, context.frame_width, context.frame_height))
+            colors.append([0, 255, 0])
+            label = ""
+            # Open model zoo models output label index
+            if context.labels is not None:
+                label += context.labels[detection.label]
+            label += ", " + str(int(detection.confidence * 100)) + "%"
+            labels.append(label)
+        return rects, colors, labels
+
+    def _on_dai_detections(self, packet: dai.ImgDetections, context: DetectionContext) -> None:
+        rects, colors, labels = self._dai_detections_to_rects_colors_labels(packet, context)
+        viewer.log_rects(
+            f"{context.board_socket.name}/transform/color_cam/Detections",
+            rects,
+            rect_format=RectFormat.XYXY,
+            colors=colors,
+            labels=labels,
+        )
+
     def log_packet(
         self,
         component: Component,
@@ -195,7 +242,9 @@ def _detections_to_rects_colors_labels(
         colors = []
         labels = []
         for detection in packet.detections:
-            rects.append(self._rect_from_detection(detection, packet.frame.shape[0], packet.frame.shape[1]))
+            rects.append(
+                self._rect_from_detection(detection.img_detection, packet.frame.shape[0], packet.frame.shape[1])
+            )
             colors.append([0, 255, 0])
             label: str = detection.label_str
             # Open model zoo models output label index
@@ -217,18 +266,19 @@ def _on_age_gender_packet(self, packet: TwoStagePacket, component: NNComponent)
         cam = "color_cam" if component._get_camera_comp().is_color() else "mono_cam"
         viewer.log_rect(
             f"{component._get_camera_comp()._socket.name}/transform/{cam}/Detection",
-            self._rect_from_detection(det, packet.frame.shape[0], packet.frame.shape[1]),
+            self._rect_from_detection(det.img_detection, packet.frame.shape[0], packet.frame.shape[1]),
             rect_format=RectFormat.XYXY,
             color=color,
             label=label,
         )

-    def _rect_from_detection(self, detection: Detection, max_height: int, max_width: int) -> List[int]:
+    def _rect_from_detection(self, detection: dai.ImgDetection, max_height: int, max_width: int) -> List[int]:
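+        # dai.ImgDetection coordinates are normalized to [0, 1]; scale them into pixel space.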
         return [
-            max(min(detection.bottom_right[0], max_width), 0) * max_width,
-            max(min(detection.bottom_right[1], max_height), 0) * max_height,
-            max(min(detection.top_left[0], max_width), 0) * max_width,
-            max(min(detection.top_left[1], max_height), 0) * max_height,
+            max(min(detection.xmax, max_width), 0) * max_width,
+            max(min(detection.ymax, max_height), 0) * max_height,
+            max(min(detection.xmin, max_width), 0) * max_width,
+            max(min(detection.ymin, max_height), 0) * max_height,
         ]
diff --git a/rerun_py/depthai_viewer/install_requirements.py b/rerun_py/depthai_viewer/install_requirements.py
index d810ccade7e4..36af4c210b8c 100644
--- a/rerun_py/depthai_viewer/install_requirements.py
+++ b/rerun_py/depthai_viewer/install_requirements.py
@@ -6,6 +6,7 @@
 import sys
 import traceback
 from typing import Any, Dict
+from pathlib import Path

 # type: ignore[attr-defined]
 from depthai_viewer import version as depthai_viewer_version
@@ -15,6 +16,8 @@
 venv_python = (
     os.path.join(venv_dir, "Scripts", "python") if sys.platform == "win32" else os.path.join(venv_dir, "bin", "python")
 )
+# The default blobconverter location. (blobconverter's own default is a "__"-protected attribute.)
+model_dir = Path.home() / Path(".cache/blobconverter")


 def delete_partially_created_venv(path: str) -> None:
@@ -43,25 +46,19 @@ def get_site_packages() -> str:

 def download_blobs() -> None:
     import blobconverter
-    from depthai_sdk.components.nn_helper import getSupportedModels
-
-    models = [
-        "yolov8n_coco_640x352",
-        "mobilenet-ssd",
-        "face-detection-retail-0004",
-        "age-gender-recognition-retail-0013",
-    ]
-    sdk_models = getSupportedModels(printModels=False)
-    for model in models:
-        zoo_type = None
-        if model in sdk_models:
-            model_config_file = sdk_models[model] / "config.json"
-            config = json.load(open(model_config_file))
-            if "model" in config:
-                model_config: Dict[str, Any] = config["model"]
-                if "model_name" in model_config:
-                    zoo_type = model_config.get("zoo", "intel")
-        blobconverter.from_zoo(model, zoo_type=zoo_type, shaves=6)
+
+    if not os.path.exists(model_dir):
+        os.makedirs(model_dir)
+    models = {
+        "yolov8n_coco_640x352": "depthai",
+        "mobilenet-ssd": "intel",
+        "face-detection-retail-0004": "intel",
+        "age-gender-recognition-retail-0013": "intel",
+        "yolov6n_thermal_people_256x192": "depthai",
+    }
+    for model, zoo_type in models.items():
+        # use_cache=True would leave the blob in blobconverter's own cache instead of placing it in model_dir.
+        blobconverter.from_zoo(model, zoo_type=zoo_type, shaves=6, output_dir=model_dir, use_cache=False)


 def dependencies_installed() -> bool: