diff --git a/examples/python/detect_and_track_objects/README.md b/examples/python/detect_and_track_objects/README.md
index 4e4dfc7ab137..b42feeb5d6e7 100644
--- a/examples/python/detect_and_track_objects/README.md
+++ b/examples/python/detect_and_track_objects/README.md
@@ -17,7 +17,7 @@ Visualize object detection and segmentation using the [Huggingface's Transformer
 ## Used Rerun types
 
-[`Image`](https://www.rerun.io/docs/reference/types/archetypes/image), [`SegmentationImage`](https://www.rerun.io/docs/reference/types/archetypes/segmentation_image), [`AnnotationContext`](https://www.rerun.io/docs/reference/types/archetypes/annotation_context), [`Boxes2D`](https://www.rerun.io/docs/reference/types/archetypes/boxes2d), [`TextLog`](https://www.rerun.io/docs/reference/types/archetypes/text_log)
+[`Image`](https://www.rerun.io/docs/reference/types/archetypes/image), [`AssetVideo`](https://www.rerun.io/docs/reference/types/archetypes/asset_video), [`VideoFrameReference`](https://www.rerun.io/docs/reference/types/archetypes/video_frame_reference), [`SegmentationImage`](https://www.rerun.io/docs/reference/types/archetypes/segmentation_image), [`AnnotationContext`](https://www.rerun.io/docs/reference/types/archetypes/annotation_context), [`Boxes2D`](https://www.rerun.io/docs/reference/types/archetypes/boxes2d), [`TextLog`](https://www.rerun.io/docs/reference/types/archetypes/text_log)
 
 ## Background
 
 In this example, CSRT (Channel and Spatial Reliability Tracker), a tracking API introduced in OpenCV, is employed for object detection and tracking across frames.
@@ -36,12 +36,21 @@ rr.set_time_sequence("frame", frame_idx)
 ```
 
 ### Video
 
-The input video is logged as a sequence of [`Image`](https://www.rerun.io/docs/reference/types/archetypes/image) to the `image` entity.
+The input video is logged as a static [`AssetVideo`](https://www.rerun.io/docs/reference/types/archetypes/asset_video) to the `video` entity.
+
+```python
+video_asset = rr.AssetVideo(path=video_path)
+frame_timestamps_ns = video_asset.read_frame_timestamps_ns()
+
+rr.log("video", video_asset, static=True)
+```
+
+Each frame is processed and its timestamp is logged to the `frame` timeline using a [`VideoFrameReference`](https://www.rerun.io/docs/reference/types/archetypes/video_frame_reference).
 
 ```python
 rr.log(
-    "image",
-    rr.Image(rgb).compress(jpeg_quality=85)
+    "video",
+    rr.VideoFrameReference(nanoseconds=frame_timestamps_ns[frame_idx])
 )
 ```
diff --git a/examples/python/detect_and_track_objects/detect_and_track_objects.py b/examples/python/detect_and_track_objects/detect_and_track_objects.py
index b3028116c060..d9be4f0e7ce6 100755
--- a/examples/python/detect_and_track_objects/detect_and_track_objects.py
+++ b/examples/python/detect_and_track_objects/detect_and_track_objects.py
@@ -205,7 +205,7 @@ def update(self, bgr: cv2.typing.MatLike) -> None:
     def log_tracked(self) -> None:
         if self.is_tracking:
             rr.log(
-                f"image/tracked/{self.tracking_id}",
+                f"video/tracked/{self.tracking_id}",
                 rr.Boxes2D(
                     array=self.tracked.bbox_xywh,
                     array_format=rr.Box2DFormat.XYWH,
@@ -213,7 +213,7 @@ def log_tracked(self) -> None:
                 ),
             )
         else:
-            rr.log(f"image/tracked/{self.tracking_id}", rr.Clear(recursive=False))  # TODO(#3381)
+            rr.log(f"video/tracked/{self.tracking_id}", rr.Clear(recursive=False))  # TODO(#3381)
 
     def update_with_detection(self, detection: Detection, bgr: cv2.typing.MatLike) -> None:
         self.num_recent_undetected_frames = 0
@@ -339,6 +339,11 @@ def track_objects(video_path: str, *, max_frame_count: int | None) -> None:
     detector = Detector(coco_categories=coco_categories)
     logging.info("Detector initialized.")
 
+    video_asset = rr.AssetVideo(path=video_path)
+    frame_timestamps_ns = video_asset.read_frame_timestamps_ns()
+
+    rr.log("video", video_asset, static=True)
+
     logging.info("Loading input video: %s", str(video_path))
     cap = cv2.VideoCapture(video_path)
     frame_idx = 0
@@ -358,7 +363,7 @@ def track_objects(video_path: str, *, max_frame_count: int | None) -> None:
             break
 
         rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
-        rr.log("image", rr.Image(rgb).compress(jpeg_quality=85))
+        rr.log("video", rr.VideoFrameReference(nanoseconds=frame_timestamps_ns[frame_idx]))
 
         if not trackers or frame_idx % 40 == 0:
            detections = detector.detect_objects_to_track(rgb=rgb, frame_idx=frame_idx)
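For reviewers who want to try the pattern in isolation, here is a minimal, self-contained sketch of what this diff does: log the video file once as a static asset, then reference individual frames by timestamp instead of re-logging pixels. The `rr.init` call and the `input.mp4` path are assumptions for a standalone run, not part of the patch; the Rerun calls themselves (`AssetVideo`, `read_frame_timestamps_ns`, `VideoFrameReference`) are the ones the patch uses.

```python
import cv2
import rerun as rr

rr.init("rerun_example_video_frame_reference", spawn=True)

video_path = "input.mp4"  # hypothetical input file, not from the patch

# Log the video file once as a static asset; the viewer decodes it natively.
video_asset = rr.AssetVideo(path=video_path)
rr.log("video", video_asset, static=True)

# Per-frame presentation timestamps, used to match each processed frame
# to the corresponding frame of the logged asset.
frame_timestamps_ns = video_asset.read_frame_timestamps_ns()

cap = cv2.VideoCapture(video_path)
frame_idx = 0
while cap.isOpened() and frame_idx < len(frame_timestamps_ns):
    ok, bgr = cap.read()  # decoded frames still feed the detector/tracker
    if not ok:
        break
    rr.set_time_sequence("frame", frame_idx)
    # Reference the already-logged frame instead of logging a new image.
    rr.log("video", rr.VideoFrameReference(nanoseconds=frame_timestamps_ns[frame_idx]))
    frame_idx += 1
cap.release()
```

Compared with the old `rr.Image(rgb).compress(jpeg_quality=85)` path, each frame's pixels are stored only once, inside the video asset, and the per-frame log call carries just a timestamp.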