From 2e1a3c0567e8fcad35d2697bb9d1a2c424c6820b Mon Sep 17 00:00:00 2001
From: Shunsuke Miura <37187849+miursh@users.noreply.github.com>
Date: Mon, 18 Dec 2023 11:05:14 +0900
Subject: [PATCH 1/2] feat: add tracking sim converter test (#63)

* feat: add tracking sim converter test

Signed-off-by: Shunsuke Miura

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add document

Signed-off-by: Shunsuke Miura

---------

Signed-off-by: Shunsuke Miura
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 docs/tracking_simulator_conversions.md        |  25 ++
 ...rt_tracking_debugger_to_tracking_eval.yaml |  16 +
 tests/test_tracking_sim_dataset_conversion.py | 345 ++++++++++++++++++
 3 files changed, 386 insertions(+)
 create mode 100644 docs/tracking_simulator_conversions.md
 create mode 100644 tests/config/convert_tracking_debugger_to_tracking_eval.yaml
 create mode 100644 tests/test_tracking_sim_dataset_conversion.py

diff --git a/docs/tracking_simulator_conversions.md b/docs/tracking_simulator_conversions.md
new file mode 100644
index 00000000..c75c859d
--- /dev/null
+++ b/docs/tracking_simulator_conversions.md
@@ -0,0 +1,25 @@
+# Tools Overview
+
+This document gives a concise overview of the scripts used in the conversion process for the tracking simulator.
+Before reading it, we recommend the [tools overview](./tools_overview.md) for a comprehensive description of the entire repository.
+
+## tracking debugger rosbag to annotated T4 format
+
+input: rosbag2 with ground_truth/objects
+output: T4 format data
+
+```bash
+python -m perception_dataset.convert --config config/rosbag2_to_t4/convert_tracking_debugger_to_tracking_eval.yaml
+```
+
+## scenario simulator rosbag
+
+This section covers using the rosbag output generated by the planning scenario simulator.
+The T4 format of this output is useful for running regression tests on the tracking module.
+
+input: rosbag2 with ground_truth/objects and obstacle_segmentation/pointcloud
+output: T4 format data
+
+```bash
+python -m perception_dataset.convert --config config/rosbag2_to_t4/convert_scenario_sim_to_tracking_eval.yaml
+```

diff --git a/tests/config/convert_tracking_debugger_to_tracking_eval.yaml b/tests/config/convert_tracking_debugger_to_tracking_eval.yaml
new file mode 100644
index 00000000..ad2960ab
--- /dev/null
+++ b/tests/config/convert_tracking_debugger_to_tracking_eval.yaml
@@ -0,0 +1,16 @@
+task: convert_rosbag2_to_t4_tracking
+description:
+  scene: ["tracking_regression", "synthetic"]
+conversion:
+  # path to rosbag dir output by simulator
+  input_base: ./data/rosbag2_tracking_debugger
+  output_base: ./data/pseudo_labeling_t4_format
+  skip_timestamp: 1.0
+  num_load_frames: 0
+  object_topic_name: /perception/object_recognition/ground_truth/objects
+  object_msg_type: TrackedObjects
+  lidar_sensor: # tracking debugger data has no lidar
+    topic: ""
+    channel: ""
+  camera_sensors: [] # synthetic data has no images
+  topic_list: ./config/topic_list_tracking_eval.yaml

diff --git a/tests/test_tracking_sim_dataset_conversion.py b/tests/test_tracking_sim_dataset_conversion.py
new file mode 100644
index 00000000..2b3a45ac
--- /dev/null
+++ b/tests/test_tracking_sim_dataset_conversion.py
@@ -0,0 +1,345 @@
+import json
+import os
+import os.path as osp
+import shutil
+
+from nuscenes.nuscenes import NuScenes
+import pandas as pd
+import pytest
+import yaml
+
+from perception_dataset.constants import SENSOR_ENUM, T4_FORMAT_DIRECTORY_NAME
+from perception_dataset.rosbag2.converter_params import DataType, Rosbag2ConverterParams
+from perception_dataset.rosbag2.rosbag2_to_t4_tracking_converter import (
+    Rosbag2ToT4TrackingConverter,
+)
+from perception_dataset.t4_dataset.data_validator import validate_data_hz
+from perception_dataset.t4_dataset.format_validator import (
+    validate_directory_structure,
+    validate_format,
+)
+from tests.constants import TEST_CONFIG_ROOT_DIR, TEST_ROOT_DIR
+
+
+@pytest.fixture(scope="module")
+def t4_dataset_path(request):
+    test_rosbag_name = "tracking_sim_sample_data"
+    # before test - convert rosbag2 to t4
+    with open(TEST_CONFIG_ROOT_DIR / "convert_tracking_debugger_to_tracking_eval.yaml") as f:
+        config_dict = yaml.safe_load(f)
+
+    input_base = osp.join(TEST_ROOT_DIR, config_dict["conversion"]["input_base"])
+    output_base = osp.join(TEST_ROOT_DIR, config_dict["conversion"]["output_base"])
+
+    config_dict["conversion"]["input_base"] = input_base
+    config_dict["conversion"]["output_base"] = output_base
+    assert osp.exists(input_base), f"input_base doesn't exist: {input_base}"
+
+    param_args = {
+        "task": config_dict["task"],
+        "data_type": DataType.SYNTHETIC,
+        "scene_description": config_dict["description"]["scene"],
+        "overwrite_mode": True,
+        **config_dict["conversion"],
+    }
+    converter_params = Rosbag2ConverterParams(**param_args)
+    converter = Rosbag2ToT4TrackingConverter(converter_params)
+
+    converter.convert()
+
+    # provide a path to the converted t4_dataset
+    yield osp.join(output_base, test_rosbag_name, "t4_dataset")
+
+    # after test - remove resources
+    shutil.rmtree(output_base, ignore_errors=True)
+
+
+@pytest.fixture
+def attribute_list():
+    with open(TEST_CONFIG_ROOT_DIR / "label/attribute.yaml") as f:
+        attribute_dict = yaml.safe_load(f)
+    attribute_list = []
+    for k, v in attribute_dict.items():
+        for key in v.keys():
+            attribute_list.append(f"{k}.{key}")
+
+    return attribute_list
+
+
+@pytest.fixture
+def category_list():
+    with open(TEST_CONFIG_ROOT_DIR / "label/object.yaml") as f:
+        category_dict = yaml.safe_load(f)
+    return list(category_dict.keys())
+
+
+def load_json(t4_dataset_path, file_name):
+    with open(osp.join(t4_dataset_path, "annotation", f"{file_name}.json")) as f:
+        return json.load(f)
+
+
+@pytest.mark.parametrize("t4_dataset_path", [True, False], indirect=True)
+def test_t4_dataset_format(t4_dataset_path):
+    validate_directory_structure(t4_dataset_path)
+
+    nusc = NuScenes(
+        version=T4_FORMAT_DIRECTORY_NAME.ANNOTATION.value,
+        dataroot=t4_dataset_path,
+        verbose=False,
+    )
+
+    validate_format(nusc, t4_dataset_path)
+    validate_data_hz(nusc)
+
+
+def get_empty(df, col):
+    # indexes of the middle rows (first and last excluded) whose value in `col` is empty
+    rows = df.iloc[1:-1]
+    return rows[rows[col] == ""].index.tolist()
+
+
+@pytest.mark.parametrize("t4_dataset_path", [True, False], indirect=True)
+def test_rosbag2_converter_dataset_consistency(t4_dataset_path):
+    sample_annotation = load_json(t4_dataset_path, "sample_annotation")
+    grouped = pd.DataFrame(sample_annotation).groupby("instance_token")
+    for _, annotations in grouped:
+        if len(annotations) == 1:
+            assert annotations.iloc[0]["prev"] == ""
+            assert annotations.iloc[0]["next"] == ""
+            continue
+
+        # First frame doesn't have prev frame
+        assert annotations.iloc[0]["prev"] == ""
+        assert annotations.iloc[0]["next"]
+
+        # Last frame doesn't have next frame
+        assert annotations.iloc[-1]["prev"]
+        assert annotations.iloc[-1]["next"] == ""
+
+        if len(annotations) <= 2:
+            continue
+
+        # All other frames should have both prev and next
+        assert (
+            annotations.iloc[1:-1]["next"] != ""
+        ).all(), f'next is empty at indexes {get_empty(annotations, "next")}'
+        assert (
+            annotations.iloc[1:-1]["prev"] != ""
+        ).all(), f'prev is empty at indexes {get_empty(annotations, "prev")}'
+
+        # All other frames should have prev and next that are not equal
+        assert (
+            annotations.iloc[1:-1]["prev"] != annotations.iloc[1:-1]["next"]
+        ).all()
+
+
+@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True)
+def test_attribute_json(t4_dataset_path, attribute_list):
+    attribute_json = load_json(t4_dataset_path, "attribute")
+    assert len(attribute_json) == 0, f"attribute_json length is {len(attribute_json)}, expected 0"
+    assert len(attribute_json) <= len(
+        attribute_list
+    ), f"attribute_json length is more than {len(attribute_list)}: {attribute_list}"
+    for attribute in attribute_json:
+        assert attribute["name"], "name is empty"
+        assert attribute["token"], "token is empty"
+        assert (
+            attribute["name"] in attribute_list
+        ), f"{attribute['name']} is not in {attribute_list}"
+
+
+@pytest.mark.parametrize("t4_dataset_path", [False], indirect=True)
+def test_calibrated_sensor_json(t4_dataset_path):
+    calibrated_sensor = load_json(t4_dataset_path, "calibrated_sensor")
+    assert len(calibrated_sensor) == 1
+
+
+@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True)
+def test_category_json(t4_dataset_path, category_list):
+    category_json = load_json(t4_dataset_path, "category")
+    assert len(category_json) == 1, f"category length is {len(category_json)}, expected 1"
+    for category in category_json:
+        assert category["name"], "name is empty"
+        assert category["token"], "token is empty"
+        assert category["name"] in category_list, f"{category['name']} is not in {category_list}"
+
+
+@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True)
+def test_ego_pose_json(t4_dataset_path):
+    ego_pose_json = load_json(t4_dataset_path, "ego_pose")
f"ego_pose length is {len(ego_pose_json)}, expected 88" + assert ( + ego_pose_json[0]["timestamp"] == 1699630502235902 + ), "the first timestamp of ego_pose is not 1699630502.235902" + assert ( + ego_pose_json[-1]["timestamp"] == 1699630510935894 + ), "the last timestamp of ego_pose is not 1699630510.935894" + for ego_pose in ego_pose_json: + assert ego_pose["translation"], "translation is empty" + assert ego_pose["rotation"], "rotation is empty" + assert ego_pose["token"], "token is empty" + assert ego_pose["timestamp"], "timestamp is empty" + + +@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True) +def test_instance_json(t4_dataset_path): + instance_json = load_json(t4_dataset_path, "instance") + assert len(instance_json) == 1, f"instance length is {len(instance_json)}, expected 1" + for instance in instance_json: + assert instance["token"], "token is empty" + assert instance["category_token"], "category_token is empty" + assert instance["instance_name"], "instance_name is empty" + assert instance["nbr_annotations"] >= 0, "nbr_annotations is empty" + if instance["nbr_annotations"] > 0: + assert instance["first_annotation_token"], "first_annotation_token is empty" + assert instance["last_annotation_token"], "last_annotation_token is empty" + + +@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True) +def test_log_json(t4_dataset_path): + log_json = load_json(t4_dataset_path, "log") + assert len(log_json) == 1, f"log length is {len(log_json)}, expected 1" + for log in log_json: + assert log["token"], "token is empty" + assert log["logfile"] == "", "logfile is empty" + assert log["vehicle"] == "", "vehicle is empty" + assert log["data_captured"] == "", "data_captured is empty" + assert log["location"] == "", "location is empty" + + +@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True) +def test_map_json(t4_dataset_path): + map_json = load_json(t4_dataset_path, "map") + assert len(map_json) == 1, f"map length is {len(map_json)}, expected 1" + for map_ in map_json: + assert map_["token"], "token is empty" + assert map_["log_tokens"], "log_tokens is empty" + assert map_["category"] == "", "category is empty" + assert map_["filename"] == "", "filename is empty" + + +@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True) +def test_object_ann_json(t4_dataset_path): + object_ann_json = load_json(t4_dataset_path, "object_ann") + assert len(object_ann_json) == 0, f"object_ann length is {len(object_ann_json)}, expected 0" + + +@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True) +def test_sample_annotation_json(t4_dataset_path): + sample_annotation = load_json(t4_dataset_path, "sample_annotation") + assert ( + len(sample_annotation) == 49 + ), f"sample_annotation length is {len(sample_annotation)}, expected 49" + for sample_anno in sample_annotation: + sample_anno: dict + assert sample_anno["token"], "token is empty" + assert sample_anno["sample_token"], "sample_token is empty" + assert sample_anno["instance_token"], "instance_token is empty" + + assert sample_anno["attribute_tokens"] == [] + assert sample_anno["visibility_token"], "visibility_token is empty" + assert sample_anno["translation"], "translation is empty" + assert "velocity" in sample_anno.keys(), "sample_annotation must have velocity key" + assert "acceleration" in sample_anno.keys(), "sample_annotation must have acceleration key" + assert sample_anno["size"], "size is empty" + assert sample_anno["rotation"], "rotation is empty" + assert sample_anno["num_lidar_pts"] >= 0, 
"num_lidar_pts is empty" + assert sample_anno["num_radar_pts"] >= 0, "num_radar_pts is empty" + + +@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True) +def test_sample_data_json(t4_dataset_path): + sample_data_json = load_json(t4_dataset_path, "sample_data") + assert ( + len(sample_data_json) == 88 + ), f"sample_data length is {len(sample_data_json)}, expected 88" + for sample_data in sample_data_json: + assert sample_data["token"], "token is empty" + assert sample_data["sample_token"], "sample_token is empty" + assert sample_data["ego_pose_token"], "instance_token is empty" + assert sample_data["calibrated_sensor_token"], "calibrated_sensor_token is empty" + assert sample_data["filename"], "filename is empty" + assert sample_data["width"] >= 0, "width is empty" + assert sample_data["height"] >= 0, "height is empty" + assert (not sample_data["is_valid"] and not sample_data["is_key_frame"]) or sample_data[ + "is_valid" + ], f"is_key_frame is {sample_data['is_key_frame']}, is_valid is {sample_data['is_valid']}" + assert "next" in sample_data.keys(), "next is empty" + assert "prev" in sample_data.keys(), "prev is empty" + if sample_data["filename"] == "data/CAM_BACK/00000.jpg": + assert ( + sample_data["timestamp"] == 1660889208947739 + ), "the first back-camera timestamp is not 1660889208.947739" + + +@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True) +def test_sample_json(t4_dataset_path): + sample_json = load_json(t4_dataset_path, "sample") + assert len(sample_json) == 88, f"sample length is {len(sample_json)}, expected 88" + for sample in sample_json: + assert sample["token"], "token is empty" + assert sample["timestamp"], "timestamp is empty" + assert sample["scene_token"], "scene_token is empty" + assert "next" in sample.keys(), "next is empty" + assert "prev" in sample.keys(), "prev is empty" + + +@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True) +def test_scene_json(t4_dataset_path): + scene_json = load_json(t4_dataset_path, "scene") + assert len(scene_json) == 1, f"scene length is {len(scene_json)}, expected 1" + for scene in scene_json: + assert scene["token"], "token is empty" + assert scene["name"], "name is empty" + assert ( + scene["description"] == "tracking_regression, synthetic" + ), "description is not tracking_regression, synthetic" + assert scene["log_token"], "log_token is empty" + assert scene["nbr_samples"], "nbr_samples is empty" + assert scene["first_sample_token"], "first_sample_token is empty" + assert scene["last_sample_token"], "last_sample_token is empty" + + assert scene["nbr_samples"] == 88, f"nbr_samples is {scene['nbr_samples']}, expected 88" + + +@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True) +def test_sensor_json(t4_dataset_path): + sensor_json = load_json(t4_dataset_path, "sensor") + assert len(sensor_json) == 1, f"sensor length is {len(sensor_json)}, expected 1" + for sensor in sensor_json: + assert sensor["token"], "token is empty" + assert SENSOR_ENUM.has_channel(sensor["channel"]) + assert sensor["modality"] in [ + "camera", + "lidar", + ], f"modality is {sensor['modality']} not in ['camera','lidar']" + + +@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True) +def test_surface_ann_json(t4_dataset_path): + surface_ann_json = load_json(t4_dataset_path, "surface_ann") + assert surface_ann_json == [], "surface_ann is not empty" + + +@pytest.mark.parametrize("t4_dataset_path", [True], indirect=True) +def test_visibility_json(t4_dataset_path): + visibility_json = 
From 26f85f71aa2fb08c39432b0a004ca872e3e1b2ce Mon Sep 17 00:00:00 2001
From: kminoda <44218668+kminoda@users.noreply.github.com>
Date: Mon, 18 Dec 2023 18:32:56 +0900
Subject: [PATCH 2/2] fix(annotated_t4_to_deepen): fix file_id output (#64)

* fix(annotated_t4_to_deepen): fix file_id output

Signed-off-by: kminoda

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: kminoda
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Shunsuke Miura <37187849+miursh@users.noreply.github.com>
---
 .../deepen/annotated_t4_to_deepen_converter.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/perception_dataset/deepen/annotated_t4_to_deepen_converter.py b/perception_dataset/deepen/annotated_t4_to_deepen_converter.py
index a08bf5ce..055954db 100644
--- a/perception_dataset/deepen/annotated_t4_to_deepen_converter.py
+++ b/perception_dataset/deepen/annotated_t4_to_deepen_converter.py
@@ -83,6 +83,11 @@ def _convert_one_scene(self, input_dir: str, scene_name: str):
             sample_data_record = nusc.get("sample_data", sample_record["data"][sensor])
 
             file_id = osp.basename(sample_data_record["filename"]).replace(".pcd.bin", ".pcd")
+
+            # Original T4 format names the file_id as 000000.pcd.bin for example.
+            # We need to convert it to 0.pcd in this case.
+            file_id = str(int(file_id.split(".")[0])) + ".pcd"
+
             label_category_id = self._label_converter.convert_label(category_record["name"])
 
             attributes_records = [
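The file_id conversion added above can be illustrated in isolation. A minimal sketch, assuming zero-padded T4 lidar filenames; the helper name and sample paths are hypothetical:

```python
import os.path as osp


def normalize_file_id(filename: str) -> str:
    # Zero-padded T4 name, e.g. "000000.pcd.bin", becomes the Deepen file_id "0.pcd".
    file_id = osp.basename(filename).replace(".pcd.bin", ".pcd")
    return str(int(file_id.split(".")[0])) + ".pcd"


# Hypothetical sample paths for demonstration.
assert normalize_file_id("data/LIDAR_CONCAT/000000.pcd.bin") == "0.pcd"
assert normalize_file_id("data/LIDAR_CONCAT/000012.pcd.bin") == "12.pcd"
```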