From 198a9a011e4f12f10f47fb3da6c59f365f0c2fa6 Mon Sep 17 00:00:00 2001 From: Pietro Date: Mon, 1 Apr 2024 11:06:12 +0200 Subject: [PATCH 1/2] fixed bugs --- RAM_Net/model/metric.py | 2 +- RAM_Net/train.py | 2 +- README.md | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/RAM_Net/model/metric.py b/RAM_Net/model/metric.py index 59a0e8b..b85c1a5 100644 --- a/RAM_Net/model/metric.py +++ b/RAM_Net/model/metric.py @@ -1,5 +1,5 @@ from sklearn.metrics import mean_squared_error -from skimage.measure import compare_ssim as ssim +from skimage.metrics import structural_similarity as ssim import torch import numpy as np from scipy import ndimage diff --git a/RAM_Net/train.py b/RAM_Net/train.py index 536ab9d..d77b4ff 100644 --- a/RAM_Net/train.py +++ b/RAM_Net/train.py @@ -9,7 +9,7 @@ from torch.utils.data import DataLoader, ConcatDataset from data_loader.dataset import * from trainer.lstm_trainer import LSTMTrainer -from trainer.trainer_no_recurrent import TrainerNoRecurrent +#from trainer.trainer_no_recurrent import TrainerNoRecurrent from utils.data_augmentation import Compose, RandomRotationFlip, RandomCrop, CenterCrop from os.path import join import bisect diff --git a/README.md b/README.md index 21cd5d5..8c056e2 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,9 @@ follows (make sure to adapt the CUDA toolkit version according to your setup): ```bash conda create --name RAMNET python=3.7 +conda activate RAMNET conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch -pip install tb-nightly kornia scikit-learn scikit-image opencv-python +pip install tb-nightly kornia scikit-learn scikit-image opencv-python matplotlib ``` ## Branches From 5aee9664696efaebb86194fb178ac0645b9fa016 Mon Sep 17 00:00:00 2001 From: Pietro Date: Mon, 1 Apr 2024 12:44:17 +0200 Subject: [PATCH 2/2] prepare dataset --- RAM_Net/configs/README.md | 4 +-- ...epth_si_grad_loss_statenet_baseline_e.json | 27 ++++++++++------ 
...h_si_grad_loss_statenet_baseline_ergb.json | 27 ++++++++++------ ...s_statenet_baseline_ergb_no_recurrent.json | 23 ++++++++----- ...th_si_grad_loss_statenet_baseline_rgb.json | 20 ++++++------ ...in_e2depth_si_grad_loss_statenet_ergb.json | 32 +++++++++++++------ README.md | 7 ++++ download_dataset.sh | 28 ++++++++++++++++ 8 files changed, 119 insertions(+), 49 deletions(-) create mode 100755 download_dataset.sh diff --git a/RAM_Net/configs/README.md b/RAM_Net/configs/README.md index 9ec376a..9d813ea 100644 --- a/RAM_Net/configs/README.md +++ b/RAM_Net/configs/README.md @@ -10,7 +10,7 @@ Specifies the type of dataloader, either SequenceMVSEC or SequenceSynchronizedFr ### type2: For the branch `asynchronous_irregular_real_data` a second dataset can be specified in order to train on two datasets simultaneously. Also specify the base_folder2 and step_size2 if this is used. ### base_folder: -specify the path of the data folder, starting from the exported path (see README in AMM-Net folder) +specify the path of the data folder, starting from the exported path (see README in RAM-Net folder) ### step_size: specifies the number of skipped datapoints before beginning a new sequence. If sequence = 5 and step_size = 5, each datapoint is only seen one during an epoch. If the the step_size is smaller than the sequence_length, datapoints are seen several times. However, step_size > 0 does not actually skip image frames that the network sees, it only defines where a new sequence should be start with respect to the starting data point from the last sequence. ### clip_distance: @@ -21,7 +21,7 @@ defines how many rgb frames are skipped in order to get asynchronous data read i ### scale_factor: downscales inputs for faster training. ### baseline: -If `false`, AMM-Net is trained. Other options are `rgb`, `e`, `ergb0`. For the `asynchronous_irregular_real_data` branch only `false` and `rgb` can be used. +If `false`, RAM-Net is trained. Other options are `rgb`, `e`, `ergb0`. 
For the `asynchronous_irregular_real_data` branch only `false` and `rgb` can be used. ## trainer: diff --git a/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_e.json b/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_e.json index 7ff1a6c..5548899 100644 --- a/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_e.json +++ b/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_e.json @@ -5,11 +5,11 @@ "data_loader": { "train": { "type": "SequenceSynchronizedFramesEventsDataset", - "base_folder": "dataset_mathias_23_07/train", + "base_folder": "train", "depth_folder": "depth/data", "frame_folder": "rgb/data", "flow_folder": "", - "event_folder": "events/voxels", + "event_folder": "events/data", "proba_pause_when_running": 0.0, "proba_pause_when_paused": 0.0, "step_size": 5, @@ -20,11 +20,11 @@ }, "validation": { "type": "SequenceSynchronizedFramesEventsDataset", - "base_folder": "dataset_mathias_23_07/validation", + "base_folder": "val", "frame_folder": "rgb/data", "depth_folder": "depth/data", "flow_folder": "", - "event_folder": "events/voxels", + "event_folder": "events/data", "proba_pause_when_running": 0.0, "proba_pause_when_paused": 0.0, "step_size": 5, @@ -51,7 +51,10 @@ }, "loss": { "type": "scale_invariant_loss", - "config": {"weight": 1.0, "n_lambda" : 1.0} + "config": { + "weight": 1.0, + "n_lambda": 1.0 + } }, "grad_loss": { "weight": 0.25 @@ -65,7 +68,7 @@ "trainer": { "epochs": 201, "sequence_length": 10, - "save_dir": "/data/storage/michelle/e2depth_checkpoints/EventScape", + "save_dir": "/archive/sony/pietro/ramnet_eventscape", "save_freq": 4, "verbosity": 2, "monitor": "val_loss", @@ -75,8 +78,9 @@ "still_previews": true, "grid_loss": true, "loss_composition": "image", - "loss_weights": [1] - + "loss_weights": [ + 1 + ] }, "arch": "ERGB2DepthRecurrent", "use_phased_arch": false, @@ -86,11 +90,14 @@ "skip_type": "sum", "recurrent_block_type": "conv", "state_combination": "convlstm", - "spatial_resolution": 
[112, 112], + "spatial_resolution": [ + 112, + 112 + ], "num_encoders": 3, "base_num_channels": 32, "num_residual_blocks": 2, "use_upsample_conv": true, "norm": "none" } -} +} \ No newline at end of file diff --git a/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_ergb.json b/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_ergb.json index 7afcf46..92d119a 100644 --- a/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_ergb.json +++ b/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_ergb.json @@ -5,11 +5,11 @@ "data_loader": { "train": { "type": "SequenceSynchronizedFramesEventsDataset", - "base_folder": "dataset_mathias_23_07/train", + "base_folder": "train", "depth_folder": "depth/data", "frame_folder": "rgb/data", "flow_folder": "", - "event_folder": "events/voxels", + "event_folder": "events/data", "proba_pause_when_running": 0.0, "proba_pause_when_paused": 0.0, "step_size": 5, @@ -21,11 +21,11 @@ }, "validation": { "type": "SequenceSynchronizedFramesEventsDataset", - "base_folder": "dataset_mathias_23_07/validation", + "base_folder": "val", "frame_folder": "rgb/data", "depth_folder": "depth/data", "flow_folder": "", - "event_folder": "events/voxels", + "event_folder": "events/data", "proba_pause_when_running": 0.0, "proba_pause_when_paused": 0.0, "step_size": 5, @@ -53,7 +53,10 @@ }, "loss": { "type": "scale_invariant_loss", - "config": {"weight": 1.0, "n_lambda" : 1.0} + "config": { + "weight": 1.0, + "n_lambda": 1.0 + } }, "grad_loss": { "weight": 0.25 @@ -67,7 +70,7 @@ "trainer": { "epochs": 201, "sequence_length": 10, - "save_dir": "/data/storage/michelle/e2depth_checkpoints/EventScape", + "save_dir": "/archive/sony/pietro/ramnet_eventscape", "save_freq": 4, "verbosity": 2, "monitor": "val_loss", @@ -77,8 +80,9 @@ "still_previews": true, "grid_loss": true, "loss_composition": "image", - "loss_weights": [1] - + "loss_weights": [ + 1 + ] }, "arch": "ERGB2DepthRecurrent", "use_phased_arch": false, @@ -88,11 
+92,14 @@ "skip_type": "sum", "recurrent_block_type": "conv", "state_combination": "convlstm", - "spatial_resolution": [112, 112], + "spatial_resolution": [ + 112, + 112 + ], "num_encoders": 3, "base_num_channels": 32, "num_residual_blocks": 2, "use_upsample_conv": true, "norm": "none" } -} +} \ No newline at end of file diff --git a/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_ergb_no_recurrent.json b/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_ergb_no_recurrent.json index bafb247..fbe537a 100644 --- a/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_ergb_no_recurrent.json +++ b/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_ergb_no_recurrent.json @@ -5,7 +5,7 @@ "data_loader": { "train": { "type": "SequenceSynchronizedFramesEventsDataset", - "base_folder": "dataset_mathias_23_07/train", + "base_folder": "train", "depth_folder": "depth/data", "frame_folder": "rgb/data", "flow_folder": "", @@ -21,7 +21,7 @@ }, "validation": { "type": "SequenceSynchronizedFramesEventsDataset", - "base_folder": "dataset_mathias_23_07/validation", + "base_folder": "val", "frame_folder": "rgb/data", "depth_folder": "depth/data", "flow_folder": "", @@ -53,7 +53,10 @@ }, "loss": { "type": "scale_invariant_loss", - "config": {"weight": 1.0, "n_lambda" : 1.0} + "config": { + "weight": 1.0, + "n_lambda": 1.0 + } }, "grad_loss": { "weight": 0.25 @@ -67,7 +70,7 @@ "trainer": { "epochs": 100, "sequence_length": 1, - "save_dir": "/data/storage/michelle/e2depth_checkpoints/final_tests", + "save_dir": "/archive/sony/pietro/ramnet_eventscape", "save_freq": 4, "verbosity": 2, "monitor": "val_loss", @@ -77,8 +80,9 @@ "still_previews": true, "grid_loss": true, "loss_composition": "image", - "loss_weights": [1] - + "loss_weights": [ + 1 + ] }, "arch": "ERGB2Depth", "use_phased_arch": false, @@ -88,11 +92,14 @@ "skip_type": "sum", "recurrent_block_type": "conv", "state_combination": "convlstm", - "spatial_resolution": [112, 112], + 
"spatial_resolution": [ + 112, + 112 + ], "num_encoders": 3, "base_num_channels": 32, "num_residual_blocks": 2, "use_upsample_conv": true, "norm": "none" } -} +} \ No newline at end of file diff --git a/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_rgb.json b/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_rgb.json index 39778fe..a970093 100644 --- a/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_rgb.json +++ b/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_baseline_rgb.json @@ -5,11 +5,11 @@ "data_loader": { "train": { "type": "SequenceSynchronizedFramesEventsDataset", - "base_folder": "dataset_mathias_23_07/train", + "base_folder": "train", "depth_folder": "depth/data", "frame_folder": "rgb/data", "flow_folder": "", - "event_folder": "events/voxels", + "event_folder": "events/data", "proba_pause_when_running": 0.0, "proba_pause_when_paused": 0.0, "step_size": 5, @@ -21,11 +21,11 @@ }, "validation": { "type": "SequenceSynchronizedFramesEventsDataset", - "base_folder": "dataset_mathias_23_07/validation", + "base_folder": "val", "frame_folder": "rgb/data", "depth_folder": "depth/data", "flow_folder": "", - "event_folder": "events/voxels", + "event_folder": "events/data", "proba_pause_when_running": 0.0, "proba_pause_when_paused": 0.0, "step_size": 5, @@ -55,7 +55,8 @@ "type": "scale_invariant_loss", "config": { "weight": 1.0, - "n_lambda": 1.0} + "n_lambda": 1.0 + } }, "grad_loss": { "weight": 0.25 @@ -71,7 +72,7 @@ "trainer": { "epochs": 201, "sequence_length": 10, - "save_dir": "/data/storage/michelle/e2depth_checkpoints/EventScape", + "save_dir": "/archive/sony/pietro/ramnet_eventscape", "save_freq": 4, "verbosity": 2, "monitor": "val_loss", @@ -81,8 +82,9 @@ "still_previews": true, "grid_loss": true, "loss_composition": "image", - "loss_weights": [1] - + "loss_weights": [ + 1 + ] }, "arch": "ERGB2DepthRecurrent", "use_phased_arch": false, @@ -102,4 +104,4 @@ "use_upsample_conv": true, "norm": "none" } -} +} \ No 
newline at end of file diff --git a/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_ergb.json b/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_ergb.json index b5867ea..6654ef0 100644 --- a/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_ergb.json +++ b/RAM_Net/configs/train_e2depth_si_grad_loss_statenet_ergb.json @@ -5,11 +5,11 @@ "data_loader": { "train": { "type": "SequenceSynchronizedFramesEventsDataset", - "base_folder": "dataset_mathias_23_07/train", + "base_folder": "train", "depth_folder": "depth/data", "frame_folder": "rgb/data", "flow_folder": "", - "event_folder": "events/voxels", + "event_folder": "events/data", "proba_pause_when_running": 0.0, "proba_pause_when_paused": 0.0, "step_size": 5000, @@ -21,11 +21,11 @@ }, "validation": { "type": "SequenceSynchronizedFramesEventsDataset", - "base_folder": "dataset_mathias_23_07/validation", + "base_folder": "val", "frame_folder": "rgb/data", "depth_folder": "depth/data", "flow_folder": "", - "event_folder": "events/voxels", + "event_folder": "events/data", "proba_pause_when_running": 0.0, "proba_pause_when_paused": 0.0, "step_size": 5000, @@ -53,7 +53,10 @@ }, "loss": { "type": "scale_invariant_loss", - "config": {"weight": 1.0, "n_lambda" : 1.0} + "config": { + "weight": 1.0, + "n_lambda": 1.0 + } }, "grad_loss": { "weight": 0.25 @@ -67,7 +70,7 @@ "trainer": { "epochs": 201, "sequence_length": 10, - "save_dir": "/data/storage/michelle/e2depth_checkpoints/final_tests", + "save_dir": "/archive/sony/pietro/ramnet_eventscape", "save_freq": 4, "verbosity": 2, "monitor": "val_loss", @@ -76,8 +79,14 @@ "num_val_previews": 2, "still_previews": true, "grid_loss": true, - "loss_composition": ["image", "events4"], - "loss_weights": [1, 1] + "loss_composition": [ + "image", + "events4" + ], + "loss_weights": [ + 1, + 1 + ] }, "arch": "ERGB2DepthRecurrent", "use_phased_arch": false, @@ -87,11 +96,14 @@ "skip_type": "sum", "recurrent_block_type": "conv", "state_combination": "convgru", - "spatial_resolution": 
[112,112], + "spatial_resolution": [ + 112, + 112 + ], "num_encoders": 3, "base_num_channels": 32, "num_residual_blocks": 2, "use_upsample_conv": true, "norm": "none" } -} +} \ No newline at end of file diff --git a/README.md b/README.md index 8c056e2..8408317 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,13 @@ This work uses the EventScape dataset which can be downloaded here: * [Validation Set (12 Gb)](http://rpg.ifi.uzh.ch/data/RAM_Net/dataset/Town05_val.zip) * [Test Set (14 Gb)](http://rpg.ifi.uzh.ch/data/RAM_Net/dataset/Town05_test.zip) +To download and prepare the dataset, run this script: + +``` +chmod u+x download_dataset.sh +./download_dataset.sh +``` +

Video to Events
diff --git a/download_dataset.sh b/download_dataset.sh
new file mode 100755
index 0000000..9bfd019
--- /dev/null
+++ b/download_dataset.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+path_to_dataset="$(cd "${1:-/archive/ferrari/event_scape}" && pwd)" || exit 1  # dataset root as an absolute path; optional first argument overrides the default
+cd "$path_to_dataset" || exit 1  # never download or unpack into whatever directory the caller happens to be in
+
+# Download each archive; unzip runs only when its download succeeded
+wget http://rpg.ifi.uzh.ch/data/RAM_Net/dataset/Town01-03_train.zip && unzip Town01-03_train.zip
+wget http://rpg.ifi.uzh.ch/data/RAM_Net/dataset/Town05_val.zip && unzip Town05_val.zip
+wget http://rpg.ifi.uzh.ch/data/RAM_Net/dataset/Town05_test.zip && unzip Town05_test.zip
+
+# Move every sequence folder of town $1 into $path_to_dataset/$2, appending "_<town>" to the folder name
+rename_folders() {
+    local town_name=$1
+    local dataset_type_name=$2
+    for folder in "$path_to_dataset/$town_name/"*/; do
+        [ -d "$folder" ] || continue  # an unmatched glob stays literal; skip it instead of handing it to mv
+        new_folder_name="$(basename "$folder")_${town_name}"
+        path_to_new_folder="$path_to_dataset/${dataset_type_name}/$new_folder_name"
+        mv "$folder" "$path_to_new_folder"
+    done
+}
+
+mkdir -p train  # -p keeps a re-run from failing on an existing folder
+rename_folders "Town01" "train"
+rename_folders "Town02" "train"
+rename_folders "Town03" "train"
+# NOTE(review): Town05_test.zip is unpacked above but never sorted into a split; if it also extracts into Town05/, its sequences land in val - confirm the archive layout
+mkdir -p val
+rename_folders "Town05" "val"
\ No newline at end of file