From a57aecf63749dd131acdab8012ab8cf7dc34a25c Mon Sep 17 00:00:00 2001
From: gferraro
Date: Wed, 28 Aug 2024 09:12:33 +1200
Subject: [PATCH 001/117] tweak training

---
 src/autobuild.sh               | 10 +++++-----
 src/build.py                   |  2 +-
 src/ml_tools/thermaldataset.py |  8 ++++++++
 src/rebuildDate.py             | 20 ++++++++++++++------
 4 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/src/autobuild.sh b/src/autobuild.sh
index ca7360a4..193a6e6a 100755
--- a/src/autobuild.sh
+++ b/src/autobuild.sh
@@ -3,9 +3,9 @@ set -e
 set -x
 
 config="classifier-thermal.yaml"
-month_ago=$(python3 rebuildDate.py -c $config)
+echo "Saving into $1"
+month_ago=$(python3 rebuildDate.py $1)
 echo $month_ago
-python3 ../../cptv-download/cptv-download.py -l 0 -i 'poor tracking' -i 'untagged' -i 'part' -i 'untagged-by-humans' -i 'unknown' -i 'unidentified' -m 'human-tagged' --start-date "$month_ago" "../clips$month_ago" useremail@email.com userpassword
-echo "Downloading into ../clips$month_ago"
-python3 load.py -target "../clips$month_ago" -c $config
-python3 build.py -c $config
+python3 ../../cptv-download/cptv-download.py -l 0 -i 'poor tracking' -i 'untagged' -i 'part' -i 'untagged-by-humans' -i 'unknown' -i 'unidentified' -m 'human-tagged' --start-date "$month_ago" "$1" useremail@email.com userpassword
+echo "Downloading into $1"
+python3 build.py -c $config --ext ".cptv" $1

diff --git a/src/build.py b/src/build.py
index 5c48a8c2..7b91dad8 100644
--- a/src/build.py
+++ b/src/build.py
@@ -57,7 +57,7 @@ def parse_args():
     )
     parser.add_argument("--split-file", help="Json file defining a split")
     parser.add_argument(
-        "--ext", default=".hdf5", help="Extension of files to load .mp4,.cptv,.hdf5"
+        "--ext", default=".cptv", help="Extension of files to load .mp4,.cptv,.hdf5"
     )
 
     parser.add_argument("-c", "--config-file", help="Path to config file to use")

diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py
index 728b95f1..14125bc7 100644
--- a/src/ml_tools/thermaldataset.py
+++ b/src/ml_tools/thermaldataset.py
@@ -33,12 +33,20 @@ def get_excluded():
         "pest",
         "pig",
         "sealion",
+        "bat",
+        "mammal",
+        "frog",
     ]
 
 
 def get_remapped(multi_label=False):
     land_bird = "land-bird" if multi_label else "bird"
     return {
+        "echidna": "hedgehog",
+        "grey kangaroo": "wallaby",
+        "sambar deer": "deer",
+        "mouse": "rodent",
+        "rat": "rodent",
         "water": "false-positive",
         "insect": "false-positive",
         "allbirds": "bird",

diff --git a/src/rebuildDate.py b/src/rebuildDate.py
index dadf1a6a..7693842d 100644
--- a/src/rebuildDate.py
+++ b/src/rebuildDate.py
@@ -5,15 +5,23 @@
 from config.config import Config
 from datetime import timedelta
 from datetime import date
+from pathlib import Path
+from dateutil.parser import parse as parse_date
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-c", "--config-file", help="Path to config file to use")
+parser.add_argument("data_dir", help="Directory of hdf5 files")
 args = parser.parse_args()
+args.data_dir = Path(args.data_dir)
+latest_date = None
+for db_clip in args.data_dir.glob(f"**/*.cptv"):
+    file_name = db_clip.name
+    hyphen = file_name.index("-")
+    date_s = file_name[hyphen + 1 : hyphen + 16]
+    cptv_dt = parse_date(date_s)
+    if latest_date is None or cptv_dt > latest_date:
+        latest_date = cptv_dt
 
-config = Config.load_from_file(args.config_file)
-db_file = os.path.join(config.tracks_folder, "dataset.hdf5")
-db = TrackDatabase(db_file)
-latest_date = db.latest_date()
-month_ago = latest_date - timedelta(days=30)
+
+month_ago = latest_date - timedelta(days=30 * 6)
 month_ago = month_ago.strftime("%Y-%m-%d 00:00:00")
 print(month_ago)

From ec115ab792228e69a695ac4a99066092f45cdcaf Mon Sep 17 00:00:00 2001
From: gferraro
Date: Wed, 28 Aug 2024 09:19:40 +1200
Subject: [PATCH 002/117] rain to fp

---
 src/autobuild.sh               | 2 ++
 src/ml_tools/thermaldataset.py | 1 +
 2 files changed, 3 insertions(+)

diff --git a/src/autobuild.sh b/src/autobuild.sh
index 193a6e6a..f81f4cc9 100755
--- a/src/autobuild.sh
+++ b/src/autobuild.sh
@@ -9,3 +9,5 @@ echo $month_ago
 python3 ../../cptv-download/cptv-download.py -l 0 -i 'poor tracking' -i 'untagged' -i 'part' -i 'untagged-by-humans' -i 'unknown' -i 'unidentified' -m 'human-tagged' --start-date "$month_ago" "$1" useremail@email.com userpassword
 echo "Downloading into $1"
 python3 build.py -c $config --ext ".cptv" $1
+dt=$(date '+%d%m%Y-%H%M%S');
+python3 train.py $dt
\ No newline at end of file

diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py
index 14125bc7..a10b63b3 100644
--- a/src/ml_tools/thermaldataset.py
+++ b/src/ml_tools/thermaldataset.py
@@ -47,6 +47,7 @@ def get_remapped(multi_label=False):
         "sambar deer": "deer",
         "mouse": "rodent",
         "rat": "rodent",
+        "rain": "false-positive",
         "water": "false-positive",
         "insect": "false-positive",
         "allbirds": "bird",

From a5eb2eef1ab0adb192acf4a79bc8cf208739e86e Mon Sep 17 00:00:00 2001
From: gferraro
Date: Wed, 28 Aug 2024 09:21:48 +1200
Subject: [PATCH 003/117] xla

---
 src/autobuild.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/autobuild.sh b/src/autobuild.sh
index f81f4cc9..fe993483 100755
--- a/src/autobuild.sh
+++ b/src/autobuild.sh
@@ -10,4 +10,5 @@ python3 ../../cptv-download/cptv-download.py -l 0 -i 'poor tracking' -i 'untagged' -i 'part' -i 'untagged-by-humans' -i 'unknown' -i 'unidentified' -m 'human-tagged' --start-date "$month_ago" "$1" useremail@email.com userpassword
 echo "Downloading into $1"
 python3 build.py -c $config --ext ".cptv" $1
 dt=$(date '+%d%m%Y-%H%M%S');
+export XLA_FLAGS=--xla_gpu_cuda_data_dir=/home/cp/miniconda3/envs/tf/lib/
 python3 train.py $dt
\ No newline at end of file

From 26857b90cb1e25df2fb14d2055774360b6d5d2e6 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Wed, 28 Aug 2024 09:22:24 +1200
Subject: [PATCH 004/117] add config

---
 src/autobuild.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/autobuild.sh b/src/autobuild.sh
index fe993483..69f4dfac 100755
--- a/src/autobuild.sh
+++ b/src/autobuild.sh
@@ -11,4 +11,4 @@ echo "Downloading into $1"
 python3 build.py -c $config --ext ".cptv" $1
 dt=$(date '+%d%m%Y-%H%M%S');
 export XLA_FLAGS=--xla_gpu_cuda_data_dir=/home/cp/miniconda3/envs/tf/lib/
-python3 train.py $dt
\ No newline at end of file
+python3 train.py -c $config $dt
\ No newline at end of file
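Patch 001 now infers the rebuild cutoff from the newest CPTV file name rather than from the track database. The slice arithmetic is easier to see on a concrete name; a minimal sketch, assuming names of the form <clipid>-<YYYYMMDD>-<HHMMSS>.cptv (the exact naming convention is an assumption here, it is not shown in the patch):

    # Hypothetical file name, for illustration only.
    from dateutil.parser import parse as parse_date

    file_name = "123456-20240828-091233.cptv"
    hyphen = file_name.index("-")                  # first hyphen, after the clip id
    date_s = file_name[hyphen + 1 : hyphen + 16]   # 15 chars: "20240828-091233"
    cptv_dt = parse_date(date_s)                   # -> 2024-08-28 09:12:33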
echo "Saving into $1" month_ago=$(python3 rebuildDate.py $1) From 79dccd1b7d445f0c6a743586e02dddd995f3c8fc Mon Sep 17 00:00:00 2001 From: gferraro Date: Sun, 15 Sep 2024 16:03:45 +0200 Subject: [PATCH 006/117] fix seg width --- src/ml_tools/datasetstructures.py | 2 +- src/ml_tools/thermaldataset.py | 27 +++++++++++++++++---------- src/ml_tools/thermalwriter.py | 10 ++++++---- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py index a48e247f..c31c3159 100644 --- a/src/ml_tools/datasetstructures.py +++ b/src/ml_tools/datasetstructures.py @@ -922,8 +922,8 @@ def get_segments( track_id, start_frame, regions, - segment_frame_spacing=9, segment_width=25, + segment_frame_spacing=9, label=None, segment_min_mass=None, ffc_frames=[], diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py index a10b63b3..8266685c 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -12,6 +12,7 @@ from ml_tools.featurenorms import mean_v, std_v from ml_tools.frame import TrackChannels +from pathlib import Path # seed = 1341 # tf.random.set_seed(seed) @@ -308,12 +309,13 @@ def tile_images(images): # test stuff def main(): init_logging() - config = Config.load_from_file() + config = Config.load_from_file("classifier-thermal.yaml") from .tfdataset import get_dataset, get_distribution # file = "/home/gp/cacophony/classifier-data/thermal-training/cp-training/training-meta.json" - file = f"{config.tracks_folder}/training-meta.json" - with open(file, "r") as f: + training_folder = Path(config.base_folder) / "training-data" + meta_f = training_folder / "training-meta.json" + with open(meta_f, "r") as f: meta = json.load(f) labels = meta.get("labels", []) datasets = [] @@ -321,7 +323,7 @@ def main(): resampled_ds, remapped, labels, epoch_size = get_dataset( # dir, load_dataset, - f"{config.tracks_folder}/training-data/test", + training_folder / "test", labels, batch_size=32, image_size=(160, 160), @@ -332,21 +334,24 @@ def main(): remapped_labels=get_remapped(), excluded_labels=get_excluded(), include_track=False, - num_frames=1, + num_frames=25, ) print("Ecpoh size is", epoch_size) - print(get_distribution(resampled_ds, len(labels), extra_meta=False)) + # print(get_distribution(resampled_ds, len(labels), extra_meta=False)) # return # - for e in range(2): + save_dir = Path("./test-images") + save_dir.mkdir(exist_ok=True) + for e in range(1): + batch_i = 0 print("epoch", e) for x, y in resampled_ds: - show_batch(x, y, labels) - + show_batch(x, y, labels, save=save_dir / f"{batch_i}.jpg") + batch_i += 1 # return -def show_batch(image_batch, label_batch, labels): +def show_batch(image_batch, label_batch, labels, save=None): plt.figure(figsize=(10, 10)) print("images in batch", len(image_batch), len(label_batch)) num_images = min(len(image_batch), 25) @@ -365,6 +370,8 @@ def show_batch(image_batch, label_batch, labels): plt.title(labels[np.argmax(label_batch[n])]) plt.axis("off") # return + if save: + plt.savefig(save) plt.show() diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py index 8a3a290b..68dba9a8 100644 --- a/src/ml_tools/thermalwriter.py +++ b/src/ml_tools/thermalwriter.py @@ -216,10 +216,12 @@ def get_data(clip_samples, extra_args): # GP All assumes we dont have a track over multiple bins (Whcih we probably never want) if extra_args.get("use_segments", True): track.get_segments( - extra_args.get("segment_frame_spacing", 9), - extra_args.get("segment_width", 25), - 
extra_args.get("segment_type"), - extra_args.get("segment_min_avg_mass"), + segment_width=extra_args.get("segment_width", 25), + segment_frame_spacing=extra_args.get( + "segment_frame_spacing", 9 + ), + segment_type=extra_args.get("segment_type"), + segment_min_avg_mass=extra_args.get("segment_min_avg_mass"), max_segments=extra_args.get("max_segments"), dont_filter=extra_args.get("dont_filter_segment", False), skip_ffc=extra_args.get("skip_ffc", True), From 504e747f1966665624ff2b24f1bbea29cb520389 Mon Sep 17 00:00:00 2001 From: gferraro Date: Sun, 15 Sep 2024 16:09:07 +0200 Subject: [PATCH 007/117] fix name --- src/ml_tools/thermalwriter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py index 68dba9a8..88f96066 100644 --- a/src/ml_tools/thermalwriter.py +++ b/src/ml_tools/thermalwriter.py @@ -221,7 +221,7 @@ def get_data(clip_samples, extra_args): "segment_frame_spacing", 9 ), segment_type=extra_args.get("segment_type"), - segment_min_avg_mass=extra_args.get("segment_min_avg_mass"), + segment_min_mass=extra_args.get("segment_min_avg_mass"), max_segments=extra_args.get("max_segments"), dont_filter=extra_args.get("dont_filter_segment", False), skip_ffc=extra_args.get("skip_ffc", True), From ddaf4fe5fed2beceaee5725ed3b75502f6c84e6f Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 17 Sep 2024 15:15:15 +0200 Subject: [PATCH 008/117] debug --- src/ml_tools/thermaldataset.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py index 8266685c..55686c03 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -37,6 +37,11 @@ def get_excluded(): "bat", "mammal", "frog", + "grey kangaroo", + "sambar deer" "chicken", + "fox", + "cow", + "wombat", ] @@ -44,8 +49,8 @@ def get_remapped(multi_label=False): land_bird = "land-bird" if multi_label else "bird" return { "echidna": "hedgehog", - "grey kangaroo": "wallaby", - "sambar deer": "deer", + # "grey kangaroo": "wallaby", + # "sambar deer": "deer", "mouse": "rodent", "rat": "rodent", "rain": "false-positive", @@ -59,7 +64,7 @@ def get_remapped(multi_label=False): "pheasant": land_bird, "pukeko": land_bird, "quail": land_bird, - "chicken": land_bird, + # "chicken": land_bird, } From a0a67743a965d4fc4076ed9b0d1814901ca3abc8 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 17 Sep 2024 17:21:47 +0200 Subject: [PATCH 009/117] use rust binding --- src/ml_tools/rawdb.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py index 27175e23..9c34c149 100644 --- a/src/ml_tools/rawdb.py +++ b/src/ml_tools/rawdb.py @@ -20,7 +20,7 @@ from ml_tools.datasetstructures import TrackHeader, ClipHeader from track.track import Track from track.cliptrackextractor import is_affected_by_ffc -from cptv import CPTVReader +from cptv_rs_python_bindings import CptvReader from ml_tools.rectangle import Rectangle special_datasets = [ @@ -62,19 +62,23 @@ def load_frames(self): background = None tracker_version = self.meta_data.get("tracker_version") frame_i = 0 - with open(self.file, "rb") as f: - reader = CPTVReader(f) - for frame in reader: - if frame.background_frame: - background = frame.pix - # bug in previous tracker version where background was first frame - if tracker_version >= 10: - continue - ffc = is_affected_by_ffc(frame) - if ffc: - ffc_frames.append(frame_i) - 
cptv_frames.append(frame.pix) - frame_i += 1 + reader = CptvReader(str(self.file)) + header = reader.get_header() + while True: + frame = reader.next_frame() + if frame is None: + break + if frame.background_frame: + background = frame.pix + # bug in previous tracker version where background was first frame + if tracker_version >= 10: + continue + ffc = is_affected_by_ffc(frame) + if ffc: + print("GOT FFC") + ffc_frames.append(frame_i) + cptv_frames.append(frame.pix) + frame_i += 1 frames = np.uint16(cptv_frames) if background is None: background = np.mean(frames, axis=0) From 65d962f36a735a93e3c365aba8619419a0def674 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 17 Sep 2024 17:22:56 +0200 Subject: [PATCH 010/117] remove unneeded --- src/classify/clipclassifier.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/classify/clipclassifier.py b/src/classify/clipclassifier.py index 0600b49d..f9e239db 100644 --- a/src/classify/clipclassifier.py +++ b/src/classify/clipclassifier.py @@ -11,13 +11,8 @@ from track.clip import Clip from track.cliptrackextractor import ClipTrackExtractor, is_affected_by_ffc from ml_tools import tools -from ml_tools.kerasmodel import KerasModel from track.irtrackextractor import IRTrackExtractor from ml_tools.previewer import Previewer -from track.track import Track - -from cptv import CPTVReader -from datetime import datetime from ml_tools.interpreter import get_interpreter From 2586011e8f31d19dd92e9bb66e3e44ae25d0c963 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 17 Sep 2024 17:35:35 +0200 Subject: [PATCH 011/117] remove unneeded --- src/ml_tools/tfdataset.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py index f791ba07..437ba16c 100644 --- a/src/ml_tools/tfdataset.py +++ b/src/ml_tools/tfdataset.py @@ -61,18 +61,7 @@ def get_distribution(dataset, num_labels, batched=True, one_hot=True, extra_meta def get_dataset(load_function, base_dir, labels, **args): - land_birds = [ - "pukeko", - "california quail", - "brown quail", - "black swan", - "quail", - "pheasant", - "penguin", - "duck", - "chicken", - "rooster", - ] + excluded_labels = args.get("excluded_labels", []) to_remap = args.get("remapped_labels", {}) logging.info("Excluding %s", excluded_labels) @@ -193,7 +182,6 @@ def get_dataset(load_function, base_dir, labels, **args): dataset = dataset.take(epoch_size) else: epoch_size = 1 - dataset = dataset.prefetch(buffer_size=AUTOTUNE) batch_size = args.get("batch_size", None) if batch_size is not None: dataset = dataset.batch(batch_size) From a63e9a7058462273b0769f7716376b087b270b1d Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 18 Sep 2024 16:32:54 +0200 Subject: [PATCH 012/117] avoid bad regions --- src/ml_tools/forestmodel.py | 2 +- src/ml_tools/rawdb.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ml_tools/forestmodel.py b/src/ml_tools/forestmodel.py index a8ccd95a..c3e17a66 100644 --- a/src/ml_tools/forestmodel.py +++ b/src/ml_tools/forestmodel.py @@ -228,7 +228,7 @@ def forest_features( for i, frame in enumerate(track_frames): region = regions[i] - if region.blank or region.width == 0 or region.height == 0: + if region.blank or region.width > 0 or region.height > 0: prev_count = 0 continue diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py index 9c34c149..d1d2d681 100644 --- a/src/ml_tools/rawdb.py +++ b/src/ml_tools/rawdb.py @@ -31,6 +31,8 @@ "overlay", ] +FPS = 9 + class RawDatabase: def __init__(self, 
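Patch 009 swaps the pure-Python CPTVReader iterator for the Rust bindings' pull-style API. A self-contained read loop using only the calls that appear in that diff (CptvReader, get_header, next_frame, frame.pix), as a sketch:

    from cptv_rs_python_bindings import CptvReader

    reader = CptvReader("clip.cptv")   # hypothetical path; the diff passes str(self.file)
    header = reader.get_header()
    frame_i = 0
    while True:
        frame = reader.next_frame()
        if frame is None:   # end of stream replaces the old for-loop protocol
            break
        pix = frame.pix     # raw thermal frame values
        frame_i += 1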
From a63e9a7058462273b0769f7716376b087b270b1d Mon Sep 17 00:00:00 2001
From: gferraro
Date: Wed, 18 Sep 2024 16:32:54 +0200
Subject: [PATCH 012/117] avoid bad regions

---
 src/ml_tools/forestmodel.py | 2 +-
 src/ml_tools/rawdb.py       | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/ml_tools/forestmodel.py b/src/ml_tools/forestmodel.py
index a8ccd95a..c3e17a66 100644
--- a/src/ml_tools/forestmodel.py
+++ b/src/ml_tools/forestmodel.py
@@ -228,7 +228,7 @@ def forest_features(
     for i, frame in enumerate(track_frames):
         region = regions[i]
-        if region.blank or region.width == 0 or region.height == 0:
+        if region.blank or region.width > 0 or region.height > 0:
             prev_count = 0
             continue
 
diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py
index 9c34c149..d1d2d681 100644
--- a/src/ml_tools/rawdb.py
+++ b/src/ml_tools/rawdb.py
@@ -31,6 +31,8 @@
     "overlay",
 ]
 
+FPS = 9
+
 
 class RawDatabase:
     def __init__(self, database_filename):

From 91ea9e7f51ec104084b2a5071dfb202b6daaf2fb Mon Sep 17 00:00:00 2001
From: gferraro
Date: Fri, 20 Sep 2024 14:53:36 +0200
Subject: [PATCH 013/117] update python-cptv

---
 pirequirements.txt            |  2 +-
 pyproject.toml                |  2 +-
 requirements.txt              |  2 +-
 src/ml_tools/thermalwriter.py | 10 ++++++++++
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/pirequirements.txt b/pirequirements.txt
index 72fde5a9..62280bdf 100644
--- a/pirequirements.txt
+++ b/pirequirements.txt
@@ -26,4 +26,4 @@ dbus-python==1.3.2
 importlib_resources==5.10.2
 opencv-python==4.8.0.76
 inotify_simple==1.3.5
-python-cptv==0.0.3
\ No newline at end of file
+python-cptv==0.0.5
\ No newline at end of file

diff --git a/pyproject.toml b/pyproject.toml
index 36570e39..c80c9a77 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ dependencies = [
     "importlib_resources==5.10.2",
     "opencv-python==4.8.0.76",
     "inotify_simple==1.3.5",
-    "python-cptv==0.0.3"
+    "python-cptv==0.0.5"
 ]
 
 [project.scripts]

diff --git a/requirements.txt b/requirements.txt
index 8458878a..af94548d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -26,4 +26,4 @@ joblib
 #requires sudo apt-get install libopencv-dev used for ir track extraction on server
 # pybgs==3.2.0.post1 this was used for ir
 inotify_simple==1.3.5
-python-cptv==0.0.3
\ No newline at end of file
+python-cptv==0.0.5
\ No newline at end of file

diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py
index 88f96066..89c93f56 100644
--- a/src/ml_tools/thermalwriter.py
+++ b/src/ml_tools/thermalwriter.py
@@ -306,6 +306,16 @@ def get_data(clip_samples, extra_args):
                         frame.resize_with_aspect(
                             (32, 32), crop_rectangle, keep_edge=True
                         )
+                    if (
+                        np.amax(frame.thermal) > 40000
+                        or np.amin(frame.thermal) < 1000
+                    ):
+                        logging.error(
+                            "Srange values for %s max %s min %s",
+                            clip_id,
+                            np.amax(frame.thermal),
+                            np.amin(frame.thermal),
+                        )
                     frame.thermal -= temp_median
                     np.clip(frame.thermal, a_min=0, a_max=None, out=frame.thermal)

From 5fb72e986f6e666248f08818dba6edef9c2123ec Mon Sep 17 00:00:00 2001
From: gferraro
Date: Fri, 20 Sep 2024 15:26:07 +0200
Subject: [PATCH 014/117] save some files to test

---
 src/ml_tools/thermalwriter.py | 16 +++++++++++++++-
 src/ml_tools/tools.py         |  4 ++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py
index 89c93f56..3d3a442b 100644
--- a/src/ml_tools/thermalwriter.py
+++ b/src/ml_tools/thermalwriter.py
@@ -23,6 +23,7 @@
 --output_file_prefix="${OUTPUT_DIR/FILE_PREFIX}" \
 --num_shards=100
 """
+import cv2
 from PIL import Image
 from pathlib import Path
 import time
@@ -175,13 +176,19 @@ def get_data(clip_samples, extra_args):
         return None
     data = []
     crop_rectangle = tools.Rectangle(2, 2, 160 - 2 * 2, 140 - 2 * 2)
+
+    out_folder = None
     if clip_samples[0].source_file.suffix == ".hdf5":
         db = TrackDatabase(clip_samples[0].source_file)
+        out_folder = "hdf5"
     else:
         db = RawDatabase(clip_samples[0].source_file)
         db.load_frames()
-        # going to redo segments to get rid of ffc segments
+        out_folder = "raw"
 
+    # going to redo segments to get rid of ffc segments
+    out_folder = Path(out_folder)
+    out_folder.mkdir(exist_ok=True)
     clip_id = clip_samples[0].clip_id
     try:
        background = db.get_clip_background()
@@ -335,6 +342,13 @@ def get_data(clip_samples, extra_args):
                     frame.filtered, min=min_diff, max=max_diff, new_max=255
                 )
 
+                cv2.imwrite(
+                    str(
+                        out_folder / f"{clip_id}-{track_id}-{frame_number}.png"
+                    ),
+                    np.uint8(frame.thermal),
+                )
+
                 if not stats[0]:
                     frame.filtered = np.zeros((frame.filtered.shape))
                 f2 = np.uint8(frame.filtered)

diff --git a/src/ml_tools/tools.py b/src/ml_tools/tools.py
index 73355ca4..519c9e2f 100644
--- a/src/ml_tools/tools.py
+++ b/src/ml_tools/tools.py
@@ -6,7 +6,6 @@
 import numpy as np
 import pickle
 import json
-import dateutil
 import datetime
 import glob
 import cv2
@@ -15,6 +14,7 @@
 from PIL import Image, ImageFont, ImageDraw
 from pathlib import Path
 from ml_tools.rectangle import Rectangle
+from dateutil import parser
 
 EPISON = 1e-5
@@ -92,7 +92,7 @@ def load_clip_metadata(filename):
     # add in some metadata stats
     meta = json.load(t)
     if meta.get("recordingDateTime"):
-        meta["recordingDateTime"] = dateutil.parser.parse(meta["recordingDateTime"])
+        meta["recordingDateTime"] = parser.parse(meta["recordingDateTime"])
     if meta.get("tracks") is None and meta.get("Tracks"):
         meta["tracks"] = meta["Tracks"]
     return meta

From 38228ac466913be57c732b264746866952f381e7 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Sun, 22 Sep 2024 16:50:53 +0200
Subject: [PATCH 015/117] more debugging

---
 src/ml_tools/kerasmodel.py     | 6 ++++++
 src/ml_tools/tfdataset.py      | 3 +++
 src/ml_tools/thermaldataset.py | 1 +
 src/ml_tools/thermalwriter.py  | 2 +-
 4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py
index 2313c789..9608f606 100644
--- a/src/ml_tools/kerasmodel.py
+++ b/src/ml_tools/kerasmodel.py
@@ -502,6 +502,9 @@ def train_model(
             multi_label=self.params.multi_label,
             num_frames=self.params.square_width**2,
             channels=self.params.channels,
+            deterministic=True,
+            shuffle=False,
+            epoch_size=1000,
         )
         self.remapped = remapped
         self.validate, remapped, _, _ = get_dataset(
@@ -519,6 +522,9 @@ def train_model(
             multi_label=self.params.multi_label,
             num_frames=self.params.square_width**2,
             channels=self.params.channels,
+            deterministic=True,
+            shuffle=False,
+            epoch_size=250,
             # dist=self.dataset_counts["validation"],
         )

diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py
index 437ba16c..b18cf960 100644
--- a/src/ml_tools/tfdataset.py
+++ b/src/ml_tools/tfdataset.py
@@ -149,6 +149,9 @@ def get_dataset(load_function, base_dir, labels, **args):
             stop_on_empty_dataset=True,
             rerandomize_each_iteration=True,
         )
+    if args.get("epoch_size") is not None:
+        dataset = dataset.take(args.get("epoch_size"))
+        logging.info("Setting dataset to %s", args.get("epoch_size"))
     if args.get("cache", False):
         dataset = dataset.cache()
     if (

diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py
index 55686c03..04a6be4c 100644
--- a/src/ml_tools/thermaldataset.py
+++ b/src/ml_tools/thermaldataset.py
@@ -42,6 +42,7 @@ def get_excluded():
         "fox",
         "cow",
         "wombat",
+        "chicken",
     ]

diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py
index 3d3a442b..890cb461 100644
--- a/src/ml_tools/thermalwriter.py
+++ b/src/ml_tools/thermalwriter.py
@@ -346,7 +346,7 @@ def get_data(clip_samples, extra_args):
                 cv2.imwrite(
                     str(
                         out_folder / f"{clip_id}-{track_id}-{frame_number}.png"
                     ),
-                    np.uint8(frame.thermal),
+                    np.uint8(frame.filtered),
                 )
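Patches 013-015 funnel every frame through imageprocessing.normalize(..., new_max=255) with a min/max computed across the track. A paraphrase of that linear rescale (a sketch of the call sites' expectations, not the library's actual body):

    import numpy as np

    def normalize_sketch(data, v_min, v_max, new_max=255):
        # Rescale [v_min, v_max] onto [0, new_max]; a degenerate range
        # yields zeros, matching the stats flag the callers check afterwards.
        if v_max == v_min:
            return np.zeros(data.shape), False
        out = (data.astype(np.float32) - v_min) / (v_max - v_min) * new_max
        return out, True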
From f90c6ed786ce44e2964bd47ee8ee8562095468a0 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Sun, 22 Sep 2024 17:27:22 +0200
Subject: [PATCH 016/117] more debugging

---
 src/ml_tools/kerasmodel.py     | 22 ++++++++++++++++++++--
 src/ml_tools/thermaldataset.py | 26 ++++++++++++++++++++++----
 2 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py
index 9608f606..28965b3d 100644
--- a/src/ml_tools/kerasmodel.py
+++ b/src/ml_tools/kerasmodel.py
@@ -503,8 +503,8 @@ def train_model(
             num_frames=self.params.square_width**2,
             channels=self.params.channels,
             deterministic=True,
-            shuffle=False,
             epoch_size=1000,
+            include_Track=True,
         )
         self.remapped = remapped
         self.validate, remapped, _, _ = get_dataset(
@@ -523,10 +523,28 @@ def train_model(
             num_frames=self.params.square_width**2,
             channels=self.params.channels,
             deterministic=True,
-            shuffle=False,
             epoch_size=250,
+            include_track=True,
             # dist=self.dataset_counts["validation"],
         )
+        logging.info("Saving datasets")
+        save_dir = Path("./train-images")
+        save_dir.mkdir(exist_ok=True)
+        batch_i = 0
+        for x, y in self.train:
+            thermaldataset.show_batch(
+                x, y, self.labels, save=save_dir / f"{batch_i}.jpg", tracks=True
+            )
+            batch_i += 1
+
+        save_dir = Path("./val-images")
+        save_dir.mkdir(exist_ok=True)
+        batch_i = 0
+        for x, y in self.validate:
+            thermaldataset.show_batch(
+                x, y, self.labels, save=save_dir / f"{batch_i}.jpg"
+            )
+            batch_i += 1
 
         if weights is not None:
             self.model.load_weights(weights)

diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py
index 04a6be4c..51a39258 100644
--- a/src/ml_tools/thermaldataset.py
+++ b/src/ml_tools/thermaldataset.py
@@ -38,11 +38,23 @@ def get_excluded():
         "mammal",
         "frog",
         "grey kangaroo",
-        "sambar deer" "chicken",
+        "sambar deer",
+        "chicken",
         "fox",
         "cow",
         "wombat",
         "chicken",
+        "dog",
+        "sheep" "cat",
+        "duck",
+        "pheasant",
+        "pukeko",
+        "brown quail",
+        "black swan",
+        "quail",
+        "california quail",
+        "sheep",
+        "echidna",
     ]
@@ -361,10 +369,13 @@ def main():
-def show_batch(image_batch, label_batch, labels, save=None):
+def show_batch(image_batch, label_batch, labels, save=None, tracks=False):
     plt.figure(figsize=(10, 10))
     print("images in batch", len(image_batch), len(label_batch))
     num_images = min(len(image_batch), 25)
+    if tracks:
+        track_batch = label_batch[1]
+        label_batch = label_batch[0]
     for n in range(num_images):
         ax = plt.subplot(5, 5, n + 1)
         img = np.uint8(image_batch[n])
@@ -377,11 +388,14 @@ def show_batch(image_batch, label_batch, labels, save=None, tracks=False):
         plt.imshow(img)
-        plt.title("C-" + str(image_batch[n]))
-        plt.title(labels[np.argmax(label_batch[n])])
+        if tracks:
+            plt.title(f"{labels[np.argmax(label_batch[n])]}-{track_batch[n]}")
+        else:
+            plt.title(labels[np.argmax(label_batch[n])])
+
         plt.axis("off")
     # return
     if save:
         plt.savefig(save)

From 127940de3931c93ee68c7051460af722c2b9dd38 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Sun, 22 Sep 2024 17:28:10 +0200
Subject: [PATCH 017/117] double chicken

---
 src/ml_tools/thermaldataset.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py
index 51a39258..b321754c 100644
--- a/src/ml_tools/thermaldataset.py
+++ b/src/ml_tools/thermaldataset.py
@@ -43,7 +43,6 @@ def get_excluded():
         "fox",
         "cow",
         "wombat",
-        "chicken",
         "dog",
         "sheep" "cat",
         "duck",

From 31f2e866ce7cbcc89ac12bf468d45f302d7db510 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 23 Sep 2024 14:42:27 +0200
Subject: [PATCH 018/117] add missing station id

---
 src/ml_tools/rawdb.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py
index d1d2d681..a997a584 100644
--- a/src/ml_tools/rawdb.py
+++ b/src/ml_tools/rawdb.py
@@ -196,6 +196,7 @@ def get_clip_tracks(self, tag_precedence):
                 human_tags=human_tags,
                 source_file=self.file,
                 mega_missed_regions=track_meta.get("mega_missed_regions"),
+                station_id=clip_header.station_id,
                 # frame_temp_median=frame_temp_median,
             )
             clip_header.tracks.append(header)

From 2d31161e12d0773114465411a7cacb384ef26a0d Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 23 Sep 2024 14:44:22 +0200
Subject: [PATCH 019/117] load small

---
 src/ml_tools/dataset.py       |  2 ++
 src/ml_tools/thermalwriter.py | 12 ++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/ml_tools/dataset.py b/src/ml_tools/dataset.py
index b7690999..e9b75ae2 100644
--- a/src/ml_tools/dataset.py
+++ b/src/ml_tools/dataset.py
@@ -192,6 +192,8 @@ def load_clips(
             counter += 1
             if counter % 50 == 0:
                 logging.debug("Dataset loaded %s", counter)
+            if counter == 500:
+                break
         return [counter, counter]
 
     def load_clip(self, db_clip, dont_filter_segment=False):

diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py
index 890cb461..7dfbae0e 100644
--- a/src/ml_tools/thermalwriter.py
+++ b/src/ml_tools/thermalwriter.py
@@ -342,12 +342,12 @@ def get_data(clip_samples, extra_args):
                     frame.filtered, min=min_diff, max=max_diff, new_max=255
                 )
 
-                cv2.imwrite(
-                    str(
-                        out_folder / f"{clip_id}-{track_id}-{frame_number}.png"
-                    ),
-                    np.uint8(frame.filtered),
-                )
+                # cv2.imwrite(
+                #     str(
+                #         out_folder / f"{clip_id}-{track_id}-{frame_number}.png"
+                #     ),
+                #     np.uint8(frame.filtered),
+                # )
 
                 if not stats[0]:
                     frame.filtered = np.zeros((frame.filtered.shape))

From 138bdfbce741a3e081c17fa9057b237de321cc61 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 23 Sep 2024 14:47:41 +0200
Subject: [PATCH 020/117] add check

---
 src/ml_tools/rawdb.py     | 1 -
 src/ml_tools/tfdataset.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py
index a997a584..29921198 100644
--- a/src/ml_tools/rawdb.py
+++ b/src/ml_tools/rawdb.py
@@ -77,7 +77,6 @@ def load_frames(self):
                 continue
             ffc = is_affected_by_ffc(frame)
             if ffc:
-                print("GOT FFC")
                 ffc_frames.append(frame_i)
             cptv_frames.append(frame.pix)
             frame_i += 1

diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py
index b18cf960..a77b91c3 100644
--- a/src/ml_tools/tfdataset.py
+++ b/src/ml_tools/tfdataset.py
@@ -75,7 +75,7 @@ def get_dataset(load_function, base_dir, labels, **args):
         if excluded in labels:
             new_labels.remove(excluded)
     for remapped_lbl in to_remap.keys():
-        if remapped_lbl in labels:
+        if remapped_lbl in new_labels:
             new_labels.remove(remapped_lbl)
     for l in labels:
         keys.append(labels.index(l))

From 3dfefc3251d88035f6d732040d67a35ecb8e678e Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 23 Sep 2024 14:56:57 +0200
Subject: [PATCH 021/117] use model lbls

---
 src/modelevaluate.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/modelevaluate.py b/src/modelevaluate.py
index 397b2446..7676a2f4 100644
--- a/src/modelevaluate.py
+++ b/src/modelevaluate.py
@@ -481,9 +481,11 @@ def main():
             threshold=args.threshold,
         )
     elif args.dataset:
+        model_labels = model.labels.copy()
         model.load_training_meta(base_dir)
-        if model.params.multi_label:
-            model.labels.append("land-bird")
+        model.labels = model_labels
+        # if model.params.multi_label:
+        #     model.labels.append("land-bird")
         excluded, remapped = get_excluded(model.data_type)
         files = base_dir / args.dataset
         dataset, _, new_labels, _ = get_dataset(
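Patches 016-021 keep adjusting which labels are excluded or remapped before training and evaluation. The net effect of a get_excluded()/get_remapped() pair on a label list, sketched with hypothetical labels:

    excluded = {"bat", "mammal", "frog"}
    remapped = {"rain": "false-positive", "echidna": "hedgehog"}

    labels = ["bird", "rain", "bat", "echidna", "possum"]
    kept = [remapped.get(l, l) for l in labels if l not in excluded]
    # -> ["bird", "false-positive", "hedgehog", "possum"]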
From cdc6ef637f631367d5b08d1675cfd97280f64b61 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 23 Sep 2024 15:12:13 +0200
Subject: [PATCH 022/117] remap labels

---
 src/ml_tools/tfdataset.py | 73 ++++++++++++++++++++++++++-------------
 src/modelevaluate.py      |  7 ++--
 2 files changed, 53 insertions(+), 27 deletions(-)

diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py
index a77b91c3..f710799d 100644
--- a/src/ml_tools/tfdataset.py
+++ b/src/ml_tools/tfdataset.py
@@ -61,36 +61,61 @@ def get_dataset(load_function, base_dir, labels, **args):
 
 def get_dataset(load_function, base_dir, labels, **args):
+    model_labels = args.get("model_labels")
     excluded_labels = args.get("excluded_labels", [])
     to_remap = args.get("remapped_labels", {})
-    logging.info("Excluding %s", excluded_labels)
     remapped = {}
     keys = []
     values = []
-    # excluded_labels.append("insect")
-    # excluded_labels.append("cat")
-    new_labels = labels.copy()
-    for excluded in excluded_labels:
-        if excluded in labels:
-            new_labels.remove(excluded)
-    for remapped_lbl in to_remap.keys():
-        if remapped_lbl in new_labels:
-            new_labels.remove(remapped_lbl)
-    for l in labels:
-        keys.append(labels.index(l))
-        if l not in new_labels:
-            remapped[l] = -1
-            values.append(-1)
-            logging.info("Excluding %s", l)
-        else:
-            remapped[l] = [l]
-            values.append(new_labels.index(l))
-    for k, v in to_remap.items():
-        if k in labels and v in labels:
-            remapped[v].append(k)
-            values[labels.index(k)] = new_labels.index(v)
-            del remapped[k]
+    if model_labels is not None:
+        logging.info("Mapping DS labels to model labels ")
+        # if we are loading a model with different labels we need to map the dataset labels
+        # to the equivalent model labels
+        for l_i, og_lbl in enumerate(labels):
+            keys.append(l_i)
+            try:
+                lbl = og_lbl
+                if lbl in to_remap:
+                    lbl = to_remap[lbl]
+                    l_i = labels.index(lbl)
+
+                mdl_i = model_labels.index(lbl)
+                if lbl not in remapped:
+                    remapped[lbl] = []
+                remapped[lbl].append(og_lbl)
+                values.append(mdl_i)
+            except:
+                remapped[og_lbl] = -1
+                values.append(-1)
+
+    else:
+
+        logging.info("Excluding %s", excluded_labels)
+
+        # excluded_labels.append("insect")
+        # excluded_labels.append("cat")
+        new_labels = labels.copy()
+        for excluded in excluded_labels:
+            if excluded in labels:
+                new_labels.remove(excluded)
+        for remapped_lbl in to_remap.keys():
+            if remapped_lbl in new_labels:
+                new_labels.remove(remapped_lbl)
+        for l in labels:
+            keys.append(labels.index(l))
+            if l not in new_labels:
+                remapped[l] = -1
+                values.append(-1)
+                logging.info("Excluding %s", l)
+            else:
+                remapped[l] = [l]
+                values.append(new_labels.index(l))
+        for k, v in to_remap.items():
+            if k in labels and v in labels:
+                remapped[v].append(k)
+                values[labels.index(k)] = new_labels.index(v)
+                del remapped[k]
     remap_lookup = tf.lookup.StaticHashTable(
         initializer=tf.lookup.KeyValueTensorInitializer(
             keys=tf.constant(keys),

diff --git a/src/modelevaluate.py b/src/modelevaluate.py
index 7676a2f4..009451c8 100644
--- a/src/modelevaluate.py
+++ b/src/modelevaluate.py
@@ -483,15 +483,16 @@ def main():
     elif args.dataset:
         model_labels = model.labels.copy()
         model.load_training_meta(base_dir)
-        model.labels = model_labels
-        # if model.params.multi_label:
-        #     model.labels.append("land-bird")
+        # model.labels = model_labels
+        if model.params.multi_label:
+            model.labels.append("land-bird")
         excluded, remapped = get_excluded(model.data_type)
         files = base_dir / args.dataset
         dataset, _, new_labels, _ = get_dataset(
             files,
             model.data_type,
             model.labels,
+            model_labels=model_labels,
             batch_size=64,
             image_size=model.params.output_dim[:2],
             preprocess_fn=model.preprocess_fn,

From deb1a2b0677576b9633d886a8c55e3eab83c34c9 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 23 Sep 2024 15:14:32 +0200
Subject: [PATCH 023/117] fix new

---
 src/ml_tools/tfdataset.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py
index f710799d..6f6ffb84 100644
--- a/src/ml_tools/tfdataset.py
+++ b/src/ml_tools/tfdataset.py
@@ -69,6 +69,8 @@ def get_dataset(load_function, base_dir, labels, **args):
     keys = []
     values = []
     if model_labels is not None:
+        new_labels = model_labels
+
         logging.info("Mapping DS labels to model labels ")
         # if we are loading a model with different labels we need to map the dataset labels
         # to the equivalent model labels

From 6731c5bb0f9da3fb3e0008bf2d50e9ebb40a7135 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 23 Sep 2024 15:24:22 +0200
Subject: [PATCH 024/117] no need to show

---
 src/ml_tools/kerasmodel.py | 56 +++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py
index 28965b3d..3ee4779e 100644
--- a/src/ml_tools/kerasmodel.py
+++ b/src/ml_tools/kerasmodel.py
@@ -527,34 +527,34 @@ def train_model(
             include_track=True,
             # dist=self.dataset_counts["validation"],
         )
-        logging.info("Saving datasets")
-        save_dir = Path("./train-images")
-        save_dir.mkdir(exist_ok=True)
-        batch_i = 0
-        for x, y in self.train:
-            thermaldataset.show_batch(
-                x, y, self.labels, save=save_dir / f"{batch_i}.jpg", tracks=True
-            )
-            batch_i += 1
-
-        save_dir = Path("./val-images")
-        save_dir.mkdir(exist_ok=True)
-        batch_i = 0
-        for x, y in self.validate:
-            thermaldataset.show_batch(
-                x, y, self.labels, save=save_dir / f"{batch_i}.jpg"
-            )
-            batch_i += 1
-
-        if weights is not None:
-            self.model.load_weights(weights)
-        if rebalance:
-            self.class_weights = get_weighting(self.train, self.labels)
-            logging.info(
-                "Training on %s with class weights %s",
-                self.labels,
-                self.class_weights,
-            )
+        # logging.info("Saving datasets")
+        # save_dir = Path("./train-images")
+        # save_dir.mkdir(exist_ok=True)
+        # batch_i = 0
+        # for x, y in self.train:
+        #     thermaldataset.show_batch(
+        #         x, y, self.labels, save=save_dir / f"{batch_i}.jpg", tracks=True
+        #     )
+        #     batch_i += 1
+
+        # save_dir = Path("./val-images")
+        # save_dir.mkdir(exist_ok=True)
+        # batch_i = 0
+        # for x, y in self.validate:
+        #     thermaldataset.show_batch(
+        #         x, y, self.labels, save=save_dir / f"{batch_i}.jpg"
+        #     )
+        #     batch_i += 1
+
+        # if weights is not None:
+        #     self.model.load_weights(weights)
+        # if rebalance:
+        #     self.class_weights = get_weighting(self.train, self.labels)
+        #     logging.info(
+        #         "Training on %s with class weights %s",
+        #         self.labels,
+        #         self.class_weights,
+        #     )
 
         self.save_metadata(run_name)
         self.save(run_name)
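The remap machinery added in patch 022 boils down to a single integer lookup table from dataset label index to model label index, with -1 marking excluded labels. A runnable sketch of just that table, using the same tf.lookup calls as the diff (the example labels are hypothetical):

    import tensorflow as tf

    keys = tf.constant([0, 1, 2])     # e.g. dataset labels ["bird", "rain", "bat"]
    values = tf.constant([0, 1, -1])  # "bat" is excluded, so it maps to -1
    remap_lookup = tf.lookup.StaticHashTable(
        initializer=tf.lookup.KeyValueTensorInitializer(keys=keys, values=values),
        default_value=-1,
    )
    print(remap_lookup.lookup(tf.constant([2, 0])))  # -> [-1  0]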
"val_loss.weights.h5" checkpoint_loss = tf.keras.callbacks.ModelCheckpoint( val_loss, @@ -616,7 +616,7 @@ def checkpoints(self, run_name): save_weights_only=True, mode="auto", ) - val_acc = self.checkpoint_folder / run_name / "val_acc" + val_acc = self.checkpoint_folder / run_name / "val_acc.weights.h5" checkpoint_acc = tf.keras.callbacks.ModelCheckpoint( val_acc, @@ -631,7 +631,7 @@ def checkpoints(self, run_name): mode="max", ) - val_precision = self.checkpoint_folder / run_name / "val_recall" + val_precision = self.checkpoint_folder / run_name / "val_recall.weights.h5" checkpoint_recall = tf.keras.callbacks.ModelCheckpoint( val_precision, From 28808014b9d046d47c8a77bb046b933d3e0bd8a9 Mon Sep 17 00:00:00 2001 From: gferraro Date: Mon, 23 Sep 2024 15:36:15 +0200 Subject: [PATCH 026/117] add mode --- src/ml_tools/kerasmodel.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py index a903f92a..00ea6a47 100644 --- a/src/ml_tools/kerasmodel.py +++ b/src/ml_tools/kerasmodel.py @@ -503,8 +503,8 @@ def train_model( num_frames=self.params.square_width**2, channels=self.params.channels, deterministic=True, - epoch_size=1000, - include_Track=True, + # epoch_size=1000, + # include_Track=True, ) self.remapped = remapped self.validate, remapped, _, _ = get_dataset( @@ -523,8 +523,8 @@ def train_model( num_frames=self.params.square_width**2, channels=self.params.channels, deterministic=True, - epoch_size=250, - include_track=True, + # epoch_size=250, + # in2clude_track=True, # dist=self.dataset_counts["validation"], ) # logging.info("Saving datasets") @@ -672,6 +672,7 @@ def checkpoints(self, run_name): if self.params.multi_label else "val_categorical_accuracy" ), + mode = "max" verbose=1, ) return [ From 2657382323ba32204c0026cd043b7d4190103e0c Mon Sep 17 00:00:00 2001 From: gferraro Date: Mon, 23 Sep 2024 15:40:51 +0200 Subject: [PATCH 027/117] add more debug --- src/ml_tools/kerasmodel.py | 1 + src/ml_tools/tfdataset.py | 1 + src/ml_tools/thermaldataset.py | 9 +++++++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py index 00ea6a47..0c14fd2f 100644 --- a/src/ml_tools/kerasmodel.py +++ b/src/ml_tools/kerasmodel.py @@ -648,6 +648,7 @@ def checkpoints(self, run_name): if self.params.multi_label else "val_categorical_accuracy" ), + mode = "max" ) # havent found much use in this just takes training time # file_writer_cm = tf.summary.create_file_writer( diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py index 6f6ffb84..d4790df4 100644 --- a/src/ml_tools/tfdataset.py +++ b/src/ml_tools/tfdataset.py @@ -206,6 +206,7 @@ def get_dataset(load_function, base_dir, labels, **args): logging.info("Setting dataset size to %s", epoch_size) if not args.get("only_features", False): dataset = dataset.repeat(2) + dataset = dataset.take(epoch_size) scale_epoch = args.get("scale_epoch", None) if scale_epoch: epoch_size = epoch_size // scale_epoch diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py index b321754c..a73438c9 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -54,6 +54,11 @@ def get_excluded(): "california quail", "sheep", "echidna", + "mouse", + "rodent", + "possum", + "cat", + "dog", ] @@ -63,8 +68,8 @@ def get_remapped(multi_label=False): "echidna": "hedgehog", # "grey kangaroo": "wallaby", # "sambar deer": "deer", - "mouse": "rodent", - "rat": "rodent", + # "mouse": "rodent", + # "rat": 
"rodent", "rain": "false-positive", "water": "false-positive", "insect": "false-positive", From a1a37d70f7c043ad8e3394bb1a470623eac14f4b Mon Sep 17 00:00:00 2001 From: gferraro Date: Mon, 23 Sep 2024 15:44:07 +0200 Subject: [PATCH 028/117] exclude most --- src/ml_tools/thermaldataset.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py index a73438c9..1dd0e7ee 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -59,6 +59,11 @@ def get_excluded(): "possum", "cat", "dog", + "hedgehog", + "kiwi", + "leporidae", + "mustelid", + "wallaby", ] From 61338c77b61deab27fe34b73fa360df1eb1f4142 Mon Sep 17 00:00:00 2001 From: gferraro Date: Mon, 23 Sep 2024 15:44:39 +0200 Subject: [PATCH 029/117] comma --- src/ml_tools/kerasmodel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py index 0c14fd2f..42f79714 100644 --- a/src/ml_tools/kerasmodel.py +++ b/src/ml_tools/kerasmodel.py @@ -648,7 +648,7 @@ def checkpoints(self, run_name): if self.params.multi_label else "val_categorical_accuracy" ), - mode = "max" + mode="max", ) # havent found much use in this just takes training time # file_writer_cm = tf.summary.create_file_writer( @@ -673,7 +673,7 @@ def checkpoints(self, run_name): if self.params.multi_label else "val_categorical_accuracy" ), - mode = "max" + mode="max", verbose=1, ) return [ From 513fcf336fb1abd01fccf1324d30f119cf3a626b Mon Sep 17 00:00:00 2001 From: gferraro Date: Mon, 23 Sep 2024 15:45:55 +0200 Subject: [PATCH 030/117] remaining 2 --- src/ml_tools/thermaldataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py index 1dd0e7ee..f84e5752 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -64,6 +64,8 @@ def get_excluded(): "leporidae", "mustelid", "wallaby", + "human", + "vehicle", ] From 8ff17710e932ce1db8958f3e6718a52c6e1222c7 Mon Sep 17 00:00:00 2001 From: gferraro Date: Mon, 23 Sep 2024 16:02:54 +0200 Subject: [PATCH 031/117] dont save strange values --- src/ml_tools/thermalwriter.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py index 7dfbae0e..ee9b67da 100644 --- a/src/ml_tools/thermalwriter.py +++ b/src/ml_tools/thermalwriter.py @@ -318,11 +318,20 @@ def get_data(clip_samples, extra_args): or np.amin(frame.thermal) < 1000 ): logging.error( - "Srange values for %s max %s min %s", + "Strange values for %s max %s min %s", clip_id, np.amax(frame.thermal), np.amin(frame.thermal), ) + raise Exception( + f"Strange values for {clip_id} - {track_id} #{frame_number}" + ) + # cv2.imwrite( + # str( + # out_folder / f"{clip_id}-{track_id}-{frame_number}.png" + # ), + # np.uint8(frame.filtered), + # ) frame.thermal -= temp_median np.clip(frame.thermal, a_min=0, a_max=None, out=frame.thermal) From ae908717428afebf06b3be356e185a08453eda7c Mon Sep 17 00:00:00 2001 From: gferraro Date: Mon, 23 Sep 2024 16:27:47 +0200 Subject: [PATCH 032/117] weighting --- src/ml_tools/kerasmodel.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py index 42f79714..cd0d5469 100644 --- a/src/ml_tools/kerasmodel.py +++ b/src/ml_tools/kerasmodel.py @@ -548,13 +548,13 @@ def train_model( # if weights is not None: # self.model.load_weights(weights) - # if rebalance: - # 
From ae908717428afebf06b3be356e185a08453eda7c Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 23 Sep 2024 16:27:47 +0200
Subject: [PATCH 032/117] weighting

---
 src/ml_tools/kerasmodel.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py
index 42f79714..cd0d5469 100644
--- a/src/ml_tools/kerasmodel.py
+++ b/src/ml_tools/kerasmodel.py
@@ -548,13 +548,13 @@ def train_model(
 
         # if weights is not None:
         #     self.model.load_weights(weights)
-        # if rebalance:
-        #     self.class_weights = get_weighting(self.train, self.labels)
-        #     logging.info(
-        #         "Training on %s with class weights %s",
-        #         self.labels,
-        #         self.class_weights,
-        #     )
+        if rebalance:
+            self.class_weights = get_weighting(self.train, self.labels)
+        logging.info(
+                "Training on %s with class weights %s",
+                self.labels,
+                self.class_weights,
+            )
 
         self.save_metadata(run_name)
         self.save(run_name)

From b66fe315aacb39fb39668505d0d157fbf3bc8b3c Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 24 Sep 2024 16:22:26 +0200
Subject: [PATCH 033/117] debugging

---
 src/ml_tools/datasetstructures.py | 10 +++++
 src/ml_tools/imageprocessing.py   | 32 +++++++++++++++-----
 src/ml_tools/kerasmodel.py        |  2 +-
 src/ml_tools/thermaldataset.py    | 11 ------
 src/ml_tools/thermalwriter.py     | 66 ++++++++++++++++++++-----------
 5 files changed, 78 insertions(+), 43 deletions(-)

diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py
index c31c3159..81f1e64c 100644
--- a/src/ml_tools/datasetstructures.py
+++ b/src/ml_tools/datasetstructures.py
@@ -941,6 +941,13 @@ def get_segments(
     dont_filter=False,
     skip_ffc=True,
 ):
+    logging.info(
+        "Getting segments %s min mass %s max %s ffc %s",
+        segment_type,
+        segment_min_mass,
+        max_segments,
+        ffc_frames,
+    )
     if segment_type == SegmentType.ALL_RANDOM_NOMIN:
         segment_min_mass = None
     if min_frames is None:
@@ -950,6 +957,7 @@ def get_segments(
 
     filtered_stats = {"segment_mass": 0, "too short": 0}
     has_no_mass = np.sum(mass_history) == 0
+    before = len(regions)
     frame_indices = [
         region.frame_number
         for region in regions
@@ -963,6 +971,7 @@ def get_segments(
         and region.width > 0
         and region.height > 0
     ]
+    logging.info("Frames are %s / %s", len(frame_indices), before)
     if len(frame_indices) == 0:
         logging.warn("Nothing to load for %s - %s", clip_id, track_id)
         return [], filtered_stats
@@ -1065,6 +1074,7 @@ def get_segments(
         segment_mass = np.sum(mass_slice)
         segment_avg_mass = segment_mass / len(mass_slice)
         filtered = False
+        logging.info("avg mass is %s mass slice %s %s", segment_avg_mass, mass_slice)
         if segment_min_mass and segment_avg_mass < segment_min_mass:
             if dont_filter:
                 filtered = True

diff --git a/src/ml_tools/imageprocessing.py b/src/ml_tools/imageprocessing.py
index 6b2e4fbf..d9e9f738 100644
--- a/src/ml_tools/imageprocessing.py
+++ b/src/ml_tools/imageprocessing.py
@@ -5,6 +5,7 @@
 from PIL import Image
 from scipy import ndimage
 from PIL import Image
+import logging
 
 
 def resize_and_pad(
@@ -19,10 +20,22 @@ def resize_and_pad(
     extra_v=0,
 ):
     scale_percent = (new_dim[:2] / np.array(frame.shape[:2])).min()
-    width = int(frame.shape[1] * scale_percent)
-    height = int(frame.shape[0] * scale_percent)
+    width = round(frame.shape[1] * scale_percent)
+    height = round(frame.shape[0] * scale_percent)
     width = max(width, 1)
     height = max(height, 1)
+
+    width = min(width, new_dim[0])
+    height = min(height, new_dim[1])
+    logging.info(
+        "Resizing image with dim %s into dim %s height %s and width %s keep edge %s region %s",
+        frame.shape,
+        new_dim,
+        height,
+        width,
+        keep_edge,
+        region,
+    )
     if len(frame.shape) == 3:
         resize_dim = (width, height, frame.shape[2])
     else:
@@ -40,17 +53,20 @@ def resize_and_pad(
     offset_x = (new_dim[1] - frame_width) // 2
     offset_y = (new_dim[0] - frame_height) // 2
     if keep_edge and crop_region is not None:
-        if region.left == crop_region.left:
+        logging.info("Checking region %s against crop %s", region, crop_region)
+        if region.left <= crop_region.left:
             offset_x = 0
-
-        elif region.right == crop_region.right:
+            logging.info("On left offset so setting 0 %s", region)
+        elif region.right >= crop_region.right:
             offset_x = new_dim[1] - frame_width
+            logging.info("On right offset so setting 0 %s", region)
 
-        if region.top == crop_region.top:
+        if region.top <= crop_region.top:
             offset_y = 0
-        elif region.bottom == crop_region.bottom:
+        elif region.bottom >= crop_region.bottom:
             offset_y = new_dim[0] - frame_height
+    logging.info("Offsets are %s %s", offset_x, offset_y)
     if len(resized.shape) == 3:
         resized[
             offset_y : offset_y + frame_height, offset_x : offset_x + frame_width, :
@@ -159,7 +175,7 @@ def normalize(data, min=None, max=None, new_max=1):
         max = np.amax(data)
     if min is None:
         min = np.amin(data)
-    # print("normalizing with", max, min, new_max)
+    print("normalizing with", max, "MIN:", min)
     if max == min:
         if max == 0:
             return np.zeros((data.shape)), (False, max, min)

diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py
index cd0d5469..32f1f9c7 100644
--- a/src/ml_tools/kerasmodel.py
+++ b/src/ml_tools/kerasmodel.py
@@ -550,7 +550,7 @@ def train_model(
         if rebalance:
             self.class_weights = get_weighting(self.train, self.labels)
-        logging.info(
+            logging.info(
                 "Training on %s with class weights %s",
                 self.labels,
                 self.class_weights,

diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py
index f84e5752..079ad772 100644
--- a/src/ml_tools/thermaldataset.py
+++ b/src/ml_tools/thermaldataset.py
@@ -307,17 +307,6 @@ def read_tfrecord(
     return rgb_image
 
 
-def decode_image(thermals, filtereds, image_size):
-    deoced_thermals = []
-    decoded_filtered = []
-    for thermal, filtered in zip(thermals, filtereds):
-        image = tf.image.decode_png(image, channels=1)
-        filtered = tf.image.decode_png(filtered, channels=1)
-        decoded_thermal.append(image)
-        decoded_filtered.append(filtered)
-    return decoded_thermal, decoded_filtered
-
-
 def tile_images(images):
     index = 0
     image = None

diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py
index ee9b67da..bf87e30c 100644
--- a/src/ml_tools/thermalwriter.py
+++ b/src/ml_tools/thermalwriter.py
@@ -277,10 +277,12 @@ def get_data(clip_samples, extra_args):
                 normalize=True,
                 cropped=True,
             )
-
+            return None
             by_frame_number = {}
+            thermal_max_diff = 0
+            thermal_min_diff = None
             max_diff = 0
-            min_diff = 0
+            min_diff = None
             for f in track_frames:
                 if f.region.blank or f.region.width <= 0 or f.region.height <= 0:
                     continue
@@ -290,11 +292,27 @@ def get_data(clip_samples, extra_args):
                 diff_frame = f.thermal - f.region.subimage(background)
                 new_max = np.amax(diff_frame)
                 new_min = np.amin(diff_frame)
-                if new_min < min_diff:
+                if min_diff is None or new_min < min_diff:
                     min_diff = new_min
+                    # min_diff = max(0, new_min)
                 if new_max > max_diff:
                     max_diff = new_max
+                diff_frame = f.thermal - frame_temp_median[f.frame_number]
+                new_max = np.amax(diff_frame)
+                new_min = np.amin(diff_frame)
+                if thermal_min_diff is None or new_min < thermal_min_diff:
+                    thermal_min_diff = new_min
+                    # min_diff = max(0, new_min)
+                if new_max > thermal_max_diff:
+                    thermal_max_diff = new_max
+            logging.info(
+                "Min diff %s max diff %s thermal %s - %s",
+                min_diff,
+                max_diff,
+                thermal_min_diff,
+                thermal_max_diff,
+            )
             # normalize by maximum difference between background and tracked region
             # probably only need to use difference on the frames used for this record
             # also min_diff maybe could just be set to 0 and clip values below 0,
@@ -326,37 +344,39 @@ def get_data(clip_samples, extra_args):
                         raise Exception(
                             f"Strange values for {clip_id} - {track_id} #{frame_number}"
                         )
+                    logging.info(
+                        "Median is %s median in thermal is %s",
+                        temp_median,
+                        np.median(frame.thermal),
+                    )
 
                     frame.thermal -= temp_median
-                    np.clip(frame.thermal, a_min=0, a_max=None, out=frame.thermal)
+                    # np.clip(frame.thermal, a_min=0, a_max=None, out=frame.thermal)
 
                     frame.thermal, stats = imageprocessing.normalize(
-                        frame.thermal, new_max=255
+                        frame.thermal,
+                        min=thermal_min_diff,
+                        max=thermal_max_diff,
+                        new_max=255,
                     )
                     if not stats[0]:
                         frame.thermal = np.zeros((frame.thermal.shape))
-                    # continue
-                    # f2 = frame.filtered.copy()
-                    # frame.filtered, stats = imageprocessing.normalize(
-                    #     frame.filtered, new_max=255
-                    # )
-                    # np.clip(frame.filtered, a_min=min_diff, a_max=None, out=frame.filtered)
                     frame.filtered, stats = imageprocessing.normalize(
                         frame.filtered, min=min_diff, max=max_diff, new_max=255
                     )
+                    np.clip(frame.filtered, a_min=0, a_max=None, out=frame.filtered)
 
+                    logging.info(
+                        "Normalied %s %s",
+                        np.amin(frame.thermal),
+                        np.amax(frame.thermal),
+                    )
+                    cv2.imwrite(
+                        str(
+                            out_folder / f"{clip_id}-{track_id}-{frame_number}.png"
+                        ),
+                        np.uint8(frame.thermal),
+                    )
 
-                    # cv2.imwrite(
-                    #     str(
-                    #         out_folder / f"{clip_id}-{track_id}-{frame_number}.png"
-                    #     ),
-                    #     np.uint8(frame.filtered),
-                    # )
                     if not stats[0]:
                         frame.filtered = np.zeros((frame.filtered.shape))
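The keep_edge change in patch 033 swaps exact equality for <=/>= when deciding whether a region touches the crop boundary. The placement rule it governs, restated as a standalone sketch (paraphrasing the diff, with a minimal region stand-in assumed to expose left/right/top/bottom):

    # A region touching an edge of the crop rectangle is pinned to that
    # edge of the output instead of being centred.
    def paste_offsets(region, crop, frame_w, frame_h, new_w, new_h):
        offset_x = (new_w - frame_w) // 2
        offset_y = (new_h - frame_h) // 2
        if region.left <= crop.left:
            offset_x = 0
        elif region.right >= crop.right:
            offset_x = new_w - frame_w
        if region.top <= crop.top:
            offset_y = 0
        elif region.bottom >= crop.bottom:
            offset_y = new_h - frame_h
        return offset_x, offset_y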
b/src/ml_tools/imageprocessing.py index d9e9f738..4eeebcac 100644 --- a/src/ml_tools/imageprocessing.py +++ b/src/ml_tools/imageprocessing.py @@ -27,15 +27,7 @@ def resize_and_pad( width = min(width, new_dim[0]) height = min(height, new_dim[1]) - logging.info( - "Resizing image with dim %s into dim %s height %s and width %s keep edge %s region %s", - frame.shape, - new_dim, - height, - width, - keep_edge, - region, - ) + if len(frame.shape) == 3: resize_dim = (width, height, frame.shape[2]) else: @@ -53,20 +45,16 @@ def resize_and_pad( offset_x = (new_dim[1] - frame_width) // 2 offset_y = (new_dim[0] - frame_height) // 2 if keep_edge and crop_region is not None: - logging.info("Checking region %s against crop %s", region, crop_region) if region.left <= crop_region.left: offset_x = 0 - logging.info("On left offset so setting 0 %s", region) elif region.right >= crop_region.right: offset_x = new_dim[1] - frame_width - logging.info("On right offset so setting 0 %s", region) if region.top <= crop_region.top: offset_y = 0 elif region.bottom >= crop_region.bottom: offset_y = new_dim[0] - frame_height - logging.info("Offsets are %s %s", offset_x, offset_y) if len(resized.shape) == 3: resized[ offset_y : offset_y + frame_height, offset_x : offset_x + frame_width, : @@ -175,7 +163,6 @@ def normalize(data, min=None, max=None, new_max=1): max = np.amax(data) if min is None: min = np.amin(data) - print("normalizing with", max, "MIN:", min) if max == min: if max == 0: return np.zeros((data.shape)), (False, max, min) diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py index bf87e30c..345bb9fb 100644 --- a/src/ml_tools/thermalwriter.py +++ b/src/ml_tools/thermalwriter.py @@ -277,7 +277,7 @@ def get_data(clip_samples, extra_args): normalize=True, cropped=True, ) - return None + by_frame_number = {} thermal_max_diff = 0 thermal_min_diff = None @@ -306,13 +306,13 @@ def get_data(clip_samples, extra_args): # min_diff = max(0, new_min) if new_max > thermal_max_diff: thermal_max_diff = new_max - logging.info( - "Min diff %s max diff %s thermal %s - %s", - min_diff, - max_diff, - thermal_min_diff, - thermal_max_diff, - ) + # logging.info( + # "Min diff %s max diff %s thermal %s - %s", + # min_diff, + # max_diff, + # thermal_min_diff, + # thermal_max_diff, + # ) # normalize by maximum difference between background and tracked region # probably only need to use difference on the frames used for this record # also min_diff maybe could just be set to 0 and clip values below 0, @@ -344,11 +344,7 @@ def get_data(clip_samples, extra_args): raise Exception( f"Strange values for {clip_id} - {track_id} #{frame_number}" ) - logging.info( - "Median is %s median in thermal is %s", - temp_median, - np.median(frame.thermal), - ) + frame.thermal -= temp_median # np.clip(frame.thermal, a_min=0, a_max=None, out=frame.thermal) @@ -366,17 +362,12 @@ def get_data(clip_samples, extra_args): ) np.clip(frame.filtered, a_min=0, a_max=None, out=frame.filtered) - logging.info( - "Normalied %s %s", - np.amin(frame.thermal), - np.amax(frame.thermal), - ) - cv2.imwrite( - str( - out_folder / f"{clip_id}-{track_id}-{frame_number}.png" - ), - np.uint8(frame.thermal), - ) + # cv2.imwrite( + # str( + # out_folder / f"{clip_id}-{track_id}-{frame_number}.png" + # ), + # np.uint8(frame.thermal), + # ) if not stats[0]: frame.filtered = np.zeros((frame.filtered.shape)) From 09be2465c12babb6bf9d5cc9f3d1ce53efceca07 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 24 Sep 2024 17:30:41 +0200 Subject: [PATCH 035/117] fix tf --- 
src/ml_tools/tfdataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py index d4790df4..027b7b92 100644 --- a/src/ml_tools/tfdataset.py +++ b/src/ml_tools/tfdataset.py @@ -99,7 +99,7 @@ def get_dataset(load_function, base_dir, labels, **args): # excluded_labels.append("cat") new_labels = labels.copy() for excluded in excluded_labels: - if excluded in labels: + if excluded in new_labels: new_labels.remove(excluded) for remapped_lbl in to_remap.keys(): if remapped_lbl in new_labels: new_labels.remove(remapped_lbl) From 79d931eb2be8ed716a12bcc37e1a40b1f01fa392 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 24 Sep 2024 17:59:03 +0200 Subject: [PATCH 036/117] rough balance --- src/build.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/build.py b/src/build.py index 7b91dad8..879985fd 100644 --- a/src/build.py +++ b/src/build.py @@ -717,6 +717,39 @@ def dump_split_ids(datasets, out_file="datasplit.json"): return +def rough_balance(datasets): + logging.info("ROUGH BALANCE") + print_counts(*datasets) + + for dataset in datasets: + lbl_counts = {} + counts = [] + for label in dataset.labels: + label_count = len(dataset.samples_by_label.get(label, [])) + lbl_counts[label] = label_count + counts.append(label_count) + counts.sort() + std_dev = np.std(counts) + logging.info("Counts are %s std dev %s", counts, std_dev) + if std_dev < 2000: + logging.info("Not balancing") + continue + if len(counts) < 7: + cap_at = counts[-2] + else: + cap_at = counts[-3] + logging.info("Capping dataset %s at %s", dataset.name, cap_at) + for lbl, count in lbl_counts.items(): + if count <= cap_at: + continue + samples_to_remove = count - cap_at + by_labels = dataset.samples_by_label[lbl] + np.random.shuffle(by_labels) + for i in range(samples_to_remove): + dataset.remove_sample(by_labels[i]) + print_counts(*datasets) + + def main(): init_logging() args = parse_args() @@ -782,6 +815,8 @@ def main(): print("Splitting data set into train / validation") datasets = split_randomly(master_dataset, config, args.date, test_clips) + + rough_balance(datasets) validate_datasets(datasets, test_clips, args.date) dump_split_ids(datasets, record_dir / "datasplit.json") From 29f0d69318ae2849808d8174a347f8d677624834 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 24 Sep 2024 20:47:51 +0200 Subject: [PATCH 037/117] more debug --- src/ml_tools/tfwriter.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/ml_tools/tfwriter.py b/src/ml_tools/tfwriter.py index 3d9129ad..677a975e 100644 --- a/src/ml_tools/tfwriter.py +++ b/src/ml_tools/tfwriter.py @@ -101,7 +101,14 @@ def create_tf_records( samples_by_source = dataset.get_samples_by_source() source_files = list(samples_by_source.keys()) np.random.shuffle(source_files) - + lbl_samples = {} + for samples, source in samples_by_source.items(): + for s in samples: + if s.label not in lbl_samples: + lbl_samples[s.label] = 0 + lbl_samples[s.label] += 1 + for lbl, count in lbl_samples.items(): + logging.info("%s samples are %s", lbl, count) num_labels = len(dataset.labels) logging.info( "writing to output path: %s for %s samples", output_path, len(samples_by_source) From 91b196f7a0629a62e218174487d6f0f622b270a7 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 24 Sep 2024 20:59:24 +0200 Subject: [PATCH 038/117] debug source --- src/ml_tools/thermaldataset.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/ml_tools/thermaldataset.py
b/src/ml_tools/thermaldataset.py index 079ad772..9d4f5c27 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -234,9 +234,9 @@ def read_tfrecord( ) if include_track: + tfrecord_format["image/source_id"] = tf.io.FixedLenFeature((), tf.string) tfrecord_format["image/track_id"] = tf.io.FixedLenFeature((), tf.int64, -1) tfrecord_format["image/avg_mass"] = tf.io.FixedLenFeature((), tf.int64, -1) - if include_features or only_features: tfrecord_format["image/features"] = tf.io.FixedLenSequenceFeature( [36 * 5 + 8], dtype=tf.float32, allow_missing=True @@ -291,9 +291,11 @@ def read_tfrecord( if extra_label_map is not None: label = tf.reduce_max(label, axis=0) if include_track: + + source_id = tf.cast(example["image/source_id"], tf.string) track_id = tf.cast(example["image/track_id"], tf.int32) avg_mass = tf.cast(example["image/avg_mass"], tf.int32) - label = (label, track_id, avg_mass) + label = (label, track_id, avg_mass, source_id) if include_features or only_features: features = tf.squeeze(example["image/features"]) if only_features: @@ -351,7 +353,7 @@ def main(): include_features=False, remapped_labels=get_remapped(), excluded_labels=get_excluded(), - include_track=False, + include_track=True, num_frames=25, ) print("Ecpoh size is", epoch_size) @@ -364,6 +366,10 @@ def main(): batch_i = 0 print("epoch", e) for x, y in resampled_ds: + source = y[3] + for s in source: + print(s) + continue show_batch(x, y, labels, save=save_dir / f"{batch_i}.jpg") batch_i += 1 # return From 03316c039094e2dffd380ce0a067a6d090f9f9ec Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 24 Sep 2024 21:03:16 +0200 Subject: [PATCH 039/117] print id --- src/ml_tools/tfwriter.py | 11 ++++++----- src/ml_tools/thermalwriter.py | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/ml_tools/tfwriter.py b/src/ml_tools/tfwriter.py index 677a975e..e0b6ebaa 100644 --- a/src/ml_tools/tfwriter.py +++ b/src/ml_tools/tfwriter.py @@ -102,13 +102,14 @@ def create_tf_records( source_files = list(samples_by_source.keys()) np.random.shuffle(source_files) lbl_samples = {} - for samples, source in samples_by_source.items(): + for source, samples in samples_by_source.items(): for s in samples: if s.label not in lbl_samples: - lbl_samples[s.label] = 0 - lbl_samples[s.label] += 1 - for lbl, count in lbl_samples.items(): - logging.info("%s samples are %s", lbl, count) + lbl_samples[s.label] = [] + lbl_samples[s.label].append(s) + for lbl, samples in lbl_samples.items(): + logging.info("%s samples are %s", lbl, len(samples)) + logging.info("Unique ids are %s", [s.unique_id for s in samples]) num_labels = len(dataset.labels) logging.info( "writing to output path: %s for %s samples", output_path, len(samples_by_source) diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py index 345bb9fb..35f75d5f 100644 --- a/src/ml_tools/thermalwriter.py +++ b/src/ml_tools/thermalwriter.py @@ -90,7 +90,7 @@ def create_tf_example(sample, data, features, labels, num_frames): average_dim = int(round(np.mean(average_dim) ** 0.5)) thermals = list(data[0]) filtereds = list(data[1]) - image_id = sample.unique_track_id + image_id = sample.unique_id image_height, image_width = thermals[0].shape while len(thermals) < num_frames: # ensure 25 frames even if 0s From 5478116ecd92fb6265e257d2427e1324ba364356 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 24 Sep 2024 21:30:17 +0200 Subject: [PATCH 040/117] max samples --- src/ml_tools/tfwriter.py | 18 +++++++++--------- src/ml_tools/thermaldataset.py | 
5 +++-- src/ml_tools/thermalwriter.py | 1 + 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/ml_tools/tfwriter.py b/src/ml_tools/tfwriter.py index e0b6ebaa..402071df 100644 --- a/src/ml_tools/tfwriter.py +++ b/src/ml_tools/tfwriter.py @@ -101,15 +101,15 @@ def create_tf_records( samples_by_source = dataset.get_samples_by_source() source_files = list(samples_by_source.keys()) np.random.shuffle(source_files) - lbl_samples = {} - for source, samples in samples_by_source.items(): - for s in samples: - if s.label not in lbl_samples: - lbl_samples[s.label] = [] - lbl_samples[s.label].append(s) - for lbl, samples in lbl_samples.items(): - logging.info("%s samples are %s", lbl, len(samples)) - logging.info("Unique ids are %s", [s.unique_id for s in samples]) + # lbl_samples = {} + # for source, samples in samples_by_source.items(): + # for s in samples: + # if s.label not in lbl_samples: + # lbl_samples[s.label] = [] + # lbl_samples[s.label].append(s) + # for lbl, samples in lbl_samples.items(): + # logging.info("%s samples are %s", lbl, len(samples)) + # logging.info("Unique ids are %s", [s.unique_id for s in samples]) num_labels = len(dataset.labels) logging.info( "writing to output path: %s for %s samples", output_path, len(samples_by_source) diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py index 9d4f5c27..8508f601 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -367,8 +367,9 @@ def main(): print("epoch", e) for x, y in resampled_ds: source = y[3] - for s in source: - print(s) + y_b = y[0] + for s, y_s in zip(source, y_b): + print(labels[np.argmax(y_s)], s.numpy().decode("utf-8")) continue show_batch(x, y, labels, save=save_dir / f"{batch_i}.jpg") batch_i += 1 diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py index 35f75d5f..6e0fd74c 100644 --- a/src/ml_tools/thermalwriter.py +++ b/src/ml_tools/thermalwriter.py @@ -233,6 +233,7 @@ def get_data(clip_samples, extra_args): dont_filter=extra_args.get("dont_filter_segment", False), skip_ffc=extra_args.get("skip_ffc", True), ffc_frames=clip_meta.ffc_frames, + max_segments=len(samples), ) else: filter_by_lq = extra_args.get("filter_by_lq", False) From 5c16876c9ec097308ff488902bf79523ebe71ee0 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 24 Sep 2024 21:38:41 +0200 Subject: [PATCH 041/117] more test --- src/build.py | 2 +- src/ml_tools/thermaldataset.py | 9 ++------- src/ml_tools/thermalwriter.py | 1 - 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/build.py b/src/build.py index 879985fd..155ced26 100644 --- a/src/build.py +++ b/src/build.py @@ -731,7 +731,7 @@ def rough_balance(datasets): counts.sort() std_dev = np.std(counts) logging.info("Counts are %s std dev %s", counts, std_dev) - if std_dev < 2000: + if std_dev < 0: logging.info("Not balancing") continue if len(counts) < 7: diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py index 8508f601..0ce97e54 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -351,8 +351,8 @@ def main(): # preprocess_fn=tf.keras.applications.inception_v3.preprocess_input, resample=False, include_features=False, - remapped_labels=get_remapped(), - excluded_labels=get_excluded(), + # remapped_labels=get_remapped(), + # excluded_labels=get_excluded(), include_track=True, num_frames=25, ) @@ -366,11 +366,6 @@ def main(): batch_i = 0 print("epoch", e) for x, y in resampled_ds: - source = y[3] - y_b = y[0] - for s, y_s in zip(source, y_b): - 
print(labels[np.argmax(y_s)], s.numpy().decode("utf-8")) - continue show_batch(x, y, labels, save=save_dir / f"{batch_i}.jpg") batch_i += 1 # return diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py index 6e0fd74c..2025ff40 100644 --- a/src/ml_tools/thermalwriter.py +++ b/src/ml_tools/thermalwriter.py @@ -229,7 +229,6 @@ def get_data(clip_samples, extra_args): ), segment_type=extra_args.get("segment_type"), segment_min_mass=extra_args.get("segment_min_avg_mass"), - max_segments=extra_args.get("max_segments"), dont_filter=extra_args.get("dont_filter_segment", False), skip_ffc=extra_args.get("skip_ffc", True), ffc_frames=clip_meta.ffc_frames, From 8d9eed746dd47f1c85c600ff582c3ec1c0c3d69a Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 24 Sep 2024 21:53:03 +0200 Subject: [PATCH 042/117] add lbls --- src/ml_tools/thermaldataset.py | 55 +++++++++++++++++----------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py index 0ce97e54..96c8d9fd 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -43,29 +43,30 @@ def get_excluded(): "fox", "cow", "wombat", - "dog", - "sheep" "cat", - "duck", - "pheasant", - "pukeko", - "brown quail", - "black swan", - "quail", - "california quail", + # "dog", "sheep", - "echidna", - "mouse", - "rodent", - "possum", - "cat", - "dog", - "hedgehog", - "kiwi", - "leporidae", - "mustelid", - "wallaby", - "human", - "vehicle", + # "cat", + # "duck", + # "pheasant", + # "pukeko", + # "brown quail", + # "black swan", + # "quail", + # "california quail", + # "sheep", + # "echidna", + # "mouse", + # "rodent", + # "possum", + # "cat", + # "dog", + # "hedgehog", + # "kiwi", + # "leporidae", + # "mustelid", + # "wallaby", + # "human", + # "vehicle", ] @@ -73,10 +74,10 @@ def get_remapped(multi_label=False): land_bird = "land-bird" if multi_label else "bird" return { "echidna": "hedgehog", - # "grey kangaroo": "wallaby", - # "sambar deer": "deer", - # "mouse": "rodent", - # "rat": "rodent", + "grey kangaroo": "wallaby", + "sambar deer": "deer", + "mouse": "rodent", + "rat": "rodent", "rain": "false-positive", "water": "false-positive", "insect": "false-positive", @@ -88,7 +89,7 @@ def get_remapped(multi_label=False): "pheasant": land_bird, "pukeko": land_bird, "quail": land_bird, - # "chicken": land_bird, + "chicken": land_bird, } From c13aa4b44674e9b3675b0411270590db269c9575 Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 25 Sep 2024 14:52:41 +0200 Subject: [PATCH 043/117] tidy up --- src/build.py | 5 +++-- src/ml_tools/dataset.py | 2 -- src/ml_tools/datasetstructures.py | 2 +- src/ml_tools/kerasmodel.py | 35 ++++--------------------------- src/ml_tools/tfdataset.py | 3 --- src/ml_tools/tfwriter.py | 9 -------- src/ml_tools/thermaldataset.py | 16 +++++++------- src/ml_tools/thermalwriter.py | 25 +++------------------- 8 files changed, 19 insertions(+), 78 deletions(-) diff --git a/src/build.py b/src/build.py index 155ced26..ee86b56f 100644 --- a/src/build.py +++ b/src/build.py @@ -718,7 +718,8 @@ def dump_split_ids(datasets, out_file="datasplit.json"): def rough_balance(datasets): - logging.info("ROUGH BALANCE") + dev_threshold = 2000 + logging.info("Roughly Balancing") print_counts(*datasets) for dataset in datasets: @@ -731,7 +732,7 @@ def rough_balance(datasets): counts.sort() std_dev = np.std(counts) logging.info("Counts are %s std dev %s", counts, std_dev) - if std_dev < 0: + if std_dev < dev_threshold: logging.info("Not balancing") 
continue if len(counts) < 7: diff --git a/src/ml_tools/dataset.py b/src/ml_tools/dataset.py index 30bbb224..15748b1e 100644 --- a/src/ml_tools/dataset.py +++ b/src/ml_tools/dataset.py @@ -192,8 +192,6 @@ def load_clips( counter += 1 if counter % 50 == 0: logging.debug("Dataset loaded %s", counter) - if counter == 500: - break return [counter, counter] def load_clip(self, db_clip, dont_filter_segment=False): diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py index 57292b92..f1840527 100644 --- a/src/ml_tools/datasetstructures.py +++ b/src/ml_tools/datasetstructures.py @@ -950,7 +950,7 @@ def get_segments( filtered_stats = {"segment_mass": 0, "too short": 0} has_no_mass = np.sum(mass_history) == 0 - before = len(regions) + frame_indices = [ region.frame_number for region in regions diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py index 32f1f9c7..955f0617 100644 --- a/src/ml_tools/kerasmodel.py +++ b/src/ml_tools/kerasmodel.py @@ -502,9 +502,6 @@ def train_model( multi_label=self.params.multi_label, num_frames=self.params.square_width**2, channels=self.params.channels, - deterministic=True, - # epoch_size=1000, - # include_Track=True, ) self.remapped = remapped self.validate, remapped, _, _ = get_dataset( @@ -522,39 +519,15 @@ def train_model( multi_label=self.params.multi_label, num_frames=self.params.square_width**2, channels=self.params.channels, - deterministic=True, - # epoch_size=250, - # in2clude_track=True, - # dist=self.dataset_counts["validation"], ) - # logging.info("Saving datasets") - # save_dir = Path("./train-images") - # save_dir.mkdir(exist_ok=True) - # batch_i = 0 - # for x, y in self.train: - # thermaldataset.show_batch( - # x, y, self.labels, save=save_dir / f"{batch_i}.jpg", tracks=True - # ) - # batch_i += 1 - - # save_dir = Path("./val-images") - # save_dir.mkdir(exist_ok=True) - # batch_i = 0 - # for x, y in self.validate: - # thermaldataset.show_batch( - # x, y, self.labels, save=save_dir / f"{batch_i}.jpg" - # ) - # batch_i += 1 - # if weights is not None: - # self.model.load_weights(weights) if rebalance: self.class_weights = get_weighting(self.train, self.labels) logging.info( - "Training on %s with class weights %s", - self.labels, - self.class_weights, - ) + "Training on %s with class weights %s", + self.labels, + self.class_weights, + ) self.save_metadata(run_name) self.save(run_name) diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py index 027b7b92..33abeb0a 100644 --- a/src/ml_tools/tfdataset.py +++ b/src/ml_tools/tfdataset.py @@ -94,9 +94,6 @@ def get_dataset(load_function, base_dir, labels, **args): else: logging.info("Excluding %s", excluded_labels) - - # excluded_labels.append("insect") - # excluded_labels.append("cat") new_labels = labels.copy() for excluded in excluded_labels: if excluded in new_labels: diff --git a/src/ml_tools/tfwriter.py b/src/ml_tools/tfwriter.py index 402071df..519f4ffb 100644 --- a/src/ml_tools/tfwriter.py +++ b/src/ml_tools/tfwriter.py @@ -101,15 +101,6 @@ def create_tf_records( samples_by_source = dataset.get_samples_by_source() source_files = list(samples_by_source.keys()) np.random.shuffle(source_files) - # lbl_samples = {} - # for source, samples in samples_by_source.items(): - # for s in samples: - # if s.label not in lbl_samples: - # lbl_samples[s.label] = [] - # lbl_samples[s.label].append(s) - # for lbl, samples in lbl_samples.items(): - # logging.info("%s samples are %s", lbl, len(samples)) - # logging.info("Unique ids are %s", [s.unique_id for s in 
samples]) num_labels = len(dataset.labels) logging.info( "writing to output path: %s for %s samples", output_path, len(samples_by_source) diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py index 96c8d9fd..096de9ff 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -37,14 +37,14 @@ def get_excluded(): "bat", "mammal", "frog", - "grey kangaroo", - "sambar deer", - "chicken", + # "grey kangaroo", + # "sambar deer", + # "chicken", "fox", - "cow", + # "cow", "wombat", # "dog", - "sheep", + # "sheep", # "cat", # "duck", # "pheasant", @@ -352,8 +352,8 @@ def main(): # preprocess_fn=tf.keras.applications.inception_v3.preprocess_input, resample=False, include_features=False, - # remapped_labels=get_remapped(), - # excluded_labels=get_excluded(), + remapped_labels=get_remapped(), + excluded_labels=get_excluded(), include_track=True, num_frames=25, ) @@ -367,7 +367,7 @@ def main(): batch_i = 0 print("epoch", e) for x, y in resampled_ds: - show_batch(x, y, labels, save=save_dir / f"{batch_i}.jpg") + show_batch(x, y, labels, save=save_dir / f"{batch_i}.jpg", tracks=True) batch_i += 1 # return diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py index 2025ff40..c6a8f40b 100644 --- a/src/ml_tools/thermalwriter.py +++ b/src/ml_tools/thermalwriter.py @@ -177,18 +177,13 @@ def get_data(clip_samples, extra_args): data = [] crop_rectangle = tools.Rectangle(2, 2, 160 - 2 * 2, 140 - 2 * 2) - out_folder = None if clip_samples[0].source_file.suffix == ".hdf5": db = TrackDatabase(clip_samples[0].source_file) - out_folder = "hdf5" else: db = RawDatabase(clip_samples[0].source_file) db.load_frames() - out_folder = "raw" - # going to redo segments to get rid of ffc segments - out_folder = Path(out_folder) - out_folder.mkdir(exist_ok=True) + # going to redo segments to get rid of ffc segments clip_id = clip_samples[0].clip_id try: background = db.get_clip_background() @@ -303,16 +298,9 @@ def get_data(clip_samples, extra_args): new_min = np.amin(diff_frame) if thermal_min_diff is None or new_min < thermal_min_diff: thermal_min_diff = new_min - # min_diff = max(0, new_min) if new_max > thermal_max_diff: thermal_max_diff = new_max - # logging.info( - # "Min diff %s max diff %s thermal %s - %s", - # min_diff, - # max_diff, - # thermal_min_diff, - # thermal_max_diff, - # ) + # normalize by maximum difference between background and tracked region # probably only need to use difference on the frames used for this record # also min_diff maybe could just be set to 0 and clip values below 0, @@ -332,7 +320,7 @@ def get_data(clip_samples, extra_args): (32, 32), crop_rectangle, keep_edge=True ) if ( - np.amax(frame.thermal) > 40000 + np.amax(frame.thermal) > 50000 or np.amin(frame.thermal) < 1000 ): logging.error( @@ -362,13 +350,6 @@ def get_data(clip_samples, extra_args): ) np.clip(frame.filtered, a_min=0, a_max=None, out=frame.filtered) - # cv2.imwrite( - # str( - # out_folder / f"{clip_id}-{track_id}-{frame_number}.png" - # ), - # np.uint8(frame.thermal), - # ) - if not stats[0]: frame.filtered = np.zeros((frame.filtered.shape)) f2 = np.uint8(frame.filtered) From 670ec4bb02a5d43df4ef164c3d20ce98499a93d3 Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 25 Sep 2024 15:09:44 +0200 Subject: [PATCH 044/117] remove load config --- src/build.py | 10 ++++++---- src/classify/clipclassifier.py | 2 +- src/config/buildconfig.py | 32 ++++++++++++++++++++++++++++++++ src/config/config.py | 7 +++---- src/config/loadconfig.py | 24 +----------------------- 
src/ml_tools/dataset.py | 8 ++++---- src/ml_tools/thermalwriter.py | 31 ++++++++++++++++++------------- src/ml_tools/tools.py | 3 +++ src/mldataset/makedataset.py | 4 ++-- src/modelevaluate.py | 5 ++--- 10 files changed, 72 insertions(+), 54 deletions(-) diff --git a/src/build.py b/src/build.py index ee86b56f..3105ec25 100644 --- a/src/build.py +++ b/src/build.py @@ -19,8 +19,8 @@ from ml_tools.tfwriter import create_tf_records from ml_tools.irwriter import save_data as save_ir_data from ml_tools.thermalwriter import save_data as save_thermal_data - - +from ml_tools.tools import CustomJSONEncoder +import attrs import numpy as np from pathlib import Path @@ -890,8 +890,9 @@ def main(): "max_segments": master_dataset.max_segments, "dont_filter_segment": True, "skip_ffc": True, - "tag_precedence": config.load.tag_precedence, + "tag_precedence": config.build.tag_precedence, "min_mass": master_dataset.min_frame_mass, + "thermal_diff_norm": config.build.thermal_diff_norm, } ) create_tf_records( @@ -915,10 +916,11 @@ def main(): "type": config.train.type, "counts": dataset_counts, "by_label": False, + "config": attrs.asdict(config), } with open(meta_filename, "w") as f: - json.dump(meta_data, f, indent=4) + json.dump(meta_data, f, indent=4, cls=CustomJSONEncoder) if __name__ == "__main__": diff --git a/src/classify/clipclassifier.py b/src/classify/clipclassifier.py index f9e239db..9bcaa135 100644 --- a/src/classify/clipclassifier.py +++ b/src/classify/clipclassifier.py @@ -129,7 +129,7 @@ def process_file(self, filename, cache=None, reuse_frames=None): clip = Clip(track_extractor.config, filename) clip.load_metadata( meta_data, - self.config.load.tag_precedence, + self.config.build.tag_precedence, ) track_extractor.parse_clip(clip) diff --git a/src/config/buildconfig.py b/src/config/buildconfig.py index 0d203f95..045de438 100644 --- a/src/config/buildconfig.py +++ b/src/config/buildconfig.py @@ -34,6 +34,32 @@ class BuildConfig(DefaultConfig): min_frame_mass = attr.ib() filter_by_lq = attr.ib() max_segments = attr.ib() + thermal_diff_norm = attr.ib() + tag_precedence = attr.ib() + excluded_tags = attr.ib() + + EXCLUDED_TAGS = ["poor tracking", "part", "untagged", "unidentified"] + + DEFAULT_GROUPS = { + 0: [ + "bird", + "false-positive", + "hedgehog", + "possum", + "rodent", + "mustelid", + "cat", + "kiwi", + "dog", + "leporidae", + "human", + "insect", + "pest", + ], + 1: ["unidentified", "other"], + 2: ["part", "bad track"], + 3: ["default"], + } @classmethod def load(cls, build): @@ -46,6 +72,9 @@ def load(cls, build): min_frame_mass=build["min_frame_mass"], filter_by_lq=build["filter_by_lq"], max_segments=build["max_segments"], + thermal_diff_norm=build["thermal_diff_norm"], + tag_precedence=build["tag_precedence"], + excluded_tags=build["excluded_tags"], ) @classmethod @@ -59,6 +88,9 @@ def get_defaults(cls): min_frame_mass=10, filter_by_lq=False, max_segments=5, + thermal_diff_norm=True, + tag_precedence=BuildConfig.DEFAULT_GROUPS, + excluded_tags=BuildConfig.EXCLUDED_TAGS, ) def validate(self): diff --git a/src/config/config.py b/src/config/config.py index e78feb40..d99cadc9 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -5,7 +5,6 @@ import logging import yaml -from .loadconfig import LoadConfig from .trackingconfig import TrackingConfig from .trainconfig import TrainConfig from .classifyconfig import ClassifyConfig @@ -31,7 +30,7 @@ class Config(DefaultConfig): "wallaby", ] base_folder = attr.ib() - load = attr.ib() + # load = attr.ib() labels = attr.ib() build = 
attr.ib() tracking = attr.ib() @@ -66,7 +65,7 @@ def load_from_stream(cls, stream): return cls( base_folder=Path(base_folder), tracking=TrackingConfig.load(raw["tracking"]), - load=LoadConfig.load(raw["load"]), + # load=LoadConfig.load(raw["load"]), train=TrainConfig.load(raw["train"], base_folder), classify=ClassifyConfig.load(raw["classify"]), reprocess=raw["reprocess"], @@ -89,7 +88,7 @@ def get_defaults(cls): worker_threads=0, build=BuildConfig.get_defaults(), tracking=TrackingConfig.get_defaults(), - load=LoadConfig.get_defaults(), + # load=LoadConfig.get_defaults(), train=TrainConfig.get_defaults(), classify=ClassifyConfig.get_defaults(), debug=False, diff --git a/src/config/loadconfig.py b/src/config/loadconfig.py index bb28f7d3..4d42fba6 100644 --- a/src/config/loadconfig.py +++ b/src/config/loadconfig.py @@ -24,29 +24,7 @@ @attr.s class LoadConfig(DefaultConfig): - EXCLUDED_TAGS = ["poor tracking", "part", "untagged", "unidentified"] - - DEFAULT_GROUPS = { - 0: [ - "bird", - "false-positive", - "hedgehog", - "possum", - "rodent", - "mustelid", - "cat", - "kiwi", - "dog", - "leporidae", - "human", - "insect", - "pest", - ], - 1: ["unidentified", "other"], - 2: ["part", "bad track"], - 3: ["default"], - } - + enable_compression = attr.ib() include_filtered_channel = attr.ib() preview = attr.ib() diff --git a/src/ml_tools/dataset.py b/src/ml_tools/dataset.py index 15748b1e..dcb58fb0 100644 --- a/src/ml_tools/dataset.py +++ b/src/ml_tools/dataset.py @@ -20,7 +20,7 @@ from ml_tools import tools from track.region import Region import json -from config.loadconfig import LoadConfig +from config.buildconfig import BuildConfig from pathlib import Path @@ -64,7 +64,7 @@ def __init__( self.label_caps = {} self.use_segments = True if config: - self.tag_precedence = config.load.tag_precedence + self.tag_precedence = config.build.tag_precedence self.type = config.train.type if config.train.type == "IR": self.use_segments = False @@ -80,13 +80,13 @@ def __init__( self.banned_clips = config.build.banned_clips self.included_labels = config.labels self.segment_min_avg_mass = config.build.segment_min_avg_mass - self.excluded_tags = config.load.excluded_tags + self.excluded_tags = config.build.excluded_tags self.min_frame_mass = config.build.min_frame_mass self.filter_by_lq = config.build.filter_by_lq self.segment_type = SegmentType.ALL_RANDOM self.max_segments = config.build.max_segments else: - self.tag_precedence = LoadConfig.DEFAULT_GROUPS + self.tag_precedence = BuildConfig.DEFAULT_GROUPS self.filter_by_lq = False # number of seconds each segment should be if self.use_segments: diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py index c6a8f40b..603698e4 100644 --- a/src/ml_tools/thermalwriter.py +++ b/src/ml_tools/thermalwriter.py @@ -274,10 +274,13 @@ def get_data(clip_samples, extra_args): ) by_frame_number = {} - thermal_max_diff = 0 + thermal_max_diff = None thermal_min_diff = None - max_diff = 0 + max_diff = None min_diff = None + + thermal_diff_norm = extra_args.get("thermal_diff_norm", False) + for f in track_frames: if f.region.blank or f.region.width <= 0 or f.region.height <= 0: continue @@ -290,16 +293,16 @@ def get_data(clip_samples, extra_args): if min_diff is None or new_min < min_diff: min_diff = new_min # min_diff = max(0, new_min) - if new_max > max_diff: + if max_diff is None or new_max > max_diff: max_diff = new_max - - diff_frame = f.thermal - frame_temp_median[f.frame_number] - new_max = np.amax(diff_frame) - new_min = np.amin(diff_frame) - if 
thermal_min_diff is None or new_min < thermal_min_diff: - thermal_min_diff = new_min - if new_max > thermal_max_diff: - thermal_max_diff = new_max + if thermal_diff_norm: + diff_frame = f.thermal - frame_temp_median[f.frame_number] + new_max = np.amax(diff_frame) + new_min = np.amin(diff_frame) + if thermal_min_diff is None or new_min < thermal_min_diff: + thermal_min_diff = new_min + if thermal_max_diff is None or new_max > thermal_max_diff: + thermal_max_diff = new_max # normalize by maximum difference between background and tracked region # probably only need to use difference on the frames used for this record @@ -334,8 +337,10 @@ def get_data(clip_samples, extra_args): ) frame.thermal -= temp_median - - # np.clip(frame.thermal, a_min=0, a_max=None, out=frame.thermal) + if not thermal_diff_norm: + np.clip( + frame.thermal, a_min=0, a_max=None, out=frame.thermal + ) frame.thermal, stats = imageprocessing.normalize( frame.thermal, min=thermal_min_diff, diff --git a/src/ml_tools/tools.py b/src/ml_tools/tools.py index 519c9e2f..bdfb51de 100644 --- a/src/ml_tools/tools.py +++ b/src/ml_tools/tools.py @@ -52,6 +52,9 @@ def default(self, obj): return obj.isoformat() elif isinstance(obj, Rectangle): return obj.meta_dictionary() + elif isinstance(obj, Path): + return str(obj) + # Let the base class default method raise the TypeError return json.JSONEncoder.default(self, obj) diff --git a/src/mldataset/makedataset.py b/src/mldataset/makedataset.py index 65368843..cbb7e75a 100644 --- a/src/mldataset/makedataset.py +++ b/src/mldataset/makedataset.py @@ -131,7 +131,7 @@ def process_file(self, filename, out_dir, config): clip = Clip(config.tracking["thermal"], filename) clip.load_metadata( metadata, - config.load.tag_precedence, + config.build.tag_precedence, ) with h5py.File(out_file, "w") as f: @@ -263,7 +263,7 @@ def process_file(self, filename, out_dir, config): node_attrs["id"] = track_id tags = track.get("tags", []) tag = Track.get_best_human_tag( - tags, self.config.load.tag_precedence, 0 + tags, self.config.build.tag_precedence, 0 ) master_tag = [ diff --git a/src/modelevaluate.py b/src/modelevaluate.py index 009451c8..e3ff9d79 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -44,7 +44,7 @@ from ml_tools.frame import Frame from ml_tools import imageprocessing import cv2 -from config.loadconfig import LoadConfig +from config.buildconfig import BuildConfig from sklearn.metrics import confusion_matrix from multiprocessing import Pool @@ -255,7 +255,6 @@ def evalute_prod_confusion(dir, confusion_file): tag.get("what") for tag in tags if tag.get("automatic") == False - # and tag.get("what", "") not in LoadConfig.EXCLUDED_TAGS ] human_tags = set(human_tags) if len(human_tags) > 1: @@ -299,7 +298,7 @@ def load_clip_data(cptv_file): # for clip in dataset.clips: reason = {} clip_db = RawDatabase(cptv_file) - clip = clip_db.get_clip_tracks(LoadConfig.DEFAULT_GROUPS) + clip = clip_db.get_clip_tracks(BuildConfig.DEFAULT_GROUPS) if clip is None: logging.warn("No clip for %s", cptv_file) return None From 1531bec6ce8604d2ba642e3579ab908740c6323c Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 25 Sep 2024 15:09:51 +0200 Subject: [PATCH 045/117] delete load config --- src/config/loadconfig.py | 61 ---------------------------------------- 1 file changed, 61 deletions(-) delete mode 100644 src/config/loadconfig.py diff --git a/src/config/loadconfig.py b/src/config/loadconfig.py deleted file mode 100644 index 4d42fba6..00000000 --- a/src/config/loadconfig.py +++ /dev/null @@ -1,61 +0,0 @@ -""" 
-classifier-pipeline - this is a server side component that manipulates cptv -files and to create a classification model of animals present -Copyright (C) 2018, The Cacophony Project - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -""" - -import attr - -from .defaultconfig import DefaultConfig - - -@attr.s -class LoadConfig(DefaultConfig): - - enable_compression = attr.ib() - include_filtered_channel = attr.ib() - preview = attr.ib() - tag_precedence = attr.ib() - cache_to_disk = attr.ib() - high_quality_optical_flow = attr.ib() - excluded_tags = attr.ib() - - @classmethod - def load(cls, config): - return cls( - enable_compression=config["enable_compression"], - include_filtered_channel=config["include_filtered_channel"], - preview=config["preview"], - tag_precedence=config["tag_precedence"], - cache_to_disk=config["cache_to_disk"], - high_quality_optical_flow=config["high_quality_optical_flow"], - excluded_tags=config["excluded_tags"], - ) - - @classmethod - def get_defaults(cls): - return cls( - enable_compression=False, - include_filtered_channel=True, - preview=None, - tag_precedence=LoadConfig.DEFAULT_GROUPS, - cache_to_disk=False, - high_quality_optical_flow=True, - excluded_tags=LoadConfig.EXCLUDED_TAGS, - ) - - def validate(self): - return True From c5f2106ba9d526e0c321baaa115bd915618cc237 Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 25 Sep 2024 15:19:19 +0200 Subject: [PATCH 046/117] remove load --- requirements.txt | 2 +- src/config/config.py | 4 ---- src/ml_tools/kerasmodel.py | 3 ++- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index af94548d..ac2d76b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -tensorflow~=2.14.0 +tensorflow~=2.17.0 matplotlib~=3.0 pytz cptv~=1.5.4 diff --git a/src/config/config.py b/src/config/config.py index d99cadc9..78ca23be 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -30,7 +30,6 @@ class Config(DefaultConfig): "wallaby", ] base_folder = attr.ib() - # load = attr.ib() labels = attr.ib() build = attr.ib() tracking = attr.ib() @@ -65,7 +64,6 @@ def load_from_stream(cls, stream): return cls( base_folder=Path(base_folder), tracking=TrackingConfig.load(raw["tracking"]), - # load=LoadConfig.load(raw["load"]), train=TrainConfig.load(raw["train"], base_folder), classify=ClassifyConfig.load(raw["classify"]), reprocess=raw["reprocess"], @@ -88,7 +86,6 @@ def get_defaults(cls): worker_threads=0, build=BuildConfig.get_defaults(), tracking=TrackingConfig.get_defaults(), - # load=LoadConfig.get_defaults(), train=TrainConfig.get_defaults(), classify=ClassifyConfig.get_defaults(), debug=False, @@ -100,7 +97,6 @@ def validate(self): self.build.validate() for tracker in self.tracking.values(): tracker.validate() - self.load.validate() self.train.validate() self.classify.validate() return True diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py index 955f0617..4d684489 100644 --- a/src/ml_tools/kerasmodel.py +++ 
b/src/ml_tools/kerasmodel.py @@ -520,7 +520,8 @@ def train_model( num_frames=self.params.square_width**2, channels=self.params.channels, ) - + if weights is not None: + self.model.load_weights(weights) if rebalance: self.class_weights = get_weighting(self.train, self.labels) logging.info( From 40d88744c663969e0308a2b12a311011950e22fa Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 25 Sep 2024 15:19:49 +0200 Subject: [PATCH 047/117] fix base_training default --- src/ml_tools/hyperparams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ml_tools/hyperparams.py b/src/ml_tools/hyperparams.py index 946f4454..cbe05d11 100644 --- a/src/ml_tools/hyperparams.py +++ b/src/ml_tools/hyperparams.py @@ -105,7 +105,7 @@ def label_smoothing(self): @property def base_training(self): - return self.get("base_training", False) + return self.get("base_training", True) @property def retrain_layer(self): From f38b83ac8a43f36e6266aa698698530f883c4d8b Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 25 Sep 2024 15:22:34 +0200 Subject: [PATCH 048/117] labels --- src/ml_tools/thermaldataset.py | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py index 096de9ff..afbbbbe1 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -37,36 +37,8 @@ def get_excluded(): "bat", "mammal", "frog", - # "grey kangaroo", - # "sambar deer", - # "chicken", - "fox", - # "cow", - "wombat", - # "dog", - # "sheep", - # "cat", - # "duck", - # "pheasant", - # "pukeko", - # "brown quail", - # "black swan", - # "quail", - # "california quail", - # "sheep", - # "echidna", - # "mouse", - # "rodent", - # "possum", - # "cat", - # "dog", - # "hedgehog", - # "kiwi", - # "leporidae", - # "mustelid", - # "wallaby", - # "human", - # "vehicle", + # "fox", + # "wombat", ] From f8fffb6df495f05d1d1fa7a0ddb9118aaa7c3aed Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 25 Sep 2024 15:23:29 +0200 Subject: [PATCH 049/117] adjusted defaults --- src/ml_tools/hyperparams.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ml_tools/hyperparams.py b/src/ml_tools/hyperparams.py index cbe05d11..cbeab17a 100644 --- a/src/ml_tools/hyperparams.py +++ b/src/ml_tools/hyperparams.py @@ -24,9 +24,8 @@ def insert_defaults(self): self["square_width"] = self.square_width self["frame_size"] = self.frame_size self["segment_width"] = self.segment_width - self["segment_type"] = self.segment_type - self["multi_label"] = False + self["multi_label"] = True self["diff_norm"] = self.diff_norm self["smooth_predictions"] = self.smooth_predictions self["channels"] = self.channels From 7720d5918b81b97b7b6137dab50975e2b09ca578 Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 25 Sep 2024 16:03:54 +0200 Subject: [PATCH 050/117] remove source id --- src/ml_tools/kerasmodel.py | 10 +--------- src/ml_tools/thermaldataset.py | 5 ++--- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py index 4d684489..ed3c39fe 100644 --- a/src/ml_tools/kerasmodel.py +++ b/src/ml_tools/kerasmodel.py @@ -803,15 +803,7 @@ def confusion_tracks(self, dataset, filename, threshold=0.8): new_smooth = pred.predictions * masses new_smooth = np.sum(new_smooth, axis=0) new_smooth /= np.sum(masses) - # logging.info( - # "Smoothing %s with masses %s", np.round(100 * pred.predictions), masses - # ) - # logging.info( - # "N smooth %s old %s new %s", - # np.round(100 * 
no_smoothing), - # np.round(100 * old_smoothing), - # np.round(100 * new_smooth), - # ) + for i, pred_type in enumerate([no_smoothing, old_smoothing, new_smooth]): best_pred = np.argmax(pred_type) confidence = pred_type[best_pred] diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py index afbbbbe1..d4a7e9e7 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -37,6 +37,7 @@ def get_excluded(): "bat", "mammal", "frog", + "cow", # "fox", # "wombat", ] @@ -207,7 +208,6 @@ def read_tfrecord( ) if include_track: - tfrecord_format["image/source_id"] = tf.io.FixedLenFeature((), tf.string) tfrecord_format["image/track_id"] = tf.io.FixedLenFeature((), tf.int64, -1) tfrecord_format["image/avg_mass"] = tf.io.FixedLenFeature((), tf.int64, -1) if include_features or only_features: @@ -265,10 +265,9 @@ def read_tfrecord( label = tf.reduce_max(label, axis=0) if include_track: - source_id = tf.cast(example["image/source_id"], tf.string) track_id = tf.cast(example["image/track_id"], tf.int32) avg_mass = tf.cast(example["image/avg_mass"], tf.int32) - label = (label, track_id, avg_mass, source_id) + label = (label, track_id, avg_mass) if include_features or only_features: features = tf.squeeze(example["image/features"]) if only_features: From 1ac2e23f2708f35b909c1552e3098ae6b411e35e Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 25 Sep 2024 17:26:15 +0200 Subject: [PATCH 051/117] add config to split data by location --- src/build.py | 6 ++++-- src/config/buildconfig.py | 14 ++++++++++++++ src/ml_tools/dataset.py | 32 +++++++++++++++++++++++++++++--- src/ml_tools/rawdb.py | 11 +++++++++-- src/ml_tools/rectangle.py | 4 ++++ 5 files changed, 60 insertions(+), 7 deletions(-) diff --git a/src/build.py b/src/build.py index 3105ec25..ff1edac9 100644 --- a/src/build.py +++ b/src/build.py @@ -732,10 +732,12 @@ def rough_balance(datasets): counts.sort() std_dev = np.std(counts) logging.info("Counts are %s std dev %s", counts, std_dev) - if std_dev < dev_threshold: + if std_dev < dev_threshold or len(counts) == 0: logging.info("Not balancing") continue - if len(counts) < 7: + if len(counts) <= 2: + cap_at = counts[0] + elif len(counts) < 7: cap_at = counts[-2] else: cap_at = counts[-3] diff --git a/src/config/buildconfig.py b/src/config/buildconfig.py index 045de438..31d2aa01 100644 --- a/src/config/buildconfig.py +++ b/src/config/buildconfig.py @@ -22,6 +22,7 @@ import logging from os import path from .defaultconfig import DefaultConfig +from ml_tools.rectangle import Rectangle @attr.s @@ -37,9 +38,20 @@ class BuildConfig(DefaultConfig): thermal_diff_norm = attr.ib() tag_precedence = attr.ib() excluded_tags = attr.ib() + country = attr.ib() EXCLUDED_TAGS = ["poor tracking", "part", "untagged", "unidentified"] + # country bounding boxes + COUNTRY_LOCATIONS = { + "AU": Rectangle.from_ltrb( + 113.338953078, -10.6681857235, 153.569469029, -43.6345972634 + ), + "NZ": Rectangle.from_ltrb( + 166.509144322, -34.4506617165, 178.517093541, -46.641235447 + ), + } + DEFAULT_GROUPS = { 0: [ "bird", @@ -75,6 +87,7 @@ def load(cls, build): thermal_diff_norm=build["thermal_diff_norm"], tag_precedence=build["tag_precedence"], excluded_tags=build["excluded_tags"], + country=build["country"], ) @classmethod @@ -91,6 +104,7 @@ def get_defaults(cls): thermal_diff_norm=True, tag_precedence=BuildConfig.DEFAULT_GROUPS, excluded_tags=BuildConfig.EXCLUDED_TAGS, + country="NZ", ) def validate(self): diff --git a/src/ml_tools/dataset.py b/src/ml_tools/dataset.py index dcb58fb0..290f0f1a
100644 --- a/src/ml_tools/dataset.py +++ b/src/ml_tools/dataset.py @@ -85,7 +85,9 @@ def __init__( self.filter_by_lq = config.build.filter_by_lq self.segment_type = SegmentType.ALL_RANDOM self.max_segments = config.build.max_segments + self.country = config.build.country else: + self.country = "NZ" self.tag_precedence = BuildConfig.DEFAULT_GROUPS self.filter_by_lq = False # number of seconds each segment should be @@ -98,6 +100,13 @@ def __init__( self.segment_min_avg_mass = 10 self.min_frame_mass = 16 self.segment_type = SegmentType.ALL_RANDOM + + self.country_rectangle = BuildConfig.COUNTRY_LOCATIONS.get(self.country) + logging.info( + "Filtering by country %s with bounding box %s", + self.country, + self.country_rectangle, + ) self.max_frame_mass = None self.filtered_stats = { "confidence": 0, @@ -204,7 +213,12 @@ def load_clip(self, db_clip, dont_filter_segment=False): except: logging.error("Could not load %s", db_clip, exc_info=True) return 0 - if clip_header is None or filter_clip(clip_header): + if clip_header is None or filter_clip( + clip_header, + clip_header.location, + self.country_rectangle, + self.filtered_stats, + ): return 0 filtered = 0 added = 0 @@ -616,12 +630,24 @@ def filter_track(track_header, excluded_tags, filtered_stats={}): return False -def filter_clip(clip, filtered_stats={}): +def filter_clip(clip, location, location_bounds, filtered_stats=None): # remove tracks of trapped animals if (clip.events is not None and "trap" in clip.events.lower()) or ( clip.trap is not None and "trap" in clip.trap.lower() ): - self.filtered_stats["trap"] += 1 + if filtered_stats is not None: + if "trap" in filtered_stats: + filtered_stats["trap"] += 1 + else: + filtered_stats["trap"] = 1 logging.info("Filtered because in trap") return True + + if location_bounds is not None and not location_bounds.contains(*location): + if filtered_stats is not None: + if "location" in filtered_stats: + filtered_stats["location"] += 1 + else: + filtered_stats["location"] = 1 + return True return False diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py index 29921198..2b41b30e 100644 --- a/src/ml_tools/rawdb.py +++ b/src/ml_tools/rawdb.py @@ -113,12 +113,19 @@ def get_clip_tracks(self, tag_precedence): self.crop_rectangle = Rectangle( edge_pixels, edge_pixels, resx - edge_pixels * 2, resy - edge_pixels * 2 ) - + location = metadata.get("location") + lat = None + lng = None + try: + lat = location.get("lat") + lng = location.get("lng") + except: + pass clip_header = ClipHeader( clip_id=int(metadata["id"]), station_id=metadata.get("stationId"), source_file=self.file, - location=metadata.get("location"), + location=None if lat is None or lng is None else (lng, lat), camera=metadata.get("deviceId"), rec_time=parse_date(metadata["recordingDateTime"]), frames_per_second=10 if self.file.suffix == "mp4" else 9, diff --git a/src/ml_tools/rectangle.py b/src/ml_tools/rectangle.py index 225a754f..4191654c 100644 --- a/src/ml_tools/rectangle.py +++ b/src/ml_tools/rectangle.py @@ -106,6 +106,10 @@ def enlarge(self, border, max=None): if max: self.crop(max) + def contains(self, x, y): + """Is this point contained in the rectangle""" + return self.left <= x and self.right >= x and self.top >= y and self.bottom <= y + @property def area(self): return int(self.width) * self.height From b50918eaf27787d6771cac67a0d96ed5f983f646 Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 25 Sep 2024 17:56:27 +0200 Subject: [PATCH 052/117] up requirements --- pirequirements.txt | 2 +- requirements.txt | 2 +- 2 files changed,
2 insertions(+), 2 deletions(-) diff --git a/pirequirements.txt b/pirequirements.txt index 62280bdf..48625de4 100644 --- a/pirequirements.txt +++ b/pirequirements.txt @@ -9,7 +9,7 @@ scipy==1.9.3 python-dateutil scikit-learn==1.1.3 tables==3.8.0 -h5py==3.8.0 +h5py==3.10.0 pyyaml==6.0 pillow==10.0.1 attrs==19.2.0 diff --git a/requirements.txt b/requirements.txt index ac2d76b0..3988be34 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ scipy python-dateutil scikit-learn tables~=3.8.0 -h5py~=3.9.0 +h5py~=3.10.0 pyyaml>=4.2b1 pillow~=10.0.1 attrs~=19.1 From a174fc5e8d74c51b958efba9e093e7fa14f360cc Mon Sep 17 00:00:00 2001 From: gferraro Date: Thu, 26 Sep 2024 09:21:10 +0200 Subject: [PATCH 053/117] none location check --- src/ml_tools/dataset.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/ml_tools/dataset.py b/src/ml_tools/dataset.py index 290f0f1a..505dc039 100644 --- a/src/ml_tools/dataset.py +++ b/src/ml_tools/dataset.py @@ -643,7 +643,11 @@ def filter_clip(clip, location, location_bounds, filtered_stats=None): logging.info("Filtered because in trap") return True - if location_bounds is not None and not location_bounds.contains(*location): + if ( + location is not None + and location_bounds is not None + and not location_bounds.contains(*location) + ): if filtered_stats is not None: if "location" in filtered_stats: filtered_stats["location"] += 1 From d40f7e6a7cefaeaa1ba3b2e074a99b73eb990e03 Mon Sep 17 00:00:00 2001 From: gferraro Date: Sun, 29 Sep 2024 19:34:04 +0200 Subject: [PATCH 054/117] added fine tune option --- src/ml_tools/hyperparams.py | 8 ++++ src/ml_tools/kerasmodel.py | 78 ++++++++++++++++++++++++++++++++++--- src/ml_tools/tfdataset.py | 7 ++++ src/train.py | 2 + src/train/train.py | 4 +- 5 files changed, 93 insertions(+), 6 deletions(-) diff --git a/src/ml_tools/hyperparams.py b/src/ml_tools/hyperparams.py index cbeab17a..b4b57055 100644 --- a/src/ml_tools/hyperparams.py +++ b/src/ml_tools/hyperparams.py @@ -50,6 +50,14 @@ def output_dim(self): def smooth_predictions(self): return self.get("smooth_predictions", True) + @property + def excluded_labels(self): + return self.get("excluded_labels", None) + + @property + def remapped_labels(self): + return self.get("remapped_labels", None) + @property def diff_norm(self): return self.get("diff_norm", True) diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py index ed3c39fe..937ce9c9 100644 --- a/src/ml_tools/kerasmodel.py +++ b/src/ml_tools/kerasmodel.py @@ -362,6 +362,59 @@ def build_model( ], ) + def adjust_final_layer(self): + # Adjust final layer to a new set of labels, by removing it and re adding + # new_model = tf.keras.models.Sequential(self.model.layers[:-3]) + self.model = tf.keras.Model( + inputs=self.model.input, outputs=self.model.layers[-2].output + ) + + # model = tf.keras.Model(inputs=self.model.input, outputs=x) + + activation = "softmax" + if self.params.multi_label: + activation = "sigmoid" + + retrain_from = self.params.retrain_layer + if retrain_from: + for i, layer in enumerate(self.model.layers): + if isinstance(layer, tf.keras.layers.BatchNormalization): + # apparently this shouldn't matter as we set base_training = False + layer.trainable = False + logging.info("dont train %s %s", i, layer.name) + else: + layer.trainable = i >= retrain_from + else: + self.model.trainable = self.params.base_training + + # add final layer after as always want this trainable + logging.info( + "Adding new final layer with %s activation and %s labels ", + 
activation, + len(self.labels), + ) + preds = tf.keras.layers.Dense( + len(self.labels), activation=activation, name="prediction" + )(self.model.output) + + self.model = tf.keras.models.Model(self.model.inputs, outputs=preds) + if self.params.multi_label: + acc = tf.metrics.binary_accuracy + else: + acc = tf.metrics.categorical_accuracy + logging.info("Using acc %s", acc) + self.model.summary() + self.model.compile( + optimizer=optimizer(self.params), + loss=loss(self.params), + metrics=[ + acc, + tf.keras.metrics.AUC(), + tf.keras.metrics.Recall(), + tf.keras.metrics.Precision(), + ], + ) + def load_model(self, model_file, training=False, weights=None): model_file = Path(model_file) super().__init__(model_file) @@ -450,14 +503,26 @@ def close(self): gc.collect() def train_model( - self, epochs, run_name, weights=None, rebalance=False, resample=False + self, + epochs, + run_name, + weights=None, + rebalance=False, + resample=False, + fine_tune=None, ): logging.info( "%s Training model for %s epochs with weights %s", run_name, epochs, weights ) - self.excluded_labels, self.remapped_labels = get_excluded( - self.data_type, self.params.multi_label - ) + + if self.params.excluded_labels is None: + self.excluded_labels, self.remapped_labels = get_excluded( + self.data_type, self.params.multi_label + ) + if self.params.remapped_labels is None: + self.remapped_labels, self.remapped_labels = get_excluded( + self.data_type, self.params.multi_label + ) train_files = self.data_dir / "train" validate_files = self.data_dir / "validation" logging.info( @@ -475,8 +540,11 @@ def train_model( self.labels.remove(l) self.log_dir = self.log_base / run_name self.log_dir.mkdir(parents=True, exist_ok=True) + if fine_tune is not None: + self.load_model(fine_tune, weights=weights) + self.adjust_final_layer() - if not self.model: + elif not self.model: self.build_model( dense_sizes=self.params.dense_sizes, retrain_from=self.params.retrain_layer, diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py index 33abeb0a..bcb29027 100644 --- a/src/ml_tools/tfdataset.py +++ b/src/ml_tools/tfdataset.py @@ -94,6 +94,8 @@ def get_dataset(load_function, base_dir, labels, **args): else: logging.info("Excluding %s", excluded_labels) + + # get new labels after excluding and removing remapped labels new_labels = labels.copy() for excluded in excluded_labels: if excluded in new_labels: @@ -101,6 +103,8 @@ def get_dataset(load_function, base_dir, labels, **args): for remapped_lbl in to_remap.keys(): if remapped_lbl in new_labels: new_labels.remove(remapped_lbl) + + # initialize remapped dictionary, setting labels that have been removed to -1, these values will be filtered later for l in labels: keys.append(labels.index(l)) if l not in new_labels: @@ -110,11 +114,14 @@ def get_dataset(load_function, base_dir, labels, **args): else: remapped[l] = [l] values.append(new_labels.index(l)) + + # add the remapped labels to the correct place for k, v in to_remap.items(): if k in labels and v in labels: remapped[v].append(k) values[labels.index(k)] = new_labels.index(v) del remapped[k] + remap_lookup = tf.lookup.StaticHashTable( initializer=tf.lookup.KeyValueTensorInitializer( keys=tf.constant(keys), diff --git a/src/train.py b/src/train.py index 5091b78b..16677575 100644 --- a/src/train.py +++ b/src/train.py @@ -45,6 +45,7 @@ def load_config(): parser.add_argument("-w", "--weights", help="Fine tune using these weights") parser.add_argument("-i", "--ignore", help="Ignore clips in this file") parser.add_argument("-e", "--epochs", 
type=int, help="Epochs to train") + parser.add_argument("-f", "--fine_tune", help="Model to fine tune") parser.add_argument( "name", @@ -67,6 +68,7 @@ def main(): weights=args.weights, ignore=args.ignore, epochs=args.epochs, + fine_tune=args.fine_tune, ) diff --git a/src/train/train.py b/src/train/train.py index 880678b0..60af5ca8 100644 --- a/src/train/train.py +++ b/src/train/train.py @@ -28,7 +28,7 @@ def remove_fp_segments(datasets, ignore_file): print("deleting segment", segment.unique_track_id) for delete in delete_me: try: - datset.remove_track(delete.track_id) + dataset.remove_track(delete.track_id) except: pass dataset.segments.remove(delete) @@ -44,6 +44,7 @@ def train_model( do_grid_search=None, ignore=None, epochs=None, + fine_tune=None, ): init_logging() """Trains a model with the given hyper parameters.""" @@ -77,6 +78,7 @@ def train_model( weights=weights, resample=False, rebalance=False, + fine_tune=fine_tune, ) except KeyboardInterrupt: pass From 4ab73d86d4522866cd9c813d585bdff7203eebe3 Mon Sep 17 00:00:00 2001 From: gferraro Date: Sun, 29 Sep 2024 19:42:51 +0200 Subject: [PATCH 055/117] fix load --- src/ml_tools/kerasmodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py index 937ce9c9..c4c18dd2 100644 --- a/src/ml_tools/kerasmodel.py +++ b/src/ml_tools/kerasmodel.py @@ -427,7 +427,7 @@ def load_model(self, model_file, training=False, weights=None): self.model.trainable = training if weights is not None: - self.model.load_weights(weights).expect_partial() + self.model.load_weights(weights) logging.info("Loaded weight %s", weights) # print(self.model.summary()) From 552b788b8c9dc6d864a9227a1ea17bc3c157bdb4 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 1 Oct 2024 13:47:03 +0200 Subject: [PATCH 056/117] add date filtering --- src/ml_tools/dataset.py | 11 +++++- src/modelevaluate.py | 83 +++++++++++++++++++++-------------------- 2 files changed, 52 insertions(+), 42 deletions(-) diff --git a/src/ml_tools/dataset.py b/src/ml_tools/dataset.py index 505dc039..4c4b0b63 100644 --- a/src/ml_tools/dataset.py +++ b/src/ml_tools/dataset.py @@ -630,7 +630,7 @@ def filter_track(track_header, excluded_tags, filtered_stats={}): return False -def filter_clip(clip, location, location_bounds, filtered_stats=None): +def filter_clip(clip, location, location_bounds, filtered_stats=None, after_date=None): # remove tracks of trapped animals if (clip.events is not None and "trap" in clip.events.lower()) or ( clip.trap is not None and "trap" in clip.trap.lower() @@ -654,4 +654,13 @@ def filter_clip(clip, location, location_bounds, filtered_stats=None): else: filtered_stats["location"] = 1 return True + + if after_date is not None and clip.rec_time <= after_date: + if filtered_stats is not None: + if "date" in filtered_stats: + filtered_stats["date"] += 1 + else: + filtered_stats["date"] = 1 + return True + return False diff --git a/src/modelevaluate.py b/src/modelevaluate.py index e3ff9d79..e07ed872 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -163,6 +163,9 @@ def load_args(): parser.add_argument("-d", "--date", help="Use clips after this") parser.add_argument("--split-file", help="Use split for evaluation") + parser.add_argument( + "--confusion-from-meta", help="Use metadata to produce a confusion matrix" + ) parser.add_argument( "confusion", @@ -213,32 +216,14 @@ def filter_diffs(track_frames, background): return min_diff, max_diff -def evalute_prod_confusion(dir, confusion_file): +# evaluate a 
confusion matrix from metadata of files, already evaluated by our current model on browse + + +def metadata_confusion(dir, confusion_file): with open("label_paths.json", "r") as f: label_paths = json.load(f) label_mapping = get_mappings(label_paths) - - labels = [ - "bird", - "cat", - "deer", - "dog", - "false-positive", - "hedgehog", - "human", - "kiwi", - "leporidae", - "mustelid", - "penguin", - "possum", - "rodent", - "sheep", - "vehicle", - "wallaby", - "land-bird", - "None", - "unidentified", - ] + labels = set() y_true = [] y_pred = [] dir = Path(dir) @@ -252,9 +237,7 @@ def evalute_prod_confusion(dir, confusion_file): for track in meta_data.get("Tracks", []): tags = track.get("tags", []) human_tags = [ - tag.get("what") - for tag in tags - if tag.get("automatic") == False + tag.get("what") for tag in tags if tag.get("automatic") == False ] human_tags = set(human_tags) if len(human_tags) > 1: @@ -264,6 +247,7 @@ def evalute_prod_confusion(dir, confusion_file): continue human_tag = human_tags.pop() human_tag = label_mapping.get(human_tag, human_tag) + labels.add(human_tag) ai_tag = [ tag.get("what") for tag in tags @@ -273,9 +257,13 @@ def evalute_prod_confusion(dir, confusion_file): y_true.append(human_tag) if len(ai_tag) == 0: y_pred.append("None") + labels.add("None") else: + labels.add(ai_tag[0]) y_pred.append(ai_tag[0]) - + labels = list(labels) + labels.sort() + logging.info("Using labels %s",labels) cm = confusion_matrix(y_true, y_pred, labels=labels) # Log the confusion matrix as an image summary. figure = plot_confusion_matrix(cm, class_names=labels) @@ -287,11 +275,13 @@ def evalute_prod_confusion(dir, confusion_file): EXCLUDED_TAGS = ["poor tracking", "part", "untagged", "unidentified"] worker_model = None +after_date = None -def init_worker(model): - global worker_model +def init_worker(model, date): + global worker_model, after_date worker_model = model + after_date = date def load_clip_data(cptv_file): @@ -303,7 +293,7 @@ def load_clip_data(cptv_file): logging.warn("No clip for %s", cptv_file) return None - if filter_clip(clip, reason): + if filter_clip(clip, reason, after_date=after_date): logging.info("Filtering %s", cptv_file) return None clip.tracks = [ @@ -349,6 +339,7 @@ def evaluate_dir( split_file=None, split_dataset="test", threshold=0.5, + after_date=None, ): logging.info("Evaluating cptv files in %s with threshold %s", dir, threshold) @@ -374,7 +365,14 @@ def evaluate_dir( # files = files[:8] start = time.time() # quite faster with just one process for loading and using main process for predicting - with Pool(processes=1, initializer=init_worker, initargs=(model,)) as pool: + with Pool( + processes=1, + initializer=init_worker, + initargs=( + model, + after_date, + ), + ) as pool: for clip_data in pool.imap_unordered(load_clip_data, files): if clip_data is None: continue @@ -468,17 +466,20 @@ def main(): model = KerasModel(train_config=config.train) model.load_model(model_file, training=False, weights=weights) - if args.evaluate_dir: - evaluate_dir( - model, - Path(args.evaluate_dir), - config, - args.confusion, - args.split_file, - args.dataset, - threshold=args.threshold, - ) + if args.confusion_from_meta: + evalute_prod_confusion(Path(args.evaluate_dir), args.confusion) + else: + evaluate_dir( + model, + Path(args.evaluate_dir), + config, + args.confusion, + args.split_file, + args.dataset, + threshold=args.threshold, + after_date=args.date, + ) elif args.dataset: model_labels = model.labels.copy() model.load_training_meta(base_dir) From 
c52246d2bef9750d684a0e91544b5d2ce6373a29 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 1 Oct 2024 13:49:46 +0200 Subject: [PATCH 057/117] count action --- src/modelevaluate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index e07ed872..0836a206 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -155,6 +155,7 @@ def load_args(): parser.add_argument( "--evaluate-dir", + actoun="count", help="Evalute directory of cptv files", ) @@ -263,7 +264,7 @@ def metadata_confusion(dir, confusion_file): y_pred.append(ai_tag[0]) labels = list(labels) labels.sort() - logging.info("Using labels %s",labels) + logging.info("Using labels %s", labels) cm = confusion_matrix(y_true, y_pred, labels=labels) # Log the confusion matrix as an image summary. figure = plot_confusion_matrix(cm, class_names=labels) From a24daded5d6a235ecd6b79b7e39f5073539290a9 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 1 Oct 2024 14:00:15 +0200 Subject: [PATCH 058/117] adjust --- src/modelevaluate.py | 100 ++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 48 deletions(-) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index 0836a206..f3b445b9 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -72,8 +72,9 @@ def model_score(cm, labels): cm = np.around(cm.astype("float") / cm.sum(axis=1)[:, np.newaxis], decimals=2) cm = np.nan_to_num(cm) - - fp_index = labels.index("false-positive") + fp_index = None + if "false-positive" in labels: + fp_index = labels.index("false-positive") none_index = None unid_index = None if "None" in labels: @@ -82,7 +83,9 @@ def model_score(cm, labels): unid_index = labels.index("unidentified") score = 0 for l_i, l in enumerate(labels): - fp_acc = cm[l_i][fp_index] + fp_acc = 0 + if fp_index is not None: + fp_acc = cm[l_i][fp_index] none_acc = 0 unid_acc = 0 accuracy = cm[l_i][l_i] @@ -155,7 +158,6 @@ def load_args(): parser.add_argument( "--evaluate-dir", - actoun="count", help="Evalute directory of cptv files", ) @@ -165,7 +167,9 @@ def load_args(): parser.add_argument("--split-file", help="Use split for evaluation") parser.add_argument( - "--confusion-from-meta", help="Use metadata to produce a confusion matrix" + "--confusion-from-meta", + action="count", + help="Use metadata to produce a confusion matrix", ) parser.add_argument( @@ -253,7 +257,7 @@ def metadata_confusion(dir, confusion_file): tag.get("what") for tag in tags if tag.get("automatic") is True - and tag.get("data", {}).get("name") == "Inc3 RF" + and tag.get("data", {}).get("name") == "Master" ] y_true.append(human_tag) if len(ai_tag) == 0: @@ -464,13 +468,13 @@ def main(): if args.weights: weights = model_file / args.weights base_dir = Path(config.base_folder) / "training-data" + if args.evaluate_dir and args.confusion_from_meta: + metadata_confusion(Path(args.evaluate_dir), args.confusion) + else: - model = KerasModel(train_config=config.train) - model.load_model(model_file, training=False, weights=weights) - if args.evaluate_dir: - if args.confusion_from_meta: - evalute_prod_confusion(Path(args.evaluate_dir), args.confusion) - else: + model = KerasModel(train_config=config.train) + model.load_model(model_file, training=False, weights=weights) + if args.evaluate_dir: evaluate_dir( model, Path(args.evaluate_dir), @@ -481,42 +485,42 @@ def main(): threshold=args.threshold, after_date=args.date, ) - elif args.dataset: - model_labels = model.labels.copy() - model.load_training_meta(base_dir) - # model.labels = 
model_labels - if model.params.multi_label: - model.labels.append("land-bird") - excluded, remapped = get_excluded(model.data_type) - files = base_dir / args.dataset - dataset, _, new_labels, _ = get_dataset( - files, - model.data_type, - model.labels, - model_labels=model_labels, - batch_size=64, - image_size=model.params.output_dim[:2], - preprocess_fn=model.preprocess_fn, - augment=False, - resample=False, - include_features=model.params.mvm, - one_hot=True, - deterministic=True, - shuffle=False, - excluded_labels=excluded, - remapped_labels=remapped, - multi_label=model.params.multi_label, - include_track=True, - cache=True, - channels=model.params.channels, - ) - model.labels = new_labels - logging.info( - "Dataset loaded %s, using labels %s", - args.dataset, - model.labels, - ) - model.confusion_tracks(dataset, args.confusion, threshold=args.threshold) + elif args.dataset: + model_labels = model.labels.copy() + model.load_training_meta(base_dir) + # model.labels = model_labels + if model.params.multi_label: + model.labels.append("land-bird") + excluded, remapped = get_excluded(model.data_type) + files = base_dir / args.dataset + dataset, _, new_labels, _ = get_dataset( + files, + model.data_type, + model.labels, + model_labels=model_labels, + batch_size=64, + image_size=model.params.output_dim[:2], + preprocess_fn=model.preprocess_fn, + augment=False, + resample=False, + include_features=model.params.mvm, + one_hot=True, + deterministic=True, + shuffle=False, + excluded_labels=excluded, + remapped_labels=remapped, + multi_label=model.params.multi_label, + include_track=True, + cache=True, + channels=model.params.channels, + ) + model.labels = new_labels + logging.info( + "Dataset loaded %s, using labels %s", + args.dataset, + model.labels, + ) + model.confusion_tracks(dataset, args.confusion, threshold=args.threshold) if __name__ == "__main__": From 5d0ab220cf7347c1d153f3de9e25416dbb5d79c9 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 1 Oct 2024 14:06:14 +0200 Subject: [PATCH 059/117] fix confusion --- src/modelevaluate.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index f3b445b9..fef33862 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -253,19 +253,23 @@ def metadata_confusion(dir, confusion_file): human_tag = human_tags.pop() human_tag = label_mapping.get(human_tag, human_tag) labels.add(human_tag) - ai_tag = [ - tag.get("what") - for tag in tags - if tag.get("automatic") is True - and tag.get("data", {}).get("name") == "Master" - ] + ai_tags = [] + for tag in tags: + if tag.get("automatic") is True: + data = tag.get("data", {}) + if isinstance(data, str): + if data == "Master": + ai_tags.append(tag["what"]) + elif data.get("name") == "Master": + ai_tags.append(tag["what"]) + y_true.append(human_tag) - if len(ai_tag) == 0: + if len(ai_tags) == 0: y_pred.append("None") labels.add("None") else: - labels.add(ai_tag[0]) - y_pred.append(ai_tag[0]) + labels.add(ai_tags[0]) + y_pred.append(ai_tags[0]) labels = list(labels) labels.sort() logging.info("Using labels %s", labels) From 34d072aa512c501cbc07a00299c85562b566960f Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 1 Oct 2024 14:14:03 +0200 Subject: [PATCH 060/117] add date filter --- src/modelevaluate.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index fef33862..3f0beb0d 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -47,6 
+47,7 @@ from config.buildconfig import BuildConfig from sklearn.metrics import confusion_matrix from multiprocessing import Pool +from dateutil.parser import parse as parse_date root_logger = logging.getLogger() @@ -224,7 +225,7 @@ def filter_diffs(track_frames, background): # evaluate a confusion matrix from metadata of files, already evaluated by our current model on browse -def metadata_confusion(dir, confusion_file): +def metadata_confusion(dir, confusion_file, after_date=None): with open("label_paths.json", "r") as f: label_paths = json.load(f) label_mapping = get_mappings(label_paths) @@ -238,7 +239,9 @@ def metadata_confusion(dir, confusion_file): with open(meta_f, "r") as t: # add in some metadata stats meta_data = json.load(t) - + rec_time = parse_date(meta_data["recordingDateTime"]) + if after_date is not None and rec_time <= after_date: + continue for track in meta_data.get("Tracks", []): tags = track.get("tags", []) human_tags = [ @@ -248,7 +251,7 @@ def metadata_confusion(dir, confusion_file): if len(human_tags) > 1: print("Conflicting tags for ", track.get("id"), cptv_file) if len(human_tags) == 0: - print("No humans in ", tags) + print("No humans in ", meta_f) continue human_tag = human_tags.pop() human_tag = label_mapping.get(human_tag, human_tag) @@ -270,8 +273,12 @@ def metadata_confusion(dir, confusion_file): else: labels.add(ai_tags[0]) y_pred.append(ai_tags[0]) + if len(labels) == 0: + logging.info("No data found") + return labels = list(labels) labels.sort() + logging.info("Using labels %s", labels) cm = confusion_matrix(y_true, y_pred, labels=labels) # Log the confusion matrix as an image summary. @@ -473,7 +480,7 @@ def main(): weights = model_file / args.weights base_dir = Path(config.base_folder) / "training-data" if args.evaluate_dir and args.confusion_from_meta: - metadata_confusion(Path(args.evaluate_dir), args.confusion) + metadata_confusion(Path(args.evaluate_dir), args.confusion, args.date) else: model = KerasModel(train_config=config.train) From 5ccea12927f18ebbe5c4f59aece9553ec02c1780 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 1 Oct 2024 14:57:12 +0200 Subject: [PATCH 061/117] add loading of metadata --- src/modelevaluate.py | 67 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index 3f0beb0d..f032f123 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -161,7 +161,10 @@ def load_args(): "--evaluate-dir", help="Evalute directory of cptv files", ) - + parser.add_argument( + "--model-metadata", + help="Meta data file for model, used with confusion from meta", + ) parser.add_argument("-c", "--config-file", help="Path to config file to use") parser.add_argument("-d", "--date", help="Use clips after this") @@ -225,11 +228,51 @@ def filter_diffs(track_frames, background): # evaluate a confusion matrix from metadata of files, already evaluated by our current model on browse -def metadata_confusion(dir, confusion_file, after_date=None): +def metadata_confusion(dir, confusion_file, after_date=None, model_metadata=None): with open("label_paths.json", "r") as f: label_paths = json.load(f) label_mapping = get_mappings(label_paths) - labels = set() + if model_metadata is not None and Path(model_metadata).exists(): + with open(model_metadata, "r") as t: + # add in some metadata stats + model_meta = json.load(t) + labels = model_meta.get("labels", []) + excluded_labels = model_meta.get("excluded_labels", {}) + remapped = 
model_meta.get("remapped_labels", {}) + remapped_labels = {} + # slightly different format than from thermaldataset + for mapped_to, mapped_labels in remapped.items(): + for mapped_label in mapped_labels: + remapped_labels[mapped_label] = mapped_to + else: + labels = [ + "bird", + "cat", + "deer", + "dog", + "falsepositive", + "hedgehog", + "human", + "kiwi", + "leporidae", + "mustelid", + "penguin", + "possum", + "rodent", + "sheep", + "vehicle", + "wallaby", + "landbird", + "None", + "unidentified", + ] + excluded_labels, remapped_labels = get_excluded("thermal") + logging.info( + "Labels are %s excluded %s remapped %s", + labels, + excluded_labels, + remapped_labels, + ) y_true = [] y_pred = [] dir = Path(dir) @@ -255,7 +298,14 @@ def metadata_confusion(dir, confusion_file, after_date=None): continue human_tag = human_tags.pop() human_tag = label_mapping.get(human_tag, human_tag) - labels.add(human_tag) + if human_tag in excluded_labels: + logging.info("Excluding %s", human_tag) + continue + if human_tag in remapped_labels: + logging.info( + "Remapping %s to %s", human_tag, remapped_labels[human_tag] + ) + human_tag = remapped_labels[human_tag] ai_tags = [] for tag in tags: if tag.get("automatic") is True: @@ -269,17 +319,12 @@ def metadata_confusion(dir, confusion_file, after_date=None): y_true.append(human_tag) if len(ai_tags) == 0: y_pred.append("None") - labels.add("None") else: - labels.add(ai_tags[0]) y_pred.append(ai_tags[0]) if len(labels) == 0: logging.info("No data found") return - labels = list(labels) - labels.sort() - logging.info("Using labels %s", labels) cm = confusion_matrix(y_true, y_pred, labels=labels) # Log the confusion matrix as an image summary. figure = plot_confusion_matrix(cm, class_names=labels) @@ -480,7 +525,9 @@ def main(): weights = model_file / args.weights base_dir = Path(config.base_folder) / "training-data" if args.evaluate_dir and args.confusion_from_meta: - metadata_confusion(Path(args.evaluate_dir), args.confusion, args.date) + metadata_confusion( + Path(args.evaluate_dir), args.confusion, args.date, args.model_metadata + ) else: model = KerasModel(train_config=config.train) From 166c25234724e18e7851463a65ebe0e5359f5b39 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 1 Oct 2024 14:59:18 +0200 Subject: [PATCH 062/117] adjust --- src/modelevaluate.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index f032f123..b0d30bed 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -238,12 +238,7 @@ def metadata_confusion(dir, confusion_file, after_date=None, model_metadata=None model_meta = json.load(t) labels = model_meta.get("labels", []) excluded_labels = model_meta.get("excluded_labels", {}) - remapped = model_meta.get("remapped_labels", {}) - remapped_labels = {} - # slightly different format than from thermaldataset - for mapped_to, mapped_labels in remapped.items(): - for mapped_label in mapped_labels: - remapped_labels[mapped_label] = mapped_to + remapped_labels = model_meta.get("remapped_labels", {}) else: labels = [ "bird", From 68bc3b8a7560bbeb2777647eec235e3fbaf74d61 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 1 Oct 2024 15:00:36 +0200 Subject: [PATCH 063/117] no land bird --- src/modelevaluate.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index b0d30bed..51e2a7b5 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -239,6 +239,9 @@ def metadata_confusion(dir, confusion_file, after_date=None, 
model_metadata=None
 labels = model_meta.get("labels", [])
 excluded_labels = model_meta.get("excluded_labels", {})
 remapped_labels = model_meta.get("remapped_labels", {})
+ for k, v in remapped_labels.items():
+ if v == "land-bird":
+ remapped_labels[k] = "bird"
 else:
 labels = [
 "bird",

From 2d1eb3f087a432ed34b50afe90fed11d319e381f Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 1 Oct 2024 15:05:04 +0200
Subject: [PATCH 064/117] ignore no meta

---
 src/modelevaluate.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/modelevaluate.py b/src/modelevaluate.py
index 51e2a7b5..b6ab3ba9 100644
--- a/src/modelevaluate.py
+++ b/src/modelevaluate.py
@@ -276,6 +276,8 @@ def metadata_confusion(dir, confusion_file, after_date=None, model_metadata=None
 dir = Path(dir)
 for cptv_file in dir.glob(f"**/*cptv"):
 meta_f = cptv_file.with_suffix(".txt")
+ if not meta_f.exists():
+ continue
 meta_data = None
 with open(meta_f, "r") as t:
 # add in some metadata stats

From 70a4ff89846dd76a2658c10f3083dd0210267063 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 1 Oct 2024 15:23:12 +0200
Subject: [PATCH 065/117] add none and unid

---
 src/modelevaluate.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/modelevaluate.py b/src/modelevaluate.py
index b6ab3ba9..0cde9f5a 100644
--- a/src/modelevaluate.py
+++ b/src/modelevaluate.py
@@ -242,6 +242,10 @@ def metadata_confusion(dir, confusion_file, after_date=None, model_metadata=None
 for k, v in remapped_labels.items():
 if v == "land-bird":
 remapped_labels[k] = "bird"
+ if "None" not in labels:
+ labels.append("None")
+ if "unidentified" not in labels:
+ labels.append("unidentified")
 else:
 labels = [

From e6eb8b83342c73f177c9cea4275e4edee4dc0e6e Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 1 Oct 2024 15:31:05 +0200
Subject: [PATCH 066/117] catch non-existent labels

---
 src/modelevaluate.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/modelevaluate.py b/src/modelevaluate.py
index 0cde9f5a..cf2a7682 100644
--- a/src/modelevaluate.py
+++ b/src/modelevaluate.py
@@ -321,13 +321,14 @@ def metadata_confusion(dir, confusion_file, after_date=None, model_metadata=None
 ai_tags.append(tag["what"])

 y_true.append(human_tag)
+ if human_tag not in labels:
+ labels.append(human_tag)
 if len(ai_tags) == 0:
 y_pred.append("None")
 else:
 y_pred.append(ai_tags[0])
- if len(labels) == 0:
- logging.info("No data found")
- return
+ if ai_tags[0] not in labels:
+ labels.append(ai_tags[0])
 cm = confusion_matrix(y_true, y_pred, labels=labels)
 # Log the confusion matrix as an image summary.
 figure = plot_confusion_matrix(cm, class_names=labels)
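
Note: the metadata-driven confusion matrix that patches 056 through 066 iterate on reduces to pairing each track's human tag with its "Master" AI tag and handing both lists to scikit-learn. The sketch below is a minimal illustration of that idea, not code from this repository: the function name sketch_metadata_confusion is invented for this note, it assumes every clip's JSON metadata sits in a .txt file beside the .cptv, that the automatic tag's data field is a dict whose name is "Master" (patch 059 above also tolerates a plain-string data field), and it omits the label remapping and exclusion the real code applies.

    import json
    from pathlib import Path

    from sklearn.metrics import confusion_matrix

    def sketch_metadata_confusion(clip_dir, labels):
        # Pair each track's single human tag with the "Master" AI tag taken
        # from the JSON metadata files sitting alongside the .cptv clips.
        y_true = []
        y_pred = []
        for meta_file in Path(clip_dir).glob("**/*.txt"):
            meta = json.loads(meta_file.read_text())
            for track in meta.get("Tracks", []):
                tags = track.get("tags", [])
                human = {t["what"] for t in tags if t.get("automatic") is False}
                ai = [
                    t["what"]
                    for t in tags
                    if t.get("automatic") is True
                    and isinstance(t.get("data"), dict)
                    and t["data"].get("name") == "Master"
                ]
                if len(human) != 1:
                    # untagged or conflicting human tags: skip, as the patches do
                    continue
                y_true.append(human.pop())
                # "None" stands in for tracks the model never tagged
                y_pred.append(ai[0] if ai else "None")
        return confusion_matrix(y_true, y_pred, labels=labels)
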
From d7e3c102696ad032b1e8a0282f805426d65949f5 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 1 Oct 2024 15:35:32 +0200 Subject: [PATCH 067/117] use logging --- src/modelevaluate.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index cf2a7682..ccfbb604 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -470,16 +470,12 @@ def evaluate_dir( predicted_tag = ",".join(predicted_labels) y_pred.append(predicted_tag) if y_pred[-1] != y_true[-1]: - print( + logging.info("%s predicted %s but should be %s with confidence %s" data[0], - "Got a prediction of", y_pred[-1], - " should be ", label, np.round(100 * prediction.class_best_score), ) - # if predicted_tag not in model.labels: - # model.labels.append(predicted_tag) model.labels.append("None") model.labels.append("unidentified") cm = confusion_matrix(y_true, y_pred, labels=model.labels) From c6c6abb84c3f69a43173fb9e368fe81826231604 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 1 Oct 2024 15:43:56 +0200 Subject: [PATCH 068/117] exclude unknown tag --- src/modelevaluate.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index ccfbb604..2f9478e6 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -310,6 +310,9 @@ def metadata_confusion(dir, confusion_file, after_date=None, model_metadata=None "Remapping %s to %s", human_tag, remapped_labels[human_tag] ) human_tag = remapped_labels[human_tag] + if human_tag not in labels: + logging.info("Excluding %s", human_tag) + ai_tags = [] for tag in tags: if tag.get("automatic") is True: @@ -321,8 +324,6 @@ def metadata_confusion(dir, confusion_file, after_date=None, model_metadata=None ai_tags.append(tag["what"]) y_true.append(human_tag) - if human_tag not in labels: - labels.append(human_tag) if len(ai_tags) == 0: y_pred.append("None") else: From 4131892522a247c20a35585c501b79c079e72c3a Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 1 Oct 2024 15:48:29 +0200 Subject: [PATCH 069/117] correct method --- src/modelevaluate.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index 2f9478e6..3bfb8cd6 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -310,8 +310,8 @@ def metadata_confusion(dir, confusion_file, after_date=None, model_metadata=None "Remapping %s to %s", human_tag, remapped_labels[human_tag] ) human_tag = remapped_labels[human_tag] - if human_tag not in labels: - logging.info("Excluding %s", human_tag) + # if human_tag not in labels: + # logging.info("Excluding %s", human_tag) ai_tags = [] for tag in tags: @@ -324,6 +324,8 @@ def metadata_confusion(dir, confusion_file, after_date=None, model_metadata=None ai_tags.append(tag["what"]) y_true.append(human_tag) + if human_tag not in labels: + labels.append(human_tag) if len(ai_tags) == 0: y_pred.append("None") else: @@ -360,7 +362,7 @@ def load_clip_data(cptv_file): logging.warn("No clip for %s", cptv_file) return None - if filter_clip(clip, reason, after_date=after_date): + if filter_clip(clip, None,None,reason, after_date=after_date): logging.info("Filtering %s", cptv_file) return None clip.tracks = [ From f56fca7a2ef77dfa35389122db0aaaf6c591d8fe Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 1 Oct 2024 15:49:11 +0200 Subject: [PATCH 070/117] comma --- src/modelevaluate.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index 
3bfb8cd6..5eace7f2 100644
--- a/src/modelevaluate.py
+++ b/src/modelevaluate.py
@@ -311,8 +311,8 @@ def metadata_confusion(dir, confusion_file, after_date=None, model_metadata=None
 )
 human_tag = remapped_labels[human_tag]
 # if human_tag not in labels:
- # logging.info("Excluding %s", human_tag)
-
+ # logging.info("Excluding %s", human_tag)
+
 ai_tags = []
 for tag in tags:
 if tag.get("automatic") is True:
@@ -362,7 +362,7 @@ def load_clip_data(cptv_file):
 logging.warn("No clip for %s", cptv_file)
 return None
- if filter_clip(clip, None,None,reason, after_date=after_date):
+ if filter_clip(clip, None, None, reason, after_date=after_date):
 logging.info("Filtering %s", cptv_file)
 return None
 clip.tracks = [
@@ -473,7 +473,8 @@ def evaluate_dir(
 predicted_tag = ",".join(predicted_labels)
 y_pred.append(predicted_tag)
 if y_pred[-1] != y_true[-1]:
- logging.info("%s predicted %s but should be %s with confidence %s"
+ logging.info(
+ "%s predicted %s but should be %s with confidence %s",
 data[0],
 y_pred[-1],
 label,

From 2b88fe402b7c3943922abda421edf2dedd78607c Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 1 Oct 2024 15:57:06 +0200
Subject: [PATCH 071/117] add get id

---
 src/ml_tools/rawdb.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py
index 2b41b30e..e671462e 100644
--- a/src/ml_tools/rawdb.py
+++ b/src/ml_tools/rawdb.py
@@ -208,6 +208,9 @@ def get_clip_tracks(self, tag_precedence):
 clip_header.tracks.append(header)
 return clip_header
+ def get_id(self):
+ return self.meta_data_file
+
 def get_clip_meta(self, tag_precedence):
 return self.get_clip_tracks(tag_precedence)
 #

From f160436850eb4ad08021c12092583449a0e3d1f3 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 1 Oct 2024 15:57:35 +0200
Subject: [PATCH 072/117] remove debug logging

---
 src/modelevaluate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/modelevaluate.py b/src/modelevaluate.py
index 5eace7f2..8e2280d9 100644
--- a/src/modelevaluate.py
+++ b/src/modelevaluate.py
@@ -363,7 +363,7 @@ def load_clip_data(cptv_file):
 return None

 if filter_clip(clip, None, None, reason, after_date=after_date):
- logging.info("Filtering %s", cptv_file)
+ # logging.info("Filtering %s", cptv_file)
 return None
 clip.tracks = [
 track for track in clip.tracks if not filter_track(track, EXCLUDED_TAGS, reason)

From ae4a81c1234ed63766750b4f9402203e7b9fcacf Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 1 Oct 2024 16:08:55 +0200
Subject: [PATCH 073/117] catch exception

---
 src/modelevaluate.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/modelevaluate.py b/src/modelevaluate.py
index 8e2280d9..858556e8 100644
--- a/src/modelevaluate.py
+++ b/src/modelevaluate.py
@@ -379,18 +379,22 @@ def load_clip_data(cptv_file):
 thermal_medians = np.uint16(thermal_medians)
 data = []
 for track in clip.tracks:
- frames, preprocessed, masses = worker_model.preprocess(
- clip_db, track, frames_per_classify=25, dont_filter=True
- )
- data.append(
- (
- f"{track.clip_id}-{track.get_id()}",
- track.label,
- frames,
- preprocessed,
- masses,
+ try:
+ frames, preprocessed, masses = worker_model.preprocess(
+ clip_db, track, frames_per_classify=25, dont_filter=True
 )
- )
+
+ data.append(
+ (
+ f"{track.clip_id}-{track.get_id()}",
+ track.label,
+ frames,
+ preprocessed,
+ masses,
+ )
+ )
+ except:
+ logging.error("Could not load %s", clip.clip_id, exc_info=True)
 return data

From 23eed72933dbf42b69664cb857ea846a1298c77 Mon Sep 17 00:00:00 2001
From: gferraro
Date: 
Tue, 1 Oct 2024 16:29:38 +0200
Subject: [PATCH 074/117] add support for thermal norm

---
 src/ml_tools/hyperparams.py | 6 +++
 src/ml_tools/interpreter.py | 96 ++++++++++++++++++++++++++-----------
 src/ml_tools/preprocess.py | 13 +++--
 3 files changed, 85 insertions(+), 30 deletions(-)

diff --git a/src/ml_tools/hyperparams.py b/src/ml_tools/hyperparams.py
index b4b57055..90bcc362 100644
--- a/src/ml_tools/hyperparams.py
+++ b/src/ml_tools/hyperparams.py
@@ -27,6 +27,8 @@ def insert_defaults(self):
 self["segment_type"] = self.segment_type
 self["multi_label"] = True
 self["diff_norm"] = self.diff_norm
+ self["thermal_diff_norm"] = self.thermal_diff_norm
+ self["smooth_predictions"] = self.smooth_predictions
 self["channels"] = self.channels
@@ -58,6 +60,10 @@ def excluded_labels(self):
 def remapped_labels(self):
 return self.get("remapped_labels", None)

+ @property
+ def thermal_diff_norm(self):
+ return self.get("thermal_diff_norm", False)
+
 @property
 def diff_norm(self):
 return self.get("diff_norm", True)
diff --git a/src/ml_tools/interpreter.py b/src/ml_tools/interpreter.py
index 2763957b..e4c0a99e 100644
--- a/src/ml_tools/interpreter.py
+++ b/src/ml_tools/interpreter.py
@@ -183,9 +183,12 @@ def preprocess_frames(
 data = []
 frames_used = []
 filtered_norm_limits = None
- if self.params.diff_norm:
+ thermal_norm_limits = None
+ if self.params.diff_norm or self.params.thermal_diff_norm:
 min_diff = None
 max_diff = 0
+ thermal_max_diff = None
+ thermal_min_diff = None
 for i, region in enumerate(reversed(track.bounds_history)):
 if region.blank:
 continue
@@ -201,16 +204,30 @@
 continue

 f.float_arrays()
- diff_frame = region.subimage(f.thermal) - region.subimage(
- clip.background
- )
- new_max = np.amax(diff_frame)
- new_min = np.amin(diff_frame)
- if min_diff is None or new_min < min_diff:
- min_diff = new_min
- if new_max > max_diff:
- max_diff = new_max
- filtered_norm_limits = (min_diff, max_diff)
+
+ if self.params.thermal_diff_norm:
+ diff_frame = f.thermal - np.median(f.thermal)
+ new_max = np.amax(diff_frame)
+ new_min = np.amin(diff_frame)
+ if thermal_min_diff is None or new_min < thermal_min_diff:
+ thermal_min_diff = new_min
+ if thermal_max_diff is None or new_max > thermal_max_diff:
+ thermal_max_diff = new_max
+ if self.params.diff_norm:
+ diff_frame = region.subimage(f.thermal) - region.subimage(
+ clip.background
+ )
+ new_max = np.amax(diff_frame)
+ new_min = np.amin(diff_frame)
+ if min_diff is None or new_min < min_diff:
+ min_diff = new_min
+ if new_max > max_diff:
+ max_diff = new_max
+ if self.params.thermal_diff_norm:
+ thermal_norm_limits = (thermal_min_diff, thermal_max_diff)
+
+ if self.params.diff_norm:
+ filtered_norm_limits = (min_diff, max_diff)
 for i, region in enumerate(reversed(track.bounds_history)):
 if region.blank:
 continue
@@ -249,6 +266,7 @@
 clip.background,
 clip.crop_rectangle,
 filtered_norm_limits=filtered_norm_limits,
+ thermal_norm_limits=thermal_norm_limits,
 )
 preprocessed = preprocess_single_frame(
 cropped_frame,
@@ -293,30 +311,52 @@ def preprocess_segments(
 # should really be over whole track buts let just do the indices we predict of
 # seems to make little different to just doing a min max normalization
+ thermal_norm_limits = None
 filtered_norm_limits = None
- if self.params.diff_norm:
+ if self.params.diff_norm or self.params.thermal_diff_norm:
 min_diff = None
 max_diff = 0
- for frame_index in frame_indices:
- region = track.bounds_history[frame_index - track.start_frame]
- f = 
clip.get_frame(region.frame_number) - if f is None: - logging.warn("Could not get frame {}", region.frame_number) + thermal_max_diff = None + thermal_min_diff = None + for i, region in enumerate(reversed(track.bounds_history)): + if region.blank: continue + if region.width == 0 or region.height == 0: + logging.warn( + "No width or height for frame %s regoin %s", + region.frame_number, + region, + ) + continue + f = clip.get_frame(region.frame_number) if region.blank or region.width <= 0 or region.height <= 0: continue f.float_arrays() - diff_frame = region.subimage(f.thermal) - region.subimage( - clip.background - ) - new_max = np.amax(diff_frame) - new_min = np.amin(diff_frame) - if min_diff is None or new_min < min_diff: - min_diff = new_min - if new_max > max_diff: - max_diff = new_max - filtered_norm_limits = (min_diff, max_diff) + + if self.params.thermal_diff_norm: + diff_frame = f.thermal - np.median(f.thermal) + new_max = np.amax(diff_frame) + new_min = np.amin(diff_frame) + if thermal_min_diff is None or new_min < thermal_min_diff: + thermal_min_diff = new_min + if thermal_max_diff is None or new_max > thermal_max_diff: + thermal_max_diff = new_max + if self.params.diff_norm: + diff_frame = region.subimage(f.thermal) - region.subimage( + clip.background + ) + new_max = np.amax(diff_frame) + new_min = np.amin(diff_frame) + if min_diff is None or new_min < min_diff: + min_diff = new_min + if new_max > max_diff: + max_diff = new_max + if self.params.thermal_diff_norm: + thermal_norm_limits = (thermal_min_diff, thermal_max_diff) + + if self.params.diff_norm: + filtered_norm_limits = (min_diff, max_diff) for frame_index in frame_indices: region = track.bounds_history[frame_index - track.start_frame] @@ -341,6 +381,7 @@ def preprocess_segments( clip.background, clip.crop_rectangle, filtered_norm_limits=filtered_norm_limits, + thermal_norm_limits=thermal_norm_limits, ) track_data[frame.frame_number] = cropped_frame features = None @@ -365,6 +406,7 @@ def preprocess_segments( self.params.frame_size, self.params.channels, self.preprocess_fn, + sample=f"{clip.get_id()}-{track.get_id()}", ) if frames is None: logging.warn("No frames to predict on") diff --git a/src/ml_tools/preprocess.py b/src/ml_tools/preprocess.py index b3186127..89214614 100644 --- a/src/ml_tools/preprocess.py +++ b/src/ml_tools/preprocess.py @@ -61,6 +61,7 @@ def preprocess_frame( crop_rectangle=None, calculate_filtered=True, filtered_norm_limits=None, + thermal_norm_limits=None, ): median = np.median(frame.thermal) cropped_frame = frame.crop_by_region(region, only_thermal=True) @@ -79,7 +80,8 @@ def preprocess_frame( True, ) cropped_frame.thermal -= median - np.clip(cropped_frame.thermal, 0, None, out=cropped_frame.thermal) + if thermal_norm_limits is None: + np.clip(cropped_frame.thermal, 0, None, out=cropped_frame.thermal) if calculate_filtered and filtered_norm_limits is not None: cropped_frame.filtered, stats = imageprocessing.normalize( cropped_frame.filtered, @@ -88,8 +90,13 @@ def preprocess_frame( new_max=255, ) if frame.thermal is not None: + thermal_min = None + thermal_max = None + if thermal_norm_limits is not None: + thermal_min, thermal_max = thermal_norm_limits + logging.info("Using therml min max %s, %s", thermal_min, thermal_max) cropped_frame.thermal, _ = imageprocessing.normalize( - cropped_frame.thermal, new_max=255 + cropped_frame.thermal, min=thermal_min, max=thermal_max, new_max=255 ) else: cropped_frame.normalize() @@ -161,7 +168,7 @@ def preprocess_movement( # index += 1 # 
tools.saveclassify_image(
 # data,
- # f"samples/{index}",
+ # f"samples/{sample}-{index}",
 # )

 if preprocess_fn:
 data = preprocess_fn(data)

From 1e67f11893a94dc6aa2cccc6863c8df1e24810ae Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 1 Oct 2024 16:30:39 +0200
Subject: [PATCH 075/117] remove test logging

---
 src/ml_tools/preprocess.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/ml_tools/preprocess.py b/src/ml_tools/preprocess.py
index 89214614..bcb94da3 100644
--- a/src/ml_tools/preprocess.py
+++ b/src/ml_tools/preprocess.py
@@ -94,7 +94,6 @@ def preprocess_frame(
 thermal_max = None
 if thermal_norm_limits is not None:
 thermal_min, thermal_max = thermal_norm_limits
- logging.info("Using therml min max %s, %s", thermal_min, thermal_max)
 cropped_frame.thermal, _ = imageprocessing.normalize(
 cropped_frame.thermal, min=thermal_min, max=thermal_max, new_max=255
 )

From c3ce590f9c51f789a4b2038e319ddb672d7fa6e7 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 1 Oct 2024 16:38:35 +0200
Subject: [PATCH 076/117] add smoothing

---
 src/modelevaluate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/modelevaluate.py b/src/modelevaluate.py
index 858556e8..191f5491 100644
--- a/src/modelevaluate.py
+++ b/src/modelevaluate.py
@@ -462,7 +462,7 @@ def evaluate_dir(
 # top_score = len(output)
 # smoothed = output
 # else:
- # smoothed = output * output * masses
+ smoothed = output * output * masses
 prediction.classified_clip(output, output, data[2], top_score=top_score)
 y_true.append(label_mapping.get(label, label))
 predicted_labels = [prediction.predicted_tag()]

From 8f77f5a7e8ab51f6caadadeeedb67ceb6663344b Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 1 Oct 2024 16:41:49 +0200
Subject: [PATCH 077/117] don't square predictions

---
 src/modelevaluate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/modelevaluate.py b/src/modelevaluate.py
index 191f5491..286dfb04 100644
--- a/src/modelevaluate.py
+++ b/src/modelevaluate.py
@@ -462,7 +462,7 @@ def evaluate_dir(
 # top_score = len(output)
 # smoothed = output
 # else:
- smoothed = output * output * masses
+ smoothed = output * masses
 prediction.classified_clip(output, output, data[2], top_score=top_score)
 y_true.append(label_mapping.get(label, label))
 predicted_labels = [prediction.predicted_tag()]

From 34f2f210a01899871f5d8cf496932e58c04d8674 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Wed, 2 Oct 2024 16:37:22 +0200
Subject: [PATCH 078/117] build frames dataset

---
 src/build.py | 6 +++---
 src/config/buildconfig.py | 4 +++-
 src/ml_tools/dataset.py | 2 +-
 src/ml_tools/preprocess.py | 12 ++++++------
 src/ml_tools/tools.py | 3 ++-
 5 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/src/build.py b/src/build.py
index ff1edac9..f614d764 100644
--- a/src/build.py
+++ b/src/build.py
@@ -736,11 +736,11 @@ def rough_balance(datasets):
 logging.info("Not balancing")
 continue
 if len(counts) <= 2:
- cap_at = counts[0]
+ cap_at = counts[-1]
 elif len(counts) < 7:
- cap_at = counts[-2]
+ cap_at = counts[-1]
 else:
- cap_at = counts[-3]
+ cap_at = counts[-1]
 logging.info("Capping dataset %s at %s", dataset.name, cap_at)
 for lbl, count in lbl_counts.items():
 if count <= cap_at:
diff --git a/src/config/buildconfig.py b/src/config/buildconfig.py
index 31d2aa01..c659ca75 100644
--- a/src/config/buildconfig.py
+++ b/src/config/buildconfig.py
@@ -39,7 +39,7 @@ class BuildConfig(DefaultConfig):
 tag_precedence = attr.ib()
 excluded_tags = attr.ib()
 country = attr.ib()
-
+ use_segments = attr.ib()
 EXCLUDED_TAGS = ["poor tracking", "part", "untagged", 
"unidentified"] # country bounding boxs @@ -88,6 +88,7 @@ def load(cls, build): tag_precedence=build["tag_precedence"], excluded_tags=build["excluded_tags"], country=build["country"], + use_segments=build["use_segments"], ) @classmethod @@ -105,6 +106,7 @@ def get_defaults(cls): tag_precedence=BuildConfig.DEFAULT_GROUPS, excluded_tags=BuildConfig.EXCLUDED_TAGS, country="NZ", + use_segments=True, ) def validate(self): diff --git a/src/ml_tools/dataset.py b/src/ml_tools/dataset.py index 4c4b0b63..3e422076 100644 --- a/src/ml_tools/dataset.py +++ b/src/ml_tools/dataset.py @@ -70,7 +70,7 @@ def __init__( self.use_segments = False self.segment_length = 1 else: - self.use_segments = config.train.hyper_params.get("use_segments", True) + self.use_segments = config.build.use_segments if self.use_segments: self.segment_length = config.build.segment_length else: diff --git a/src/ml_tools/preprocess.py b/src/ml_tools/preprocess.py index bcb94da3..1600425c 100644 --- a/src/ml_tools/preprocess.py +++ b/src/ml_tools/preprocess.py @@ -163,12 +163,12 @@ def preprocess_movement( # # # # # # for testing - # global index - # index += 1 - # tools.saveclassify_image( - # data, - # f"samples/{sample}-{index}", - # ) + global index + index += 1 + tools.saveclassify_image( + data, + f"samples/{sample}-{index}", + ) if preprocess_fn: data = preprocess_fn(data) diff --git a/src/ml_tools/tools.py b/src/ml_tools/tools.py index bdfb51de..38dd9e90 100644 --- a/src/ml_tools/tools.py +++ b/src/ml_tools/tools.py @@ -193,7 +193,8 @@ def saveclassify_image(data, filename): Path(filename).parent.mkdir(parents=True, exist_ok=True) r = Image.fromarray(np.uint8(data[:, :, 0])) g = Image.fromarray(np.uint8(data[:, :, 1])) - b = Image.fromarray(np.uint8(data[:, :, 2])) + b = g + # b = Image.fromarray(np.uint8(data[:, :, 2])) concat = np.concatenate((r, g, b), axis=1) # horizontally img = Image.fromarray(np.uint8(concat)) img.save(filename + ".png") From a34b20544550da8c69b7ce04209934b077670eaa Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 2 Oct 2024 17:25:33 +0200 Subject: [PATCH 079/117] support for frames model --- src/ml_tools/hyperparams.py | 7 +++++++ src/ml_tools/kerasmodel.py | 3 +++ src/ml_tools/thermaldataset.py | 7 +++++-- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/ml_tools/hyperparams.py b/src/ml_tools/hyperparams.py index 90bcc362..cd6ddb79 100644 --- a/src/ml_tools/hyperparams.py +++ b/src/ml_tools/hyperparams.py @@ -164,6 +164,13 @@ def square_width(self): def frame_size(self): return self.get("frame_size", 32) + def set_use_segments(self, use_segments): + self["use_segments"] = use_segments + if use_segments: + self["square_width"] = 5 + else: + self["square_width"] = 1 + # # @property # def red_type(self): diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py index c4c18dd2..f722dcf3 100644 --- a/src/ml_tools/kerasmodel.py +++ b/src/ml_tools/kerasmodel.py @@ -81,6 +81,9 @@ def load_training_meta(self, base_dir): self.ds_by_label = meta.get("by_label", True) self.excluded_labels = meta.get("excluded_labels") self.remapped_labels = meta.get("remapped_labels") + self.params.set_use_segments( + meta.get("config").get("build", {}).get("use_segments", True) + ) def shape(self): if self.model is None: diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py index d4a7e9e7..6604be7c 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -130,7 +130,9 @@ def load_dataset(filenames, remap_lookup, labels, args): 
extra_label_map=extra_label_map, include_track=args.get("include_track", False), num_frames=args.get("num_frames", 25), - channels=args.get("channels", [TrackChannels.thermal.name]), + channels=args.get( + "channels", [TrackChannels.thermal.name, TrackChannels.filtered.name] + ), ), num_parallel_calls=AUTOTUNE, deterministic=deterministic, @@ -183,7 +185,7 @@ def read_tfrecord( channels=[TrackChannels.thermal.name, TrackChannels.filtered.name], ): logging.info( - "Read tf record with image %s lbls %s labeld %s aug %s prepr %s only features %s one hot %s include fetures %s", + "Read tf record with image %s lbls %s labeld %s aug %s prepr %s only features %s one hot %s include fetures %s num frames %s", image_size, num_labels, labeled, @@ -192,6 +194,7 @@ def read_tfrecord( only_features, one_hot, include_features, + num_frames, ) load_images = not only_features tfrecord_format = { From 8d122397273d55d6518f3dedddb622c17b746adc Mon Sep 17 00:00:00 2001 From: gferraro Date: Thu, 3 Oct 2024 20:43:13 +0200 Subject: [PATCH 080/117] skip frames on edge --- src/build.py | 1 + src/ml_tools/datasetstructures.py | 55 ++++++++++++++++++++++--------- src/ml_tools/thermaldataset.py | 2 +- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/src/build.py b/src/build.py index f614d764..7638f7a8 100644 --- a/src/build.py +++ b/src/build.py @@ -895,6 +895,7 @@ def main(): "tag_precedence": config.build.tag_precedence, "min_mass": master_dataset.min_frame_mass, "thermal_diff_norm": config.build.thermal_diff_norm, + "filter_by_lq": master_dataset.filter_by_lq, } ) create_tf_records( diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py index f1840527..871b4b79 100644 --- a/src/ml_tools/datasetstructures.py +++ b/src/ml_tools/datasetstructures.py @@ -144,6 +144,7 @@ def __init__( human_tags=None, remapped_lbl=None, mega_missed_regions=None, + skip_ffc=True, ): # regions that megadetector found nothing in self.mega_missed_regions = mega_missed_regions @@ -173,12 +174,8 @@ def __init__( self.frame_crop = None self.num_frames = num_frames self.important_predicted = 0 - - mass_history = np.uint16( - [region.mass for region in self.regions_by_frame.values()] - ) mass_history = [ - region.frame_number + region.mass for region in self.regions_by_frame.values() if region.mass > 0 and ( @@ -243,29 +240,55 @@ def add_sample(self, sample): def calculate_sample_frames( self, min_mass=None, max_mass=None, ffc_frames=None, skip_last=None ): + crop_rectangle = Rectangle(2, 2, 160 - 2 * 2, 140 - 2 * 2) + + logging.debug( + "Calculating sample with min %s and max %s ffc %s and skip %s", + min_mass, + max_mass, + ffc_frames, + skip_last, + ) frame_numbers = list(self.regions_by_frame.keys()) + previous_mass = None + if skip_last is not None: skip_x = int(len(frame_numbers) * skip_last) frame_numbers = frame_numbers[:-skip_x] - frame_numbers = [ - frame - for frame in frame_numbers - if (ffc_frames is None or frame not in ffc_frames) - and ( - self.mega_missed_regions is None - or frame not in self.mega_missed_regions - ) - ] - frame_numbers.sort() + frame_numbers.sort() for frame_num in frame_numbers: region = self.regions_by_frame[frame_num] - if region.mass == 0 or region.blank: + + if ( + region.mass == 0 + or region.blank + or region.width <= 0 + or region.height <= 0 + ): + continue + if ffc_frames is not None and frame_num in ffc_frames: continue + + if ( + self.mega_missed_regions is not None + and frame_num in self.mega_missed_regions + ): + continue + if min_mass is not None and 
region.mass < min_mass: continue if max_mass is not None and region.mass > max_mass: continue + # dont use regions on the edge if the mass deviates too much from the last known good mass + region.set_is_along_border(crop_rectangle) + if region.is_along_border: + if previous_mass is not None: + previous_mass_thresh = previous_mass * 0.1 + if (abs(previous_mass - region.mass)) >= previous_mass_thresh: + continue + else: + previous_mass = region.mass f = FrameSample( self.clip_id, self.track_id, diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py index 6604be7c..94f1d46c 100644 --- a/src/ml_tools/thermaldataset.py +++ b/src/ml_tools/thermaldataset.py @@ -329,7 +329,7 @@ def main(): remapped_labels=get_remapped(), excluded_labels=get_excluded(), include_track=True, - num_frames=25, + num_frames=1, ) print("Ecpoh size is", epoch_size) # print(get_distribution(resampled_ds, len(labels), extra_meta=False)) From 86fad5953015c4421e44a8077cc77a9bed025309 Mon Sep 17 00:00:00 2001 From: gferraro Date: Thu, 3 Oct 2024 21:06:12 +0200 Subject: [PATCH 081/117] tweak a few defaults and min mass filtering --- src/config/buildconfig.py | 4 ++-- src/ml_tools/datasetstructures.py | 31 ++++++++++++++++++------------- src/ml_tools/thermalwriter.py | 1 + 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/config/buildconfig.py b/src/config/buildconfig.py index c659ca75..52d9f085 100644 --- a/src/config/buildconfig.py +++ b/src/config/buildconfig.py @@ -101,11 +101,11 @@ def get_defaults(cls): segment_min_avg_mass=10, min_frame_mass=10, filter_by_lq=False, - max_segments=5, + max_segments=3, thermal_diff_norm=True, tag_precedence=BuildConfig.DEFAULT_GROUPS, excluded_tags=BuildConfig.EXCLUDED_TAGS, - country="NZ", + country=None, use_segments=True, ) diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py index 871b4b79..51b26c6f 100644 --- a/src/ml_tools/datasetstructures.py +++ b/src/ml_tools/datasetstructures.py @@ -362,12 +362,14 @@ def get_segments( location=None, segment_frames=None, from_last=None, + frame_min_mass=None, ): if segment_frames is not None: raise Exception("Have not implement this path") - min_frames = segment_width - if self.label == "vehicle" or self.label == "human": - min_frames = segment_width / 4.0 + min_frames = segment_width / 4.0 + if self.label in ["stoat", "mustelid", "weasel", "ferret"]: + # try and always get one for these + min_frames = 0 # in python3.7+ can just take the values and it guarantees order it was added to dict regions = self.bounds_history @@ -390,6 +392,7 @@ def get_segments( source_file=self.source_file, dont_filter=dont_filter, skip_ffc=skip_ffc, + frame_min_mass=frame_min_mass, ) # GP could get this from the tracks when writing # but might be best to keep samples independent for ease @@ -963,11 +966,12 @@ def get_segments( source_file=None, dont_filter=False, skip_ffc=True, + frame_min_mass=None, ): if segment_type == SegmentType.ALL_RANDOM_NOMIN: segment_min_mass = None if min_frames is None: - min_frames = 25 + min_frames = segment_width / 4.0 segments = [] mass_history = np.uint16([region.mass for region in regions]) filtered_stats = {"segment_mass": 0, "too short": 0} @@ -986,6 +990,7 @@ def get_segments( and not region.blank and region.width > 0 and region.height > 0 + and ((has_no_mass or frame_min_mass is None) or region.mass >= frame_min_mass) ] if len(frame_indices) == 0: logging.warn("Nothing to load for %s - %s", clip_id, track_id) @@ -1024,9 +1029,9 @@ def get_segments( 
segment_min_mass, source_file=source_file, ) - # if len(frame_indices) < min_frames: - # filtered_stats["too short"] += 1 - # return segments, filtered_stats + if len(frame_indices) < min_frames: + filtered_stats["too short"] += 1 + return segments, filtered_stats frame_indices = np.array(frame_indices) segment_count = max(1, len(frame_indices) // segment_frame_spacing) segment_count = int(segment_count) @@ -1047,12 +1052,12 @@ def get_segments( # random_frames and not random_sections: np.random.shuffle(frame_indices) for i in range(segment_count): - # always get atleast one segmnet - if i > 0: - if (len(frame_indices) < segment_width and len(segments) > 1) or len( - frame_indices - ) < (segment_width / 4.0): - break + # always get atleast one segment, not doing annymore + # if i > 0: + if (len(frame_indices) < segment_width and len(segments) > 1) or len( + frame_indices + ) < segment_width / 4: + break if segment_type == SegmentType.ALL_SECTIONS: # random frames from section 2.2 * segment_width diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py index 603698e4..4c7d0713 100644 --- a/src/ml_tools/thermalwriter.py +++ b/src/ml_tools/thermalwriter.py @@ -228,6 +228,7 @@ def get_data(clip_samples, extra_args): skip_ffc=extra_args.get("skip_ffc", True), ffc_frames=clip_meta.ffc_frames, max_segments=len(samples), + frame_min_mass=extra_args.get("min_mass"), ) else: filter_by_lq = extra_args.get("filter_by_lq", False) From a9044d6aae52cf1a02ae5cb278d302a1bf2d0bb5 Mon Sep 17 00:00:00 2001 From: gferraro Date: Thu, 3 Oct 2024 21:14:33 +0200 Subject: [PATCH 082/117] add max samples --- src/build.py | 1 + src/config/buildconfig.py | 4 ++++ src/ml_tools/dataset.py | 2 ++ src/ml_tools/datasetstructures.py | 9 ++++++++- src/ml_tools/thermalwriter.py | 1 + 5 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/build.py b/src/build.py index 7638f7a8..76ef6e09 100644 --- a/src/build.py +++ b/src/build.py @@ -896,6 +896,7 @@ def main(): "min_mass": master_dataset.min_frame_mass, "thermal_diff_norm": config.build.thermal_diff_norm, "filter_by_lq": master_dataset.filter_by_lq, + "max_frames": master_dataset.max_frames, } ) create_tf_records( diff --git a/src/config/buildconfig.py b/src/config/buildconfig.py index 52d9f085..3ff3bdb5 100644 --- a/src/config/buildconfig.py +++ b/src/config/buildconfig.py @@ -40,6 +40,8 @@ class BuildConfig(DefaultConfig): excluded_tags = attr.ib() country = attr.ib() use_segments = attr.ib() + max_frames = attr.ib() + EXCLUDED_TAGS = ["poor tracking", "part", "untagged", "unidentified"] # country bounding boxs @@ -89,6 +91,7 @@ def load(cls, build): excluded_tags=build["excluded_tags"], country=build["country"], use_segments=build["use_segments"], + max_frames=build["max_frames"], ) @classmethod @@ -107,6 +110,7 @@ def get_defaults(cls): excluded_tags=BuildConfig.EXCLUDED_TAGS, country=None, use_segments=True, + max_frames=75, ) def validate(self): diff --git a/src/ml_tools/dataset.py b/src/ml_tools/dataset.py index 3e422076..8556c5f9 100644 --- a/src/ml_tools/dataset.py +++ b/src/ml_tools/dataset.py @@ -86,6 +86,7 @@ def __init__( self.segment_type = SegmentType.ALL_RANDOM self.max_segments = config.build.max_segments self.country = config.build.country + self.max_frames = config.build.max_frames else: self.country = "NZ" self.tag_precedence = BuildConfig.DEFAULT_GROUPS @@ -100,6 +101,7 @@ def __init__( self.segment_min_avg_mass = 10 self.min_frame_mass = 16 self.segment_type = SegmentType.ALL_RANDOM + self.max_frames = 75 
self.country_rectangle = BuildConfig.COUNTRY_LOCATIONS.get(self.country) logging.info( diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py index 51b26c6f..890b0209 100644 --- a/src/ml_tools/datasetstructures.py +++ b/src/ml_tools/datasetstructures.py @@ -238,7 +238,12 @@ def add_sample(self, sample): self.samples.append(sample) def calculate_sample_frames( - self, min_mass=None, max_mass=None, ffc_frames=None, skip_last=None + self, + min_mass=None, + max_mass=None, + ffc_frames=None, + skip_last=None, + max_frames=None, ): crop_rectangle = Rectangle(2, 2, 160 - 2 * 2, 140 - 2 * 2) @@ -302,6 +307,8 @@ def calculate_sample_frames( track_median_mass=self.median_mass, ) self.samples.append(f) + if max_frames is not None and len(self.samples) > max_frames: + self.samples = np.random.choice(self.samples, max_frames, replace=False) def remove_sample(self, f): self.samples.remove(f) diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py index 4c7d0713..67d04188 100644 --- a/src/ml_tools/thermalwriter.py +++ b/src/ml_tools/thermalwriter.py @@ -244,6 +244,7 @@ def get_data(clip_samples, extra_args): else track.upper_mass ), ffc_frames=clip_meta.ffc_frames, + max_frames =extra_args.get("max_frames") ) samples = track.samples frame_temp_median = {} From 29dc459cfd2295992b93a68c8291b1af04e9e9c7 Mon Sep 17 00:00:00 2001 From: gferraro Date: Thu, 3 Oct 2024 21:19:13 +0200 Subject: [PATCH 083/117] fix cap --- src/build.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/build.py b/src/build.py index 76ef6e09..2251974a 100644 --- a/src/build.py +++ b/src/build.py @@ -732,15 +732,15 @@ def rough_balance(datasets): counts.sort() std_dev = np.std(counts) logging.info("Counts are %s std dev %s", counts, std_dev) - if std_dev < dev_threshold or len(counts) == 0: + if std_dev < dev_threshold or len(counts) <= 1: logging.info("Not balancing") continue if len(counts) <= 2: - cap_at = counts[-1] + cap_at = counts[-2] elif len(counts) < 7: - cap_at = counts[-1] + cap_at = counts[-2] else: - cap_at = counts[-1] + cap_at = counts[-2] logging.info("Capping dataset %s at %s", dataset.name, cap_at) for lbl, count in lbl_counts.items(): if count <= cap_at: From 64e283a32f49a23458102067228a51fcfeb4492e Mon Sep 17 00:00:00 2001 From: gferraro Date: Fri, 4 Oct 2024 08:58:55 +0200 Subject: [PATCH 084/117] load fp or animal model --- src/config/buildconfig.py | 2 +- src/ml_tools/kerasmodel.py | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/config/buildconfig.py b/src/config/buildconfig.py index 3ff3bdb5..ae5e9baf 100644 --- a/src/config/buildconfig.py +++ b/src/config/buildconfig.py @@ -105,7 +105,7 @@ def get_defaults(cls): min_frame_mass=10, filter_by_lq=False, max_segments=3, - thermal_diff_norm=True, + thermal_diff_norm=False, tag_precedence=BuildConfig.DEFAULT_GROUPS, excluded_tags=BuildConfig.EXCLUDED_TAGS, country=None, diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py index f722dcf3..546a83ad 100644 --- a/src/ml_tools/kerasmodel.py +++ b/src/ml_tools/kerasmodel.py @@ -66,8 +66,8 @@ def __init__(self, train_config=None, labels=None, data_dir=None): self.label_probabilities = None self.class_weights = None self.ds_by_label = True - self.excluded_labels = [] - self.remapped_labels = [] + self.excluded_labels = None + self.remapped_labels = None self.orig_labels = None def load_training_meta(self, base_dir): @@ -517,12 +517,15 @@ def train_model( logging.info( "%s Training model for 
%s epochs with weights %s", run_name, epochs, weights ) - - if self.params.excluded_labels is None: + if self.params.excluded_labels is not None: + self.excluded_labels = self.params.excluded_labels + else: self.excluded_labels, self.remapped_labels = get_excluded( self.data_type, self.params.multi_label ) - if self.params.remapped_labels is None: + if self.params.remapped_labels is not None: + self.remapped_labels = self.params.remapped_labels + else: self.remapped_labels, self.remapped_labels = get_excluded( self.data_type, self.params.multi_label ) @@ -531,7 +534,10 @@ def train_model( logging.info( "Excluding %s remapping %s", self.excluded_labels, self.remapped_labels ) - + for lbl in self.remapped_labels.values(): + if lbl not in self.labels: + self.labels.append(lbl) + self.labels.sort() if self.params.multi_label: self.labels.append("land-bird") self.orig_labels = self.labels.copy() From acb90bd66bfd5ba2f668a9a5f61a7d4eef3c09cd Mon Sep 17 00:00:00 2001 From: gferraro Date: Sun, 6 Oct 2024 16:37:20 +0200 Subject: [PATCH 085/117] fix variable name --- src/ml_tools/interpreter.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/ml_tools/interpreter.py b/src/ml_tools/interpreter.py index e4c0a99e..c485ead5 100644 --- a/src/ml_tools/interpreter.py +++ b/src/ml_tools/interpreter.py @@ -49,35 +49,31 @@ def get_preprocess_fn(self): else: import tensorflow as tf - if pretrained_model == "resnet": + if model_name == "resnet": return tf.keras.applications.resnet.preprocess_input - elif pretrained_model == "nasnet": + elif model_name == "nasnet": return tf.keras.applications.nasnet.preprocess_input - elif pretrained_model == "resnetv2": + elif model_name == "resnetv2": return tf.keras.applications.resnet_v2.preprocess_input - elif pretrained_model == "resnet152": + elif model_name == "resnet152": return tf.keras.applications.resnet.preprocess_input - elif pretrained_model == "vgg16": + elif model_name == "vgg16": return tf.keras.applications.vgg16.preprocess_input - elif pretrained_model == "vgg19": + elif model_name == "vgg19": return tf.keras.applications.vgg19.preprocess_input - elif pretrained_model == "mobilenet": + elif model_name == "mobilenet": return tf.keras.applications.mobilenet_v2.preprocess_input - elif pretrained_model == "densenet121": + elif model_name == "densenet121": return tf.keras.applications.densenet.preprocess_input - elif pretrained_model == "inceptionresnetv2": + elif model_name == "inceptionresnetv2": return tf.keras.applications.inception_resnet_v2.preprocess_input - logging.warn( - "pretrained model %s has no preprocessing function", pretrained_model - ) - return None - logging.info("No preprocess defined for %s", model_name) + logging.warn("pretrained model %s has no preprocessing function", model_name) return None def preprocess(self, clip, track, **args): From f254e405563bf25ee672f56b40e92f7e52e118a5 Mon Sep 17 00:00:00 2001 From: gferraro Date: Sun, 6 Oct 2024 16:49:54 +0200 Subject: [PATCH 086/117] fix variable --- src/classify/trackprediction.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/classify/trackprediction.py b/src/classify/trackprediction.py index 464569d5..482a6b5b 100644 --- a/src/classify/trackprediction.py +++ b/src/classify/trackprediction.py @@ -162,7 +162,6 @@ def classified_frames(self, frame_numbers, predictions, mass): self.class_best_score += smoothed_prediction def classified_frame(self, frame_number, predictions, mass): - self.prediction_frames.append([frame_number]) 
self.last_frame_classified = frame_number self.num_frames_classified += 1 self.masses.append(mass) From 7d87815b51c39408de9e499366dcb4bdfe50171b Mon Sep 17 00:00:00 2001 From: gferraro Date: Sun, 6 Oct 2024 17:01:10 +0200 Subject: [PATCH 087/117] fix confusion --- src/classify/trackprediction.py | 2 +- src/ml_tools/kerasmodel.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/classify/trackprediction.py b/src/classify/trackprediction.py index 482a6b5b..04dcf886 100644 --- a/src/classify/trackprediction.py +++ b/src/classify/trackprediction.py @@ -165,7 +165,7 @@ def classified_frame(self, frame_number, predictions, mass): self.last_frame_classified = frame_number self.num_frames_classified += 1 self.masses.append(mass) - smoothed_prediction = prediction * prediction * mass + smoothed_prediction = predictions**2 * mass prediction = Prediction( predictions, diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py index 546a83ad..6d721c44 100644 --- a/src/ml_tools/kerasmodel.py +++ b/src/ml_tools/kerasmodel.py @@ -874,10 +874,12 @@ def confusion_tracks(self, dataset, filename, threshold=0.8): ] for y, pred in pred_per_track.values(): pred.normalize_score() - no_smoothing = np.mean(pred.predictions, axis=0) + preds = np.array([p.prediction for p in pred.predictions]) + + no_smoothing = np.mean(preds, axis=0) masses = np.array(pred.masses)[:, None] old_smoothing = pred.class_best_score - new_smooth = pred.predictions * masses + new_smooth = preds * masses new_smooth = np.sum(new_smooth, axis=0) new_smooth /= np.sum(masses) From 51ef0a39bde02a7e83fdcce46d205a219e0fffdd Mon Sep 17 00:00:00 2001 From: gferraro Date: Sun, 6 Oct 2024 17:02:23 +0200 Subject: [PATCH 088/117] add num frames --- src/modelevaluate.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index 286dfb04..4c7b7796 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -580,6 +580,7 @@ def main(): include_track=True, cache=True, channels=model.params.channels, + num_frames=self.params.square_width**2, ) model.labels = new_labels logging.info( From 602d8e0064aad0b785e1a5f64e55a78f5b691fea Mon Sep 17 00:00:00 2001 From: gferraro Date: Sun, 6 Oct 2024 17:16:54 +0200 Subject: [PATCH 089/117] load params properly --- src/modelevaluate.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index 4c7b7796..803e8a89 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -559,6 +559,13 @@ def main(): if model.params.multi_label: model.labels.append("land-bird") excluded, remapped = get_excluded(model.data_type) + + if model.params.excluded_labels is not None: + excluded = model.params.excluded_labels + + if model.params.remapped_labels is not None: + remapped = model.params.remapped_labels + files = base_dir / args.dataset dataset, _, new_labels, _ = get_dataset( files, @@ -580,7 +587,7 @@ def main(): include_track=True, cache=True, channels=model.params.channels, - num_frames=self.params.square_width**2, + num_frames=model.params.square_width**2, ) model.labels = new_labels logging.info( From 4ecf68451d109137a1f65cbe97003defc32d6e11 Mon Sep 17 00:00:00 2001 From: gferraro Date: Sun, 6 Oct 2024 20:21:23 +0200 Subject: [PATCH 090/117] set shuffle based on number of frames --- src/ml_tools/tfdataset.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py index bcb29027..8bb2c4de 100644 --- 
From 4ecf68451d109137a1f65cbe97003defc32d6e11 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Sun, 6 Oct 2024 20:21:23 +0200
Subject: [PATCH 090/117] set shuffle based on number of frames

---
 src/ml_tools/tfdataset.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py
index bcb29027..8bb2c4de 100644
--- a/src/ml_tools/tfdataset.py
+++ b/src/ml_tools/tfdataset.py
@@ -68,10 +68,13 @@ def get_dataset(load_function, base_dir, labels, **args):
     remapped = {}
     keys = []
     values = []
+    shuffle_size = 4096
+    if args.get("num_frames", 25) == 1:
+        shuffle_size *= 25

     if model_labels is not None:
         new_labels = model_labels
-        logging.info("Mapping DS labels to model labels ")
+        logging.info("Mapping DS labels %s to model labels %s", labels, model_labels)
         # if we are loading a model with different labels we need to map the dataset labels
         # to the equivalent model labels
         for l_i, og_lbl in enumerate(labels):
@@ -80,7 +83,6 @@ def get_dataset(load_function, base_dir, labels, **args):
             lbl = og_lbl
             if lbl in to_remap:
                 lbl = to_remap[lbl]
-                l_i = labels.index(lbl)
             mdl_i = model_labels.index(lbl)

             if lbl not in remapped:
@@ -171,7 +173,9 @@ def get_dataset(load_function, base_dir, labels, **args):
             l_filter = lambda x, y: tf.math.reduce_all(tf.math.equal(y, l_mask))

             l_dataset = dataset.filter(l_filter)
-            l_dataset = l_dataset.shuffle(40096, reshuffle_each_iteration=True)
+            l_dataset = l_dataset.shuffle(
+                shuffle_size * 10, reshuffle_each_iteration=True
+            )

             label_ds.append(l_dataset)
         dataset = tf.data.Dataset.sample_from_datasets(
@@ -190,9 +194,9 @@ def get_dataset(load_function, base_dir, labels, **args):
         and args.get("shuffle", True)
         and not args.get("resample")
     ):
-        logging.info("shuffling data")
+        logging.info("shuffling data with buffer %s", shuffle_size)
         dataset = dataset.shuffle(
-            4096, reshuffle_each_iteration=args.get("reshuffle", True)
+            shuffle_size, reshuffle_each_iteration=args.get("reshuffle", True)
         )
     # tf refuses to run if epoch sizes change so we must decide a constant epoch size even though with reject res
     # it will change each epoch, to ensure this take this repeat data and always take epoch_size elements

From 5b7732c1eb25df2fb14d2055774360b6d5d2e687 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 8 Oct 2024 16:45:44 +0200
Subject: [PATCH 091/117] do not sort

---
 src/ml_tools/kerasmodel.py |  2 +-
 src/ml_tools/preprocess.py | 19 +++++++++++++------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py
index 6d721c44..d07ed2f2 100644
--- a/src/ml_tools/kerasmodel.py
+++ b/src/ml_tools/kerasmodel.py
@@ -537,7 +537,7 @@ def train_model(
         for lbl in self.remapped_labels.values():
             if lbl not in self.labels:
                 self.labels.append(lbl)
-        self.labels.sort()
+
         if self.params.multi_label:
             self.labels.append("land-bird")
         self.orig_labels = self.labels.copy()
diff --git a/src/ml_tools/preprocess.py b/src/ml_tools/preprocess.py
index 1600425c..fe02199a 100644
--- a/src/ml_tools/preprocess.py
+++ b/src/ml_tools/preprocess.py
@@ -121,6 +121,13 @@ def preprocess_single_frame(
         data,
         axis=2,
     )
+    # global index
+    # index += 1
+    # tools.saveclassify_image(
+    #     image,
+    #     f"samples/{save_info}-{index}",
+    # )
+
     if preprocess_fn:
         image = preprocess_fn(image)
     return image
@@ -163,12 +170,12 @@ def preprocess_movement(
    #
    #
    #
    # # # for testing
-    global index
-    index += 1
-    tools.saveclassify_image(
-        data,
-        f"samples/{sample}-{index}",
-    )
+    # global index
+    # index += 1
+    # tools.saveclassify_image(
+    #     data,
+    #     f"samples/{sample}-{index}",
+    # )

    if preprocess_fn:
        data = preprocess_fn(data)
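Aside: the idea behind patch 090 is that a shuffle buffer's cost scales with sample size, so single-frame datasets can afford a buffer roughly 25x larger than 25-frame segment datasets. A small sketch of that sizing rule, under the assumption the segment width is 25 (the helper is hypothetical):

# Rough sketch of the buffer sizing in patch 090: single-frame samples are
# ~25x smaller than full segments, so the buffer can grow accordingly.
import tensorflow as tf

def shuffled(dataset, num_frames, base_buffer=4096, reshuffle=True):
    buffer_size = base_buffer * (25 if num_frames == 1 else 1)
    return dataset.shuffle(buffer_size, reshuffle_each_iteration=reshuffle)

ds = shuffled(tf.data.Dataset.range(100_000), num_frames=1)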
From 708afadd255f31587a7c764bad9ee19fe5ff51aa Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 8 Oct 2024 17:06:33 +0200
Subject: [PATCH 092/117] dont resample some labels evenly

---
 src/ml_tools/tfdataset.py | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py
index 8bb2c4de..cadbc359 100644
--- a/src/ml_tools/tfdataset.py
+++ b/src/ml_tools/tfdataset.py
@@ -70,7 +70,7 @@ def get_dataset(load_function, base_dir, labels, **args):
     values = []
     shuffle_size = 4096
     if args.get("num_frames", 25) == 1:
-        shuffle_size *= 25
+        shuffle_size *= 20

     if model_labels is not None:
         new_labels = model_labels
@@ -166,6 +166,8 @@ def get_dataset(load_function, base_dir, labels, **args):
         logging.info("RESAMPLING")
         # seems the only way to get even distribution
         label_ds = []
+        unbalanced_ds = []
+        dont_balance = ["vehicle"]
         for i, l in enumerate(new_labels):
             l_mask = np.zeros((len(new_labels)))
             l_mask[i] = 1
@@ -173,17 +175,24 @@ def get_dataset(load_function, base_dir, labels, **args):
             l_filter = lambda x, y: tf.math.reduce_all(tf.math.equal(y, l_mask))

             l_dataset = dataset.filter(l_filter)
-            l_dataset = l_dataset.shuffle(
-                shuffle_size * 10, reshuffle_each_iteration=True
-            )
-
-            label_ds.append(l_dataset)
+            l_dataset = l_dataset.shuffle(shuffle_size, reshuffle_each_iteration=True)
+            if l in dont_balance:
+                unbalanced_ds.append(l_dataset)
+            else:
+                label_ds.append(l_dataset)
         dataset = tf.data.Dataset.sample_from_datasets(
             label_ds,
             # weights=[1 / len(new_labels)] * len(new_labels),
             stop_on_empty_dataset=True,
             rerandomize_each_iteration=True,
         )
+        dont_balance.append(dataset)
+        dataset = tf.data.Dataset.sample_from_datasets(
+            dont_balance,
+            # weights=[1 / len(new_labels)] * len(new_labels),
+            stop_on_empty_dataset=False,
+            rerandomize_each_iteration=True,
+        )
     if args.get("epoch_size") is not None:
         dataset = dataset.take(args.get("epoch_size"))
         logging.info("Setting dataset to %s", args.get("epoch_size"))
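Aside: patch 092's scheme is one per-label dataset each, sampled evenly, with some labels ("vehicle" here) mixed back in at their natural frequency. A simplified, self-contained sketch of that pattern follows; it is an illustration of the technique, not the repository's function (the one-hot labels and helper name are assumptions):

# Simplified sketch of even resampling with an unbalanced side-channel.
# Assumes (features, one_hot_label) elements; names are illustrative.
import tensorflow as tf

def resample(dataset, labels, dont_balance=("vehicle",)):
    balanced, unbalanced = [], []
    for i, label in enumerate(labels):
        per_label = dataset.filter(lambda x, y, i=i: tf.equal(tf.argmax(y), i))
        (unbalanced if label in dont_balance else balanced).append(per_label)
    # even mix of the balanced labels, stopping when any runs dry
    mixed = tf.data.Dataset.sample_from_datasets(balanced, stop_on_empty_dataset=True)
    # then fold the unbalanced labels back in at their natural rate
    return tf.data.Dataset.sample_from_datasets(
        [mixed, *unbalanced], stop_on_empty_dataset=False
    )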
From f81ee53da9817a35516cb2bdda2cd594e14484a2 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Thu, 10 Oct 2024 18:47:50 +0200
Subject: [PATCH 093/117] add fp_frames

---
 src/classify/trackprediction.py   | 3 +++
 src/ml_tools/datasetstructures.py | 2 ++
 src/ml_tools/interpreter.py       | 3 ++-
 src/ml_tools/rawdb.py             | 1 +
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/classify/trackprediction.py b/src/classify/trackprediction.py
index 04dcf886..94afe43b 100644
--- a/src/classify/trackprediction.py
+++ b/src/classify/trackprediction.py
@@ -79,6 +79,9 @@ def clarity(self):
         best = np.argsort(self.prediction)
         return self.prediction[best[-1]] - self.prediction[best[-2]]

+    def __str__(self):
+        return f"{self.frames} conf: {np.round(100*self.prediction)}"
+

 class TrackPrediction:
     """
diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py
index 890b0209..76f24e34 100644
--- a/src/ml_tools/datasetstructures.py
+++ b/src/ml_tools/datasetstructures.py
@@ -145,7 +145,9 @@ def __init__(
         remapped_lbl=None,
         mega_missed_regions=None,
         skip_ffc=True,
+        fp_frames=None,
     ):
+        self.fp_frames = fp_frames
         # regions that megadetector found nothing in
         self.mega_missed_regions = mega_missed_regions
         self.station_id = station_id
diff --git a/src/ml_tools/interpreter.py b/src/ml_tools/interpreter.py
index c485ead5..aae59708 100644
--- a/src/ml_tools/interpreter.py
+++ b/src/ml_tools/interpreter.py
@@ -18,7 +18,7 @@ def load_json(self, filename):
         filename = filename.with_suffix(".json")
         logging.info("Loading metadata from %s", filename)
         metadata = json.load(open(filename, "r"))
-
+        self.version = metadata.get("version", None)
         self.labels = metadata["labels"]
         self.params = HyperParams()
         self.params.update(metadata.get("hyperparams", {}))
@@ -224,6 +224,7 @@ def preprocess_frames(
         if self.params.diff_norm:
             filtered_norm_limits = (min_diff, max_diff)
+
         for i, region in enumerate(reversed(track.bounds_history)):
             if region.blank:
                 continue
diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py
index e671462e..4fd53754 100644
--- a/src/ml_tools/rawdb.py
+++ b/src/ml_tools/rawdb.py
@@ -203,6 +203,7 @@ def get_clip_tracks(self, tag_precedence):
                 source_file=self.file,
                 mega_missed_regions=track_meta.get("mega_missed_regions"),
                 station_id=clip_header.station_id,
+                fp_frames=track_meta.get("fp_model_predictions"),
                 # frame_temp_median=frame_temp_median,
             )
             clip_header.tracks.append(header)

From 10762a07100c8a2a45bf993ecf3bb4f586d2ac1d Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 14 Oct 2024 18:04:44 +0200
Subject: [PATCH 094/117] read fp model predictions

---
 src/build.py                      |  2 ++
 src/ml_tools/datasetstructures.py | 11 +++++++++++
 src/ml_tools/rawdb.py             | 17 ++++++++++++++++-
 src/ml_tools/thermalwriter.py     |  3 ++-
 4 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/src/build.py b/src/build.py
index 2251974a..c8c51cb8 100644
--- a/src/build.py
+++ b/src/build.py
@@ -899,6 +899,8 @@ def main():
                 "max_frames": master_dataset.max_frames,
             }
         )
+        # dont filter the test set,
+        extra_args["filter_by_fp"] = dataset.name != "test"
         create_tf_records(
             dataset,
             dir,
diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py
index 76f24e34..1b1a90fa 100644
--- a/src/ml_tools/datasetstructures.py
+++ b/src/ml_tools/datasetstructures.py
@@ -17,6 +17,9 @@ FRAME_SIZE = 32
 MIN_SIZE = 4

+# hard coded for now
+FP_LABELS = ["other", "unidentified", "rain", "false-positive", "water", "insect"]
+

 class SegmentType(Enum):
     IMPORTANT_RANDOM = 0
@@ -147,7 +150,9 @@ def __init__(
         skip_ffc=True,
         fp_frames=None,
     ):
+        self.fp_frames = fp_frames
+
         # regions that megadetector found nothing in
         self.mega_missed_regions = mega_missed_regions
         self.station_id = station_id
@@ -372,6 +377,7 @@ def get_segments(
         segment_frames=None,
         from_last=None,
         frame_min_mass=None,
+        filter_by_fp=True,
     ):
@@ -402,6 +408,7 @@ def get_segments(
             dont_filter=dont_filter,
             skip_ffc=skip_ffc,
             frame_min_mass=frame_min_mass,
+            fp_frames=self.fp_frames if filter_by_fp else None,
         )
         # GP could get this from the tracks when writing
         # but might be best to keep samples independent for ease
@@ -976,6 +983,7 @@ def get_segments(
     dont_filter=False,
     skip_ffc=True,
     frame_min_mass=None,
+    fp_frames=None,
 ):
     if segment_type == SegmentType.ALL_RANDOM_NOMIN:
         segment_min_mass = None
@@ -1001,6 +1009,9 @@ def get_segments(
         and ((has_no_mass or frame_min_mass is None) or region.mass >= frame_min_mass)
     ]
+    if fp_frames is not None and label not in FP_LABELS:
+        frame_indices = [f for f in frame_indices if f not in fp_frames]
+        logging.info("Filtering with fp frames %s", fp_frames)
     if len(frame_indices) == 0:
         logging.warn("Nothing to load for %s - %s", clip_id, track_id)
         return [], filtered_stats
diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py
index 4fd53754..4b6a93cc 100644
--- a/src/ml_tools/rawdb.py
+++ b/src/ml_tools/rawdb.py
@@ -135,6 +135,10 @@ def get_clip_tracks(self, tag_precedence):
             ffc_frames=self.ffc_frames,
         )
         tracks = metadata.get("Tracks", [])
+        fp_labels = metadata.get("fp_model_labels")
+        fp_index = None
+        if fp_labels is not None:
+            fp_index = fp_labels.index("false-positive")
         meta = []
         for track_meta in tracks:
             tags = track_meta.get("tags", [])
@@ -191,6 +195,17 @@ def get_clip_tracks(self, tag_precedence):
                 if start is None:
                     start = region.frame_number
                 end = region.frame_number
+
+            fp_meta = track_meta.get("fp_model_predictions")
+            fp_frames = None
+            if fp_meta is not None:
+                fp_frames = []
+                for pred in fp_meta.get("predictions", []):
+                    scores = pred["prediction"]
+                    best_arg = np.argmax(scores)
+                    confidence = scores[best_arg]
+                    if best_arg == fp_index and confidence > 75:
+                        fp_frames.append(pred["frames"][0])
             header = TrackHeader(
                 clip_id=clip_header.clip_id,
                 track_id=int(track_meta["id"]),
@@ -203,7 +218,7 @@ def get_clip_tracks(self, tag_precedence):
                 source_file=self.file,
                 mega_missed_regions=track_meta.get("mega_missed_regions"),
                 station_id=clip_header.station_id,
-                fp_frames=track_meta.get("fp_model_predictions"),
+                fp_frames=fp_frames,
                 # frame_temp_median=frame_temp_median,
             )
             clip_header.tracks.append(header)
diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py
index 67d04188..fafbc505 100644
--- a/src/ml_tools/thermalwriter.py
+++ b/src/ml_tools/thermalwriter.py
@@ -229,6 +229,7 @@ def get_data(clip_samples, extra_args):
                     ffc_frames=clip_meta.ffc_frames,
                     max_segments=len(samples),
                     frame_min_mass=extra_args.get("min_mass"),
+                    filter_by_fp=extra_args.get("filter_by_fp"),
                 )
             else:
                 filter_by_lq = extra_args.get("filter_by_lq", False)
@@ -244,7 +245,7 @@ def get_data(clip_samples, extra_args):
                         else track.upper_mass
                     ),
                     ffc_frames=clip_meta.ffc_frames,
-                    max_frames =extra_args.get("max_frames")
+                    max_frames=extra_args.get("max_frames"),
                 )
                 samples = track.samples
             frame_temp_median = {}

From 9c654521ef351cfd6ac7d2ab3a534a21049d755c Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 15 Oct 2024 21:20:12 +0200
Subject: [PATCH 095/117] check for int

---
 src/ml_tools/rawdb.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py
index 4b6a93cc..4a76a725 100644
--- a/src/ml_tools/rawdb.py
+++ b/src/ml_tools/rawdb.py
@@ -205,7 +205,11 @@ def get_clip_tracks(self, tag_precedence):
                     best_arg = np.argmax(scores)
                     confidence = scores[best_arg]
                     if best_arg == fp_index and confidence > 75:
-                        fp_frames.append(pred["frames"][0])
+                        frame_i = pred["frames"]
+                        if isinstance(frame_i, int):
+                            fp_frames.append(frame_i)
+                        else:
+                            fp_frames.append(frame_i[0])
             header = TrackHeader(
                 clip_id=clip_header.clip_id,
                 track_id=int(track_meta["id"]),

From 1ec5202d20d11dacf324cc6c921fc5b28b868f14 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 15 Oct 2024 21:21:02 +0200
Subject: [PATCH 096/117] remove log

---
 src/ml_tools/datasetstructures.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py
index 1b1a90fa..2de71838 100644
--- a/src/ml_tools/datasetstructures.py
+++ b/src/ml_tools/datasetstructures.py
@@ -1011,7 +1011,6 @@ def get_segments(
     ]
     if fp_frames is not None and label not in FP_LABELS:
         frame_indices = [f for f in frame_indices if f not in fp_frames]
-        logging.info("Filtering with fp frames %s", fp_frames)
     if len(frame_indices) == 0:
         logging.warn("Nothing to load for %s - %s", clip_id, track_id)
         return [], filtered_stats
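Aside: patches 094-095 boil down to collecting the frame numbers a false-positive model is confident about. A distilled sketch of that parsing, using the metadata field names shown in the diffs and the committed threshold of 75 (the function name is an assumption for illustration):

# Illustrative distillation of the fp-frame parsing from patches 094-095.
import numpy as np

def fp_frames_from_meta(fp_meta, fp_index, threshold=75):
    if fp_meta is None or fp_index is None:
        return None
    fp_frames = []
    for pred in fp_meta.get("predictions", []):
        scores = pred["prediction"]
        best_arg = int(np.argmax(scores))
        if best_arg == fp_index and scores[best_arg] > threshold:
            frame = pred["frames"]
            # "frames" may be a single int or a list (patch 095's fix)
            fp_frames.append(frame if isinstance(frame, int) else frame[0])
    return fp_frames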
From 39754a41838a838a65066db3636c0d5f4068a7de Mon Sep 17 00:00:00 2001
From: gferraro
Date: Wed, 16 Oct 2024 09:17:56 +0200
Subject: [PATCH 097/117] add country code into tf records

---
 src/ml_tools/datasetstructures.py |  1 +
 src/ml_tools/rawdb.py             | 10 ++++++++++
 src/ml_tools/thermalwriter.py     | 13 +++++++++----
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py
index 2de71838..bc42aec0 100644
--- a/src/ml_tools/datasetstructures.py
+++ b/src/ml_tools/datasetstructures.py
@@ -117,6 +117,7 @@ class ClipHeader:
     trap = attr.ib()
     tracks = attr.ib()
     ffc_frames = attr.ib()
+    country_code = attr.ib()
     frame_temp_median = attr.ib(default=None)

     def get_samples(self):
diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py
index 4a76a725..f3e5557c 100644
--- a/src/ml_tools/rawdb.py
+++ b/src/ml_tools/rawdb.py
@@ -22,6 +22,7 @@ from track.cliptrackextractor import is_affected_by_ffc
 from cptv_rs_python_bindings import CptvReader
 from ml_tools.rectangle import Rectangle
+from config.buildconfig import BuildConfig

 special_datasets = [
     "tag_frames",
@@ -116,11 +117,19 @@ def get_clip_tracks(self, tag_precedence):
         location = metadata.get("location")
         lat = None
         lng = None
+        country_code = None
         try:
             lat = location.get("lat")
             lng = location.get("lng")
+            if lat is not None and lng is not None:
+                for country, location in BuildConfig.COUNTRY_LOCATIONS.items():
+                    if location.contains(lng, lat):
+                        country_code = country
+                        break
         except:
+            logging.error("Could not parse lat lng", exc_info=True)
             pass
+
         clip_header = ClipHeader(
             clip_id=int(metadata["id"]),
             station_id=metadata.get("stationId"),
@@ -133,6 +142,7 @@ def get_clip_tracks(self, tag_precedence):
             trap=metadata.get("trap", ""),
             tracks=[],
             ffc_frames=self.ffc_frames,
+            country_code=country_code,
         )
         tracks = metadata.get("Tracks", [])
         fp_labels = metadata.get("fp_model_labels")
diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py
index fafbc505..7a123460 100644
--- a/src/ml_tools/thermalwriter.py
+++ b/src/ml_tools/thermalwriter.py
@@ -55,7 +55,7 @@ from functools import lru_cache

-def create_tf_example(sample, data, features, labels, num_frames):
+def create_tf_example(sample, data, features, labels, num_frames, country_code):
     """Converts image and annotations to a tf.Example proto.
     Args:
@@ -128,6 +128,9 @@ def create_tf_example(sample, data, features, labels, num_frames, country_code):
         "image/format": tfrecord_util.bytes_feature("jpeg".encode("utf8")),
         "image/class/text": tfrecord_util.bytes_feature(sample.label.encode("utf8")),
         "image/class/label": tfrecord_util.int64_feature(labels.index(sample.label)),
+        "image/country_id": tfrecord_util.bytes_feature(
+            str(country_code).encode("utf8")
+        ),
     }

     example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
@@ -157,9 +160,11 @@ def save_data(samples, writer, labels, extra_args):
         return 0
     saved = 0
     try:
-        for data in sample_data:
+        country_code = sample_data[1]
+        sample_data = sample_data[0]
+        for sample, images, features in sample_data:
             tf_example = create_tf_example(
-                data[0], data[1], data[2], labels, extra_args["num_frames"]
+                sample, images, features, labels, extra_args["num_frames"], country_code
             )
             writer.write(tf_example.SerializeToString())
             saved += 1
@@ -372,4 +377,4 @@ def get_data(clip_samples, extra_args):
             "Cant get Samples for %s", clip_samples[0].source_file, exc_info=True
         )
         return None
-    return data
+    return (data, clip_meta.country_code)

From 7550768f76249a41c3cac39a487f55337c757b6d Mon Sep 17 00:00:00 2001
From: gferraro
Date: Thu, 17 Oct 2024 18:18:11 +0200
Subject: [PATCH 098/117] remove some

---
 src/ml_tools/thermaldataset.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py
index 94f1d46c..bd4da773 100644
--- a/src/ml_tools/thermaldataset.py
+++ b/src/ml_tools/thermaldataset.py
@@ -38,7 +38,13 @@ def get_excluded():
         "mammal",
         "frog",
         "cow",
-        # "fox",
+        # added gp for retrain
+        "wombat",
+        "gray kangaroo",
+        "echidna",
+        "fox",
+        "deer",
+        "sheep",
         # "wombat",
     ]

From c008df55b24a81fa66a6c9b8754c41360c786760 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Thu, 17 Oct 2024 18:20:02 +0200
Subject: [PATCH 099/117] dont filter by fp

---
 src/ml_tools/interpreter.py | 1 +
 src/modelevaluate.py        | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/ml_tools/interpreter.py b/src/ml_tools/interpreter.py
index aae59708..12a5c57b 100644
--- a/src/ml_tools/interpreter.py
+++ b/src/ml_tools/interpreter.py
@@ -299,6 +299,7 @@ def preprocess_segments(
             from_last=predict_from_last,
             max_segments=max_segments,
             dont_filter=dont_filter,
+            filter_by_fp = False,
         )
         frame_indices = set()
         for segment in segments:
diff --git a/src/modelevaluate.py b/src/modelevaluate.py
index 803e8a89..7c11df91 100644
--- a/src/modelevaluate.py
+++ b/src/modelevaluate.py
@@ -463,7 +463,9 @@ def evaluate_dir(
             # smoothed = output
             # else:
             smoothed = output * masses
-            prediction.classified_clip(output, output, data[2], top_score=top_score)
+            prediction.classified_clip(
+                output, smoothed, data[2], top_score=top_score
+            )
             y_true.append(label_mapping.get(label, label))
             predicted_labels = [prediction.predicted_tag()]
             confidence = prediction.max_score
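Aside: patch 097 above stores the country code as a UTF-8 bytes feature in each tf.Example. A minimal standalone sketch of that encoding, using raw tf.train types rather than the project's tfrecord_util helpers (the function names here are illustrative):

# Minimal standalone version of the country-code feature from patch 097.
import tensorflow as tf

def bytes_feature(value: bytes) -> tf.train.Feature:
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def int64_feature(value: int) -> tf.train.Feature:
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def example_with_country(label: str, label_id: int, country_code) -> tf.train.Example:
    feature_dict = {
        "image/class/text": bytes_feature(label.encode("utf8")),
        "image/class/label": int64_feature(label_id),
        # str() keeps a missing country as the literal "None", as in the patch
        "image/country_id": bytes_feature(str(country_code).encode("utf8")),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_dict))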
From 88aa20572e94dd7410b323a83608a6095bdccdff Mon Sep 17 00:00:00 2001
From: gferraro
Date: Thu, 17 Oct 2024 18:26:42 +0200
Subject: [PATCH 100/117] fix excluded

---
 src/ml_tools/tfdataset.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py
index cadbc359..f0299f79 100644
--- a/src/ml_tools/tfdataset.py
+++ b/src/ml_tools/tfdataset.py
@@ -110,7 +110,7 @@ def get_dataset(load_function, base_dir, labels, **args):
         for l in labels:
             keys.append(labels.index(l))
             if l not in new_labels:
-                remapped[l] = -1
+                remapped[l] = [-1]
                 values.append(-1)
                 logging.info("Excluding %s", l)
             else:
@@ -119,7 +119,9 @@ def get_dataset(load_function, base_dir, labels, **args):

     # add the remapped labels to the correct place
     for k, v in to_remap.items():
-        if k in labels and v in labels:
+        if k in excluded_labels:
+            continue
+        if k in labels and v in new_labels and k in new_labels:
             remapped[v].append(k)
             values[labels.index(k)] = new_labels.index(v)
             del remapped[k]

From a8717ec7b199eaeeee0e8594af562636682c5919 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Fri, 18 Oct 2024 09:48:44 +0200
Subject: [PATCH 101/117] fix fine tune

---
 src/ml_tools/kerasmodel.py | 50 ++++++++++++++++++--------------------
 src/ml_tools/tfdataset.py  |  4 +--
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py
index d07ed2f2..c385b3f3 100644
--- a/src/ml_tools/kerasmodel.py
+++ b/src/ml_tools/kerasmodel.py
@@ -534,34 +534,12 @@ def train_model(
         logging.info(
             "Excluding %s remapping %s", self.excluded_labels, self.remapped_labels
         )
-        for lbl in self.remapped_labels.values():
-            if lbl not in self.labels:
-                self.labels.append(lbl)
-
-        if self.params.multi_label:
+        if self.params.multi_label and "land-bird" not in self.labels:
             self.labels.append("land-bird")
         self.orig_labels = self.labels.copy()
-        for l in self.excluded_labels:
-            if l in self.labels:
-                self.labels.remove(l)
-        for l in self.remapped_labels.keys():
-            if l in self.labels:
-                self.labels.remove(l)
-        self.log_dir = self.log_base / run_name
-        self.log_dir.mkdir(parents=True, exist_ok=True)
-        if fine_tune is not None:
-            self.load_model(fine_tune, weights=weights)
-            self.adjust_final_layer()
-
-        elif not self.model:
-            self.build_model(
-                dense_sizes=self.params.dense_sizes,
-                retrain_from=self.params.retrain_layer,
-                dropout=self.params.dropout,
-                run_name=run_name,
-            )
-        self.model.summary()
+
+        self.preprocess_fn = self.get_preprocess_fn()
         self.train, remapped, new_labels, epoch_size = get_dataset(
             train_files,
             self.data_type,
@@ -580,6 +558,28 @@ def train_model(
             num_frames=self.params.square_width**2,
             channels=self.params.channels,
         )
+        self.labels = new_labels
+
+        self.log_dir = self.log_base / run_name
+        self.log_dir.mkdir(parents=True, exist_ok=True)
+        if fine_tune is not None:
+            self.load_model(fine_tune, weights=weights)
+            self.adjust_final_layer()
+        else:
+
+            if not self.model:
+                self.build_model(
+                    dense_sizes=self.params.dense_sizes,
+                    retrain_from=self.params.retrain_layer,
+                    dropout=self.params.dropout,
+                    run_name=run_name,
+                )
+
+            if weights is not None:
+                self.model.load_weights(weights)
+
+        self.model.summary()
+
         self.remapped = remapped
         self.validate, remapped, _, _ = get_dataset(
             validate_files,
@@ -597,8 +597,6 @@ def train_model(
             num_frames=self.params.square_width**2,
             channels=self.params.channels,
         )
-        if weights is not None:
-            self.model.load_weights(weights)
         if rebalance:
             self.class_weights = get_weighting(self.train, self.labels)
             logging.info(
diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py
index f0299f79..4914ea5b 100644
--- a/src/ml_tools/tfdataset.py
+++ b/src/ml_tools/tfdataset.py
@@ -121,7 +121,7 @@ def get_dataset(load_function, base_dir, labels, **args):
     for k, v in to_remap.items():
         if k in excluded_labels:
             continue
-        if k in labels and v in new_labels and k in new_labels:
+        if k in labels and v in new_labels:
             remapped[v].append(k)
             values[labels.index(k)] = new_labels.index(v)
             del remapped[k]
@@ -135,7 +135,7 @@ def get_dataset(load_function, base_dir, labels, **args):
         name="remapped_y",
     )
     num_labels = len(new_labels)
-    logging.info("New labels are %s", new_labels)
+    logging.info("New labels are %s from original %s", new_labels, labels)
     for k, v in zip(keys, values):
         logging.info(
             "Mapping %s to %s", labels[k], new_labels[v] if v >= 0 else "nothing"
         )

From 84a5da6be6b0467784eabcd6d2dd986634d7c819 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 21 Oct 2024 09:16:58 +0200
Subject: [PATCH 102/117] add parsing

---
 src/ml_tools/kerasmodel.py |  5 ++++-
 src/ml_tools/rawdb.py      | 23 ++++++++++++-----------
 src/modelevaluate.py       |  6 ++++++
 3 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py
index c385b3f3..4e5acf8a 100644
--- a/src/ml_tools/kerasmodel.py
+++ b/src/ml_tools/kerasmodel.py
@@ -563,7 +563,10 @@ def train_model(
         self.log_dir = self.log_base / run_name
         self.log_dir.mkdir(parents=True, exist_ok=True)
         if fine_tune is not None:
-            self.load_model(fine_tune, weights=weights)
+            self.load_model(fine_tune, weights=weights, training=True)
+            # load model loads old labels
+            self.labels = new_labels
+
             self.adjust_final_layer()
         else:
diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py
index f3e5557c..00c14593 100644
--- a/src/ml_tools/rawdb.py
+++ b/src/ml_tools/rawdb.py
@@ -118,17 +118,18 @@ def get_clip_tracks(self, tag_precedence):
         lat = None
         lng = None
         country_code = None
-        try:
-            lat = location.get("lat")
-            lng = location.get("lng")
-            if lat is not None and lng is not None:
-                for country, location in BuildConfig.COUNTRY_LOCATIONS.items():
-                    if location.contains(lng, lat):
-                        country_code = country
-                        break
-        except:
-            logging.error("Could not parse lat lng", exc_info=True)
-            pass
+        if location is not None:
+            try:
+                lat = location.get("lat")
+                lng = location.get("lng")
+                if lat is not None and lng is not None:
+                    for country, location in BuildConfig.COUNTRY_LOCATIONS.items():
+                        if location.contains(lng, lat):
+                            country_code = country
+                            break
+            except:
+                logging.error("Could not parse lat lng", exc_info=True)
+                pass

         clip_header = ClipHeader(
             clip_id=int(metadata["id"]),
diff --git a/src/modelevaluate.py b/src/modelevaluate.py
index 7c11df91..94ae85e9 100644
--- a/src/modelevaluate.py
+++ b/src/modelevaluate.py
@@ -452,7 +452,13 @@ def evaluate_dir(
         for data in clip_data:
             label = data[1]
             preprocessed = data[3]
+            if len(preprocessed) == 0:
+                logging.info("No data found for %s", data[0])
+                y_true.append(label_mapping.get(label, label))
+                y_pred.append("None")
+                continue
             output = model.predict(preprocessed)
+
             prediction = TrackPrediction(data[0], model.labels)
             masses = np.array(data[4])
             masses = masses[:, None]
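Aside: the tfdataset changes in patches 100-101 maintain an index-to-index remapping that is ultimately baked into a tf.lookup table. A toy sketch of that mechanism, with made-up labels purely for illustration (-1 marking an excluded class, as in the diffs):

# Toy version of the label-index remapping table used by tfdataset.
import tensorflow as tf

labels = ["bird", "cat", "insect"]       # order stored in the tfrecords
new_labels = ["bird", "false-positive"]  # order the model expects
values = [0, -1, 1]                      # cat excluded, insect -> false-positive

table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        tf.range(len(labels), dtype=tf.int64),
        tf.constant(values, dtype=tf.int64),
    ),
    default_value=-1,
    name="remapped_y",
)
print(table.lookup(tf.constant([0, 1, 2], dtype=tf.int64)))  # [0, -1, 1]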
From 4def7a8e405563bf25ee672f56b40e92f7e52e11 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Fri, 25 Oct 2024 13:49:47 +0200
Subject: [PATCH 103/117] try limit memory

---
 src/ml_tools/tfwriter.py | 92 +++++++++++++++++++++++-----------------
 1 file changed, 52 insertions(+), 40 deletions(-)

diff --git a/src/ml_tools/tfwriter.py b/src/ml_tools/tfwriter.py
index 519f4ffb..9b51b29a 100644
--- a/src/ml_tools/tfwriter.py
+++ b/src/ml_tools/tfwriter.py
@@ -39,12 +39,12 @@ import math

-def process_job(queue, labels, base_dir, save_data, extra_args):
+def process_job(queue, labels, base_dir, save_data, writer_i, extra_args):
     import gc

     pid = os.getpid()
-    writer_i = 1
+    # writer_i = 1
     name = f"{writer_i}-{pid}.tfrecord"

     options = tf.io.TFRecordOptions(compression_type="GZIP")
     writer = tf.io.TFRecordWriter(str(base_dir / name), options=options)
@@ -66,14 +66,14 @@ def process_job(queue, labels, base_dir, save_data, writer_i, extra_args):
             saved += save_data(samples, writer, labels, extra_args)
             files += 1
             del samples
-            if saved > 250000 / num_frames:
-                logging.info("Closing old writer")
-                writer.close()
-                writer_i += 1
-                name = f"{writer_i}-{pid}.tfrecord"
-                logging.info("Opening %s", name)
-                saved = 0
-                writer = tf.io.TFRecordWriter(str(base_dir / name), options=options)
+            # if saved > 250000 / num_frames:
+            #     logging.info("Closing old writer")
+            #     writer.close()
+            #     writer_i += 1
+            #     name = f"{writer_i}-{pid}.tfrecord"
+            #     logging.info("Opening %s", name)
+            #     saved = 0
+            #     writer = tf.io.TFRecordWriter(str(base_dir / name), options=options)
             if i % int(25000 / num_frames) == 0:
                 logging.info("Saved %s ", files)
                 gc.collect()
                 writer.flush()
@@ -106,37 +106,49 @@ def create_tf_records(
         "writing to output path: %s for %s samples", output_path, len(samples_by_source)
     )
     num_processes = 8
+    writer_i = 0
+    index = 0
+    jobs_per_process = 300 * num_processes
     try:
-        job_queue = Queue()
-        processes = []
-        for i in range(num_processes):
-            p = Process(
-                target=process_job,
-                args=(job_queue, labels, output_path, save_data, extra_args),
-            )
-            processes.append(p)
-            p.start()
-        added = 0
-        for source_file in source_files:
-            job_queue.put((samples_by_source[source_file]))
-            added += 1
-            while job_queue.qsize() > num_processes * 10:
-                logging.info("Sleeping for %s", 10)
-                # give it a chance to catch up
-                time.sleep(10)
-
-        logging.info("Processing %d", job_queue.qsize())
-        for i in range(len(processes)):
-            job_queue.put(("DONE"))
-        for process in processes:
-            try:
-                process.join()
-            except KeyboardInterrupt:
-                logging.info("KeyboardInterrupt, terminating.")
-                for process in processes:
-                    process.terminate()
-                exit()
-        logging.info("Saved %s", len(dataset.samples_by_id))
+        while index < len(source_files):
+            job_queue = Queue()
+            processes = []
+            for i in range(num_processes):
+                p = Process(
+                    target=process_job,
+                    args=(
+                        job_queue,
+                        labels,
+                        output_path,
+                        save_data,
+                        writer_i,
+                        extra_args,
+                    ),
+                )
+                processes.append(p)
+                p.start()
+            added = 0
+            writer_i += 1
+            for source_file in source_files[index : index + jobs_per_process]:
+                job_queue.put((samples_by_source[source_file]))
+                added += 1
+                while job_queue.qsize() > num_processes * 10:
+                    logging.info("Sleeping for %s", 10)
+                    # give it a chance to catch up
+                    time.sleep(10)
+            index += jobs_per_process
+            logging.info("Processing %d", job_queue.qsize())
+            for i in range(len(processes)):
+                job_queue.put(("DONE"))
+            for process in processes:
+                try:
+                    process.join()
+                except KeyboardInterrupt:
+                    logging.info("KeyboardInterrupt, terminating.")
+                    for process in processes:
+                        process.terminate()
+                    exit()
+            logging.info("Saved %s", len(dataset.samples_by_id))
     except:
         logging.error("Error saving track info", exc_info=True)
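Aside: the memory-limiting idea in patch 103 is to process source files in fixed-size chunks and tear the worker processes down between chunks so their memory is returned to the OS. A stripped-down sketch of that loop, omitting the TFRecord plumbing (names here are illustrative, not the repository's API):

# Stripped-down sketch of chunked multiprocessing from patch 103.
from multiprocessing import Process, Queue

def worker(queue):
    while True:
        job = queue.get()
        if job == "DONE":
            break
        # ... serialise this job's samples here ...

def run_in_chunks(jobs, num_processes=8, jobs_per_chunk=2400):
    index = 0
    while index < len(jobs):
        queue = Queue()
        procs = [Process(target=worker, args=(queue,)) for _ in range(num_processes)]
        for p in procs:
            p.start()
        for job in jobs[index : index + jobs_per_chunk]:
            queue.put(job)
        index += jobs_per_chunk
        for _ in procs:
            queue.put("DONE")
        for p in procs:
            p.join()  # workers (and their memory) are released every chunk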
From 50faca5b73aa579094c88c7159f76ef6dd884e88 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 28 Oct 2024 14:39:18 +0100
Subject: [PATCH 104/117] dont validate bins for after date test clips

---
 src/build.py             | 13 +++++++++++--
 src/ml_tools/tfwriter.py |  4 ++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/build.py b/src/build.py
index c8c51cb8..8a53f26a 100644
--- a/src/build.py
+++ b/src/build.py
@@ -571,7 +571,7 @@ def add_samples(
     dataset.add_samples(samples)

-def validate_datasets(datasets, test_bins, date):
+def validate_datasets(datasets, test_bins, after_date):
     # check that clips are only in one dataset
     # that only test set has clips after date
     # that test set is the only dataset with test_clips
     # for track in dataset.tracks:
     #     assert track.start_time < date
@@ -580,7 +580,7 @@ def validate_datasets(datasets, test_bins, after_date):
-    for i, dataset in enumerate(datasets):
+    for i, dataset in enumerate(datasets[:2]):
         dont_check = set(
             [
                 sample.bin_id
@@ -608,6 +608,15 @@ def validate_datasets(datasets, test_bins, after_date):
                 if sample.label in split_by_clip
             ]
         )
+        if dataset.name == "test" and after_date is not None:
+            dont_check_other = set(
+                [
+                    sample.bin_id
+                    for sample in other.samples_by_id.values()
+                    if sample.rec_time > after_date
+                ]
+            )
+            dont_check = dont_check + dont_check_other
         other_bins = set([sample.bin_id for sample in other.samples_by_id.values()])
         other_bins = other_bins - dont_check
         other_clips = set(
diff --git a/src/ml_tools/tfwriter.py b/src/ml_tools/tfwriter.py
index 9b51b29a..8658fb18 100644
--- a/src/ml_tools/tfwriter.py
+++ b/src/ml_tools/tfwriter.py
@@ -46,7 +46,7 @@ def process_job(queue, labels, base_dir, save_data, writer_i, extra_args):
     # writer_i = 1
     name = f"{writer_i}-{pid}.tfrecord"
-
+    logging.info("Writing to %s", name)
     options = tf.io.TFRecordOptions(compression_type="GZIP")
     writer = tf.io.TFRecordWriter(str(base_dir / name), options=options)
@@ -108,7 +108,7 @@ def create_tf_records(
     num_processes = 8
     writer_i = 0
     index = 0
-    jobs_per_process = 300 * num_processes
+    jobs_per_process = 3000 * num_processes
     try:
         while index < len(source_files):

From 37c68e1291c953f7d85abd17c2578d9795078c5f Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 28 Oct 2024 14:50:21 +0100
Subject: [PATCH 105/117] added start time

---
 src/build.py                      | 2 +-
 src/ml_tools/datasetstructures.py | 5 +++--
 src/ml_tools/rawdb.py             | 2 ++
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/build.py b/src/build.py
index 8a53f26a..b61ea270 100644
--- a/src/build.py
+++ b/src/build.py
@@ -608,7 +608,7 @@ def validate_datasets(datasets, test_bins, after_date):
                 if sample.label in split_by_clip
             ]
         )
-        if dataset.name == "test" and after_date is not None:
+        if other.name == "test" and after_date is not None:
             dont_check_other = set(
                 [
diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py
index bc42aec0..731e65a2 100644
--- a/src/ml_tools/datasetstructures.py
+++ b/src/ml_tools/datasetstructures.py
@@ -141,7 +141,7 @@ def __init__(
         ffc_frames=None,
         sample_frames_indices=None,
         station_id=None,
-        rec_time=None,
+        start_time=None,
         source_file=None,
         camera=None,
         confidence=None,
@@ -153,7 +153,7 @@ def __init__(

         self.fp_frames = fp_frames
-
+        self.start_time = start_time
         # regions that megadetector found nothing in
         self.mega_missed_regions = mega_missed_regions
         self.station_id = station_id
diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py
index 00c14593..e99200f6 100644
--- a/src/ml_tools/rawdb.py
+++ b/src/ml_tools/rawdb.py
@@ -23,6 +23,7 @@ from track.cliptrackextractor import is_affected_by_ffc
 from cptv_rs_python_bindings import CptvReader
 from ml_tools.rectangle import Rectangle
 from config.buildconfig import BuildConfig
+from datetime import timedelta

 special_datasets = [
     "tag_frames",
@@ -234,6 +235,7 @@ def get_clip_tracks(self, tag_precedence):
                 mega_missed_regions=track_meta.get("mega_missed_regions"),
                 station_id=clip_header.station_id,
                 fp_frames=fp_frames,
+                start_time=clip_header.rec_time + timedelta(seconds=start / FPS),
                 # frame_temp_median=frame_temp_median,
             )
             clip_header.tracks.append(header)
From 600af583dd4ec0b294506d7e8f1e25e156608458 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 29 Oct 2024 08:36:30 +0100
Subject: [PATCH 106/117] union set

---
 src/build.py             | 2 +-
 src/ml_tools/tfwriter.py | 5 +----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/build.py b/src/build.py
index b61ea270..ef416f2c 100644
--- a/src/build.py
+++ b/src/build.py
@@ -616,7 +616,7 @@ def validate_datasets(datasets, test_bins, after_date):
                     if sample.rec_time > after_date
                 ]
             )
-            dont_check = dont_check + dont_check_other
+            dont_check = dont_check | dont_check_other
         other_bins = set([sample.bin_id for sample in other.samples_by_id.values()])
         other_bins = other_bins - dont_check
         other_clips = set(
diff --git a/src/ml_tools/tfwriter.py b/src/ml_tools/tfwriter.py
index 8658fb18..18f628e1 100644
--- a/src/ml_tools/tfwriter.py
+++ b/src/ml_tools/tfwriter.py
@@ -132,10 +132,7 @@ def create_tf_records(
             for source_file in source_files[index : index + jobs_per_process]:
                 job_queue.put((samples_by_source[source_file]))
                 added += 1
-                while job_queue.qsize() > num_processes * 10:
-                    logging.info("Sleeping for %s", 10)
-                    # give it a chance to catch up
-                    time.sleep(10)
+
             index += jobs_per_process
             logging.info("Processing %d", job_queue.qsize())
             for i in range(len(processes)):

From 43180b7de38b65066db3636c0d5f4068a7de0083 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 5 Nov 2024 20:25:35 +0100
Subject: [PATCH 107/117] repeat frames at random rather than only last frame

---
 src/classify/clipclassifier.py    | 12 +++++++++-
 src/classify/trackprediction.py   | 13 ++++++----
 src/ml_tools/datasetstructures.py |  5 ++--
 src/ml_tools/imageprocessing.py   | 12 ++--------
 src/ml_tools/interpreter.py       |  5 +++-
 src/ml_tools/preprocess.py        |  8 +++++++
 src/ml_tools/previewer.py         |  8 +++---
 src/track/clip.py                 |  1 -
 src/track/track.py                | 40 ++++++++++++++++-------------
 9 files changed, 64 insertions(+), 40 deletions(-)

diff --git a/src/classify/clipclassifier.py b/src/classify/clipclassifier.py
index 9bcaa135..9e7dd279 100644
--- a/src/classify/clipclassifier.py
+++ b/src/classify/clipclassifier.py
@@ -245,7 +245,17 @@ def save_metadata(
             prediction = predictions.prediction_for(track.get_id())
             if prediction is None:
                 continue
-
+            # DEBUGGING STUFF REMOVE ME
+            # logging.info("Track predictions %s", track)
+            # for p in prediction.predictions:
+            #     logging.info(
+            #         "Have %s sum %s smoothed %s mass %s",
+            #         p,
+            #         np.sum(p.prediction),
+            #         np.round(p.smoothed_prediction),
+            #         p.mass,
+            #     )
+            # logging.info("smoothed %s", np.round(100 * prediction.class_best_score))
             prediction_meta = prediction.get_metadata()
             prediction_meta["model_id"] = model_id
             prediction_info.append(prediction_meta)
diff --git a/src/classify/trackprediction.py b/src/classify/trackprediction.py
index 94afe43b..a9af5056 100644
--- a/src/classify/trackprediction.py
+++ b/src/classify/trackprediction.py
@@ -110,18 +110,23 @@ def __init__(self, track_id, labels, keep_all=True, start_frame=None):
         self.masses = []

     def classified_clip(
-        self, predictions, smoothed_predictions, prediction_frames, top_score=None
+        self,
+        predictions,
+        smoothed_predictions,
+        prediction_frames,
+        masses,
+        top_score=None,
     ):
         self.num_frames_classified = len(predictions)
-        for prediction, smoothed_prediction, frames in zip(
-            predictions, smoothed_predictions, prediction_frames
+        for prediction, smoothed_prediction, frames, mass in zip(
+            predictions, smoothed_predictions, prediction_frames, masses
         ):
             prediction = Prediction(
                 prediction,
                 smoothed_prediction,
                 frames,
                 np.amax(frames),
-                None,
+                mass,
             )
             self.predictions.append(prediction)
diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py
index 731e65a2..6a7fbd61 100644
--- a/src/ml_tools/datasetstructures.py
+++ b/src/ml_tools/datasetstructures.py
@@ -1058,6 +1058,7 @@ def get_segments(
     segment_count = int(segment_count)
     if max_segments is not None:
         segment_count = min(max_segments, segment_count)
+
     # take any segment_width frames, this could be done each epoch
     whole_indices = frame_indices
     random_frames = segment_type in [
@@ -1074,8 +1075,7 @@ def get_segments(
             np.random.shuffle(frame_indices)
         for i in range(segment_count):
             # always get at least one segment, not doing anymore
-            # if i > 0:
-            if (len(frame_indices) < segment_width and len(segments) > 1) or len(
+            if (len(frame_indices) < segment_width / 2.0 and len(segments) > 1) or len(
                 frame_indices
             ) < segment_width / 4:
                 break
@@ -1089,6 +1089,7 @@ def get_segments(
                 frames = section[indices]
+                # might need to change that gp 11/05 - 2024
                 frame_indices = frame_indices[segment_frame_spacing:]
             elif random_frames:
                 # frame indices already randomized so just need to grab some
diff --git a/src/ml_tools/imageprocessing.py b/src/ml_tools/imageprocessing.py
index 4eeebcac..42312dee 100644
--- a/src/ml_tools/imageprocessing.py
+++ b/src/ml_tools/imageprocessing.py
@@ -34,8 +34,6 @@ def resize_and_pad(
         resize_dim = (width, height)
     if pad is None:
         pad = np.min(frame)
-    else:
-        pad = 0

     resized = np.full(new_dim, pad, dtype=frame.dtype)
     offset_x = 0
@@ -80,20 +78,14 @@ def resize_cv(image, dim, interpolation=cv2.INTER_LINEAR, extra_h=0, extra_v=0):
     )

-def square_clip(data, frames_per_row, tile_dim, normalize=True):
+def square_clip(data, frames_per_row, tile_dim, frame_samples, normalize=True):
     # lay each frame out side by side in rows
     new_frame = np.zeros((frames_per_row * tile_dim[0], frames_per_row * tile_dim[1]))
     i = 0
     success = False
     for x in range(frames_per_row):
         for y in range(frames_per_row):
-            if i >= len(data):
-                frame = data[-1]
-            else:
-                frame = data[i]
-
-            # cv2.imshow("frame", np.uint8(frame))
-            # cv2.waitKey(0)
+            frame = data[frame_samples[i]]
             if normalize:
                 frame, stats = normalize(frame, new_max=255)
                 if not stats[0]:
diff --git a/src/ml_tools/interpreter.py b/src/ml_tools/interpreter.py
index 12a5c57b..2b299181 100644
--- a/src/ml_tools/interpreter.py
+++ b/src/ml_tools/interpreter.py
@@ -146,6 +146,7 @@ def classify_track(self, clip, track, segment_frames=None):
         # self.model.predict(preprocessed)
         top_score = None
         smoothed_predictions = None
+
         if self.params.smooth_predictions:
             masses = np.array(masses)
             top_score = np.sum(masses)
@@ -155,6 +156,7 @@ def classify_track(self, clip, track, segment_frames=None):
             output,
             smoothed_predictions,
             prediction_frames,
+            masses,
             top_score=top_score,
         )
         track_prediction.classify_time = time.time() - start
@@ -213,6 +215,7 @@ def preprocess_frames(
                 diff_frame = region.subimage(f.thermal) - region.subimage(
                     clip.background
                 )
+
                 new_max = np.amax(diff_frame)
                 new_min = np.amin(diff_frame)
                 if min_diff is None or new_min < min_diff:
@@ -299,7 +302,7 @@ def preprocess_segments(
             from_last=predict_from_last,
             max_segments=max_segments,
             dont_filter=dont_filter,
-            filter_by_fp = False,
+            filter_by_fp=False,
         )
         frame_indices = set()
         for segment in segments:
diff --git a/src/ml_tools/preprocess.py b/src/ml_tools/preprocess.py
index fe02199a..9ab61c8f 100644
--- a/src/ml_tools/preprocess.py
+++ b/src/ml_tools/preprocess.py
@@ -147,6 +147,13 @@ def preprocess_movement(
 ):
     frame_types = {}
     data = []
+    frame_samples = list(np.arange(len(preprocess_frames)))
+    if len(preprocess_frames) < frames_per_row * 5:
+        extra_samples = np.random.choice(
+            frame_samples, frames_per_row * 5 - len(preprocess_frames)
+        )
+        frame_samples.extend(extra_samples)
+        frame_samples.sort()
     for channel in channels:
         if isinstance(channel, str):
             channel = TrackChannels[channel]
@@ -158,6 +165,7 @@ def preprocess_movement(
             channel_segment,
             frames_per_row,
             (frame_size, frame_size),
+            frame_samples,
             normalize=False,
         )
         # already done normalization
diff --git a/src/ml_tools/previewer.py b/src/ml_tools/previewer.py
index ddd203ea..34255047 100644
--- a/src/ml_tools/previewer.py
+++ b/src/ml_tools/previewer.py
@@ -91,8 +91,8 @@ def export_clip_preview(self, filename, clip: Clip, predictions=None):
         if self.debug:
             footer = Previewer.stats_footer(clip.stats)
         if predictions and (
-            self.preview_type == self.PREVIEW_CLASSIFIED
-            or self.preview_type == self.PREVIEW_TRACKING
+            self.preview_type == PREVIEW_CLASSIFIED
+            or self.preview_type == PREVIEW_TRACKING
         ):
             self.create_track_descriptions(clip, predictions)

@@ -103,14 +103,14 @@ def export_clip_preview(self, filename, clip: Clip, predictions=None):
         res_x = clip.res_x
         res_y = clip.res_y
-        if self.preview_type == self.PREVIEW_TRACKING:
+        if self.preview_type == PREVIEW_TRACKING:
             res_x *= 2
             res_y *= 2

         mpeg = MPEGCreator(str(filename))
         frame_scale = 4
         for frame_number, frame in enumerate(clip.frame_buffer):
-            if self.preview_type == self.PREVIEW_RAW:
+            if self.preview_type == PREVIEW_RAW:
                 image = self.convert_and_resize(
                     frame.thermal, clip.stats.min_temp, clip.stats.max_temp, clip.type
                 )
diff --git a/src/track/clip.py b/src/track/clip.py
index e21ccbc8..72cc489d 100644
--- a/src/track/clip.py
+++ b/src/track/clip.py
@@ -185,7 +185,6 @@ def calculate_background(self, frame_reader):
                 self.update_background(frame.pix)
             self._background_calculated()
             return
-        first_frame = frame

         initial_frames = None
         initial_diff = None
diff --git a/src/track/track.py b/src/track/track.py
index 5550391c..165ee39d 100644
--- a/src/track/track.py
+++ b/src/track/track.py
@@ -444,6 +444,7 @@ def get_segments(
         max_segments=None,
         ffc_frames=None,
         dont_filter=False,
+        filter_by_fp=False,
     ):
         if from_last is not None:
             if from_last == 0:
@@ -476,23 +477,28 @@ def get_segments(
                 )
                 segments.append(segment)
         else:
-            segments, _ = get_segments(
-                self.clip_id,
-                self._id,
-                start_frame,
-                segment_frame_spacing=segment_frame_spacing,
-                segment_width=segment_width,
-                regions=regions,
-                ffc_frames=ffc_frames,
-                repeats=repeats,
-                # frame_temp_median=frame_temp_median,
-                min_frames=min_frames,
-                segment_frames=None,
-                segment_type=segment_type,
-                max_segments=max_segments,
-                dont_filter=dont_filter,
-            )
-        return segments
+            all_segments = []
+            for seg_type in [SegmentType.ALL_RANDOM, SegmentType.ALL_SECTIONS]:
+                segments, _ = get_segments(
+                    self.clip_id,
+                    self._id,
+                    start_frame,
+                    segment_frame_spacing=segment_frame_spacing,
+                    segment_width=segment_width,
+                    regions=regions,
+                    ffc_frames=ffc_frames,
+                    repeats=repeats,
+                    # frame_temp_median=frame_temp_median,
+                    min_frames=min_frames,
+                    segment_frames=None,
+                    segment_type=seg_type,
+                    max_segments=max_segments,
+                    dont_filter=dont_filter,
+                    # segment_type=seg_type,
+                )
+                all_segments.extend(segments)
+
+            return all_segments

     @classmethod
     def from_region(cls, clip, region, tracker_version=None, tracking_config=None):
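Aside: the core of patch 107 is that a segment shorter than the target width is padded by re-sampling random frames rather than by repeating the last frame. A small numpy sketch of that padding, as an illustration (the function name is hypothetical):

# Sketch of patch 107's padding: fill short segments with randomly
# re-sampled frame indices instead of copies of the final frame.
import numpy as np

def pad_segment(frame_indices, segment_width):
    frames = list(frame_indices)
    if len(frames) < segment_width:
        extra = np.random.choice(frames, segment_width - len(frames))
        frames.extend(extra)
    frames.sort()
    return frames

print(pad_segment([3, 7, 11], 9))  # e.g. [3, 3, 3, 7, 7, 7, 7, 11, 11]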
From 7ca8817b43f7f67784eabcd6d2dd986634d7c819 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 5 Nov 2024 20:26:58 +0100
Subject: [PATCH 108/117] less jobs

---
 src/ml_tools/tfwriter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ml_tools/tfwriter.py b/src/ml_tools/tfwriter.py
index 18f628e1..d40cf8ac 100644
--- a/src/ml_tools/tfwriter.py
+++ b/src/ml_tools/tfwriter.py
@@ -108,7 +108,7 @@ def create_tf_records(
     num_processes = 8
     writer_i = 0
     index = 0
-    jobs_per_process = 3000 * num_processes
+    jobs_per_process = 600 * num_processes
     try:
         while index < len(source_files):

From 46b431b641a6e746abb835c3874a119b0071ef14 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Thu, 7 Nov 2024 18:10:18 +0100
Subject: [PATCH 109/117] add multiple segment type option

---
 src/build.py                      |   3 +-
 src/ml_tools/dataset.py           |  46 +---
 src/ml_tools/datasetstructures.py | 343 ++++++++++++++++--------------
 src/ml_tools/hyperparams.py       |  16 +-
 src/ml_tools/interpreter.py       |   3 +-
 src/ml_tools/kerasmodel.py        |   1 -
 src/ml_tools/tfwriter.py          |  31 +--
 src/ml_tools/thermalwriter.py     |   2 +-
 src/ml_tools/tools.py             |   4 +-
 src/modelevaluate.py              |   8 +-
 src/track/track.py                |  38 ++--
 11 files changed, 226 insertions(+), 269 deletions(-)

diff --git a/src/build.py b/src/build.py
index ef416f2c..3c766af5 100644
--- a/src/build.py
+++ b/src/build.py
@@ -896,7 +896,7 @@ def main():
             {
                 "segment_frame_spacing": master_dataset.segment_spacing * 9,
                 "segment_width": master_dataset.segment_length,
-                "segment_type": master_dataset.segment_type,
+                "segment_types": master_dataset.segment_types,
                 "segment_min_avg_mass": master_dataset.segment_min_avg_mass,
                 "max_segments": master_dataset.max_segments,
                 "dont_filter_segment": True,
@@ -932,6 +932,7 @@ def main():
         "counts": dataset_counts,
         "by_label": False,
        "config": attrs.asdict(config),
+        "segment_types": master_dataset.segment_types,
     }

     with open(meta_filename, "w") as f:
diff --git a/src/ml_tools/dataset.py b/src/ml_tools/dataset.py
index 8556c5f9..7e633b32 100644
--- a/src/ml_tools/dataset.py
+++ b/src/ml_tools/dataset.py
@@ -83,7 +83,7 @@ def __init__(
         self.excluded_tags = config.build.excluded_tags
         self.min_frame_mass = config.build.min_frame_mass
         self.filter_by_lq = config.build.filter_by_lq
-        self.segment_type = SegmentType.ALL_RANDOM
+        self.segment_types = [SegmentType.ALL_RANDOM]
         self.max_segments = config.build.max_segments
         self.country = config.build.country
         self.max_frames = config.build.max_frames
@@ -100,7 +100,7 @@ def __init__(
             self.segment_spacing = 1
             self.segment_min_avg_mass = 10
             self.min_frame_mass = 16
-            self.segment_type = SegmentType.ALL_RANDOM
+            self.segment_types = [SegmentType.ALL_RANDOM]
             self.max_frames = 75
         self.country_rectangle = BuildConfig.COUNTRY_LOCATIONS.get(self.country)
@@ -244,7 +244,7 @@ def load_clip(self, db_clip, dont_filter_segment=False):
                 track_header.get_segments(
                     segment_width,
                     segment_frame_spacing,
-                    self.segment_type,
+                    self.segment_types,
                     self.segment_min_avg_mass,
                     max_segments=self.max_segments,
                     dont_filter=dont_filter_segment,
@@ -504,46 +504,6 @@ def regroup(

     def has_data(self):
         return len(self.samples_by_id) > 0

-    #
-    # def recalculate_segments(self, segment_type=SegmentType.ALL_RANDOM):
-    #     self.samples_by_bin.clear()
-    #     self.samples_by_label.clear()
-    #     del self.samples[:]
-    #     del self.samples
-    #     self.samples = []
-    #     self.samples_by_label = {}
-    #     self.samples_by_bin = {}
-    #     logging.info("%s generating segments type %s", self.name, segment_type)
-    #     start = time.time()
-    #     empty_tracks = []
-    #     filtered_stats = 0
-    #
-    #     for track in self.tracks:
-    #         segment_frame_spacing = int(
-    #             round(self.segment_spacing * track.frames_per_second)
-    #         )
-    #         segment_width = self.segment_length
-    #         track.calculate_segments(
-    #             segment_frame_spacing,
-    #             segment_width,
-    #             segment_type,
-    #             segment_min_mass=segment_min_avg_mass,
-    #         )
-    #         filtered_stats = filtered_stats + track.filtered_stats["segment_mass"]
-    #         if len(track.segments) == 0:
-    #             empty_tracks.append(track)
-    #             continue
-    #         for sample in track.segments:
-    #             self.add_clip_sample_mappings(sample)
-    #
-    #     self.rebuild_cdf()
-    #     logging.info(
-    #         "%s #segments %s filtered stats are %s took %s",
-    #         self.name,
-    #         len(self.samples),
-    #         filtered_stats,
-    #         time.time() - start,
-    #     )
     def remove_sample_by_id(self, id, bin_id):
         del self.samples_by_id[id]
         try:
diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py
index 6a7fbd61..daa41741 100644
--- a/src/ml_tools/datasetstructures.py
+++ b/src/ml_tools/datasetstructures.py
@@ -367,7 +367,7 @@ def get_segments(
         self,
         segment_width,
         segment_frame_spacing=9,
-        segment_type=SegmentType.ALL_RANDOM,
+        segment_types=[SegmentType.ALL_RANDOM],
         segment_min_mass=None,
         repeats=1,
         max_segments=None,
@@ -389,7 +389,7 @@ def get_segments(
         # in python3.7+ can just take the values and it guarantees order it was added to dict
         regions = self.bounds_history
-        self.samples, self.filtered_stats = get_segments(
+        self.samples, filtered_stats = get_segments(
             self.clip_id,
             self.track_id,
             self.start_frame,
@@ -402,7 +402,7 @@ def get_segments(
             lower_mass=self.lower_mass,
             repeats=repeats,
             min_frames=min_frames,
-            segment_type=segment_type,
+            segment_types=segment_types,
             max_segments=max_segments,
             station_id=self.station_id,
             source_file=self.source_file,
@@ -412,6 +412,7 @@ def get_segments(
             fp_frames=self.fp_frames if filter_by_fp else None,
             rec_time=self.start_time,
         )
+        self.filtered_stats.update(filtered_stats)
         # GP could get this from the tracks when writing
         # but might be best to keep samples independent for ease
         for s in self.samples:
@@ -974,8 +975,7 @@ def get_segments(
     lower_mass=0,
     repeats=1,
     min_frames=None,
-    segment_frames=None,
-    segment_type=SegmentType.ALL_RANDOM,
+    segment_types=[SegmentType.ALL_RANDOM],
     max_segments=None,
     location=None,
     station_id=None,
@@ -986,9 +986,8 @@ def get_segments(
     skip_ffc=True,
     frame_min_mass=None,
     fp_frames=None,
+    repeat_frame_indices=True,
 ):
-    if segment_type == SegmentType.ALL_RANDOM_NOMIN:
-        segment_min_mass = None
     if min_frames is None:
         min_frames = segment_width / 4.0
     segments = []
@@ -997,163 +996,189 @@ def get_segments(

     has_no_mass = np.sum(mass_history) == 0

-    frame_indices = [
-        region.frame_number
-        for region in regions
-        if (has_no_mass or region.mass > 0)
-        and (
-            ffc_frames is None
-            or skip_ffc is False
-            or region.frame_number not in ffc_frames
-        )
-        and not region.blank
-        and region.width > 0
-        and region.height > 0
-        and ((has_no_mass or frame_min_mass is None) or region.mass >= frame_min_mass)
-    ]
-    if fp_frames is not None and label not in FP_LABELS:
-        frame_indices = [f for f in frame_indices if f not in fp_frames]
-    if len(frame_indices) == 0:
-        logging.warn("Nothing to load for %s - %s", clip_id, track_id)
-        return [], filtered_stats
-    if segment_min_mass is not None:
-        segment_min_mass = min(
-            segment_min_mass,
-            np.median(mass_history[frame_indices - start_frame]),
-        )
-    else:
-        segment_min_mass = 1
-    # remove blank frames
-
-    if segment_type == SegmentType.TOP_RANDOM:
-        # take top 50 mass frames
-        frame_indices = sorted(
-            frame_indices,
-            key=lambda f_i: mass_history[f_i - start_frame],
-            reverse=True,
-        )
-        frame_indices = frame_indices[:50]
-        frame_indices.sort()
-    if segment_type == SegmentType.TOP_SEQUENTIAL:
-        return get_top_mass_segments(
-            clip_id,
-            track_id,
-            label,
-            camera,
-            segment_width,
-            segment_frame_spacing,
-            mass_history,
-            ffc_frames,
-            regions,
-            start_frame,
-            lower_mass,
-            segment_min_mass,
-            source_file=source_file,
-        )
-    if len(frame_indices) < min_frames:
-        filtered_stats["too short"] += 1
-        return segments, filtered_stats
-    frame_indices = np.array(frame_indices)
-    segment_count = max(1, len(frame_indices) // segment_frame_spacing)
-    segment_count = int(segment_count)
-    if max_segments is not None:
-        segment_count = min(max_segments, segment_count)
-
-    # take any segment_width frames, this could be done each epoch
-    whole_indices = frame_indices
-    random_frames = segment_type in [
-        SegmentType.IMPORTANT_RANDOM,
-        SegmentType.ALL_RANDOM,
-        SegmentType.ALL_RANDOM_NOMIN,
-        SegmentType.TOP_RANDOM,
-        None,
-    ]
-    for _ in range(repeats):
-        frame_indices = whole_indices.copy()
-        if random_frames:
-            # random_frames and not random_sections:
-            np.random.shuffle(frame_indices)
-        for i in range(segment_count):
-            # always get at least one segment, not doing anymore
-            if (len(frame_indices) < segment_width / 2.0 and len(segments) > 1) or len(
-                frame_indices
-            ) < segment_width / 4:
-                break
+    for segment_type in segment_types:
+        s_min_mass = segment_min_mass
+        if segment_type == SegmentType.ALL_RANDOM_NOMIN:
+            s_min_mass = None
+
+        frame_indices = [
+            region.frame_number
+            for region in regions
+            if (has_no_mass or region.mass > 0)
+            and (
+                ffc_frames is None
+                or skip_ffc is False
+                or region.frame_number not in ffc_frames
+            )
+            and not region.blank
+            and region.width > 0
+            and region.height > 0
+            and (
+                (has_no_mass or frame_min_mass is None) or region.mass >= frame_min_mass
+            )
+        ]
+        if fp_frames is not None and label not in FP_LABELS:
+            frame_indices = [f for f in frame_indices if f not in fp_frames]
+        if len(frame_indices) == 0:
+            logging.warn("Nothing to load for %s - %s", clip_id, track_id)
+            return [], filtered_stats
+        if s_min_mass is not None:
+            s_min_mass = min(
+                s_min_mass,
+                np.median(mass_history[frame_indices - start_frame]),
+            )
+        else:
+            s_min_mass = 1
+        # remove blank frames
+
+        if segment_type == SegmentType.TOP_RANDOM:
+            # take top 50 mass frames
+            frame_indices = sorted(
+                frame_indices,
+                key=lambda f_i: mass_history[f_i - start_frame],
+                reverse=True,
+            )
+            frame_indices = frame_indices[:50]
+            frame_indices.sort()
+        if segment_type == SegmentType.TOP_SEQUENTIAL:
+            new_segments, filtered = get_top_mass_segments(
+                clip_id,
+                track_id,
+                label,
+                camera,
+                segment_width,
+                segment_frame_spacing,
+                mass_history,
+                ffc_frames,
+                regions,
+                start_frame,
+                lower_mass,
+                s_min_mass,
+                source_file=source_file,
+            )
+            segments.extend(new_segments)
+            filtered_stats.merge(filtered)
+            continue
+        if len(frame_indices) < min_frames:
+            filtered_stats["too short"] += 1
+            continue
+
+        frame_indices = np.array(frame_indices)
+        segment_count = max(1, len(frame_indices) // segment_frame_spacing)
+        segment_count = int(segment_count)
+        # probably only counts for all random
+        if max_segments is not None and segment_type not in [SegmentType.ALL_SECTIONS]:
+            segment_count = min(max_segments, segment_count)
+
+        # take any segment_width frames, this could be done each epoch
+        whole_indices = frame_indices
+        random_frames = segment_type in [
+            SegmentType.IMPORTANT_RANDOM,
+            SegmentType.ALL_RANDOM,
+            SegmentType.ALL_RANDOM_NOMIN,
+            SegmentType.TOP_RANDOM,
+            None,
+        ]
+        for _ in range(repeats):
+            frame_indices = whole_indices.copy()
+            if random_frames:
+                # random_frames and not random_sections:
+                np.random.shuffle(frame_indices)
+            for i in range(segment_count):
+                # always get at least one segment, not doing anymore
+                if (
+                    len(frame_indices) < segment_width / 2.0 and len(segments) > 1
+                ) or len(frame_indices) < segment_width / 4:
+                    break
+
+                if segment_type == SegmentType.ALL_SECTIONS:
+                    # random frames from section 2.2 * segment_width
+                    section = frame_indices[: int(segment_width * 2.2)]
+
+                    indices = np.random.choice(
+                        len(section),
+                        min(segment_width, len(section)),
+                        replace=False,
+                    )
+                    frames = section[indices]
+                    # might need to change that gp 11/05 - 2024
+                    frame_indices = frame_indices[segment_width:]
+                elif random_frames:
+                    # frame indices already randomized so just need to grab some
+                    frames = frame_indices[:segment_width]
+                    frame_indices = frame_indices[segment_width:]
+                else:
+                    segment_start = i * segment_frame_spacing
+                    segment_end = segment_start + segment_width
+                    segment_end = min(len(frame_indices), segment_end)
+                    frames = frame_indices[segment_start:segment_end]
+
+                remaining = segment_width - len(frames)
+                # sample another same frames again if need be
+                if remaining > 0:
+                    extra_frames = np.random.choice(
+                        frames,
+                        min(remaining, len(frames)),
+                        replace=False,
+                    )
+                    frames = np.concatenate([frames, extra_frames])
+                frames.sort()
+                relative_frames = frames - start_frame
+                mass_slice = mass_history[relative_frames]
+                segment_mass = np.sum(mass_slice)
segment_avg_mass = segment_mass / len(mass_slice) + filtered = False + if s_min_mass and segment_avg_mass < s_min_mass: + if dont_filter: + filtered = True + else: + filtered_stats["segment_mass"] += 1 + continue + + # temp_slice = frame_temp_median[relative_frames] + region_slice = regions[relative_frames] + movement_data = None + if segment_avg_mass < 50: + segment_weight_factor = 0.75 + elif segment_avg_mass < 100: + segment_weight_factor = 1 + else: + segment_weight_factor = 1.2 + + for z, f in enumerate(frames): + assert region_slice[z].frame_number == f + + if repeat_frame_indices: + # i think this can be default, means we dont need to handle + # short segments elsewhere + if len(frames) < segment_width: + extra_samples = np.random.choice( + frames, segment_width - len(frames) + ) + frames = list(frames) + frames.extend(extra_samples) + frames.sort() + + segment = SegmentHeader( + clip_id, + track_id, + start_frame=start_frame, + frames=segment_width, + weight=segment_weight_factor, + mass=segment_mass, + label=label, + regions=region_slice, + frame_indices=frames, + movement_data=movement_data, + camera=camera, + location=location, + station_id=station_id, + rec_time=rec_time, + source_file=source_file, + filtered=filtered, + ) + segments.append(segment) + return segments, filtered_stats diff --git a/src/ml_tools/hyperparams.py b/src/ml_tools/hyperparams.py index cd6ddb79..b1868fd0 100644 --- a/src/ml_tools/hyperparams.py +++ b/src/ml_tools/hyperparams.py @@ -24,7 +24,7 @@ def insert_defaults(self): self["square_width"] = self.square_width self["frame_size"] = self.frame_size self["segment_width"] = self.segment_width - self["segment_type"] = self.segment_type + self["segment_types"] = self.segment_types self["multi_label"] = True self["diff_norm"] = self.diff_norm self["thermal_diff_norm"] = self.thermal_diff_norm @@ -89,12 +89,14 @@ def segment_width(self): return self.get("segment_width", 25 if self.use_segments else 1) @property - def segment_type(self): - segment_type = self.get("segment_type", SegmentType.ALL_RANDOM.name) - if isinstance(segment_type, str): - return SegmentType[segment_type] - else: - return segment_type + def segment_types(self): + + segment_types = self.get("segment_type", [SegmentType.ALL_RANDOM]) + # convert string to enum type + if isinstance(segment_types[0], str): + for i in range(len(segment_types)): + segment_types[i] = SegmentType[segment_types[i]] + return segment_types @property def mvm(self): diff --git a/src/ml_tools/interpreter.py b/src/ml_tools/interpreter.py index 2b299181..bdac4f53 100644 --- a/src/ml_tools/interpreter.py +++ b/src/ml_tools/interpreter.py @@ -21,6 +21,7 @@ def load_json(self, filename): self.version = metadata.get("version", None) self.labels = metadata["labels"] self.params = HyperParams() + print("Hypers are ", metadata.get("hyperparams", {})) self.params.update(metadata.get("hyperparams", {})) self.data_type = metadata.get("type", "thermal") @@ -298,7 +299,7 @@ def preprocess_segments( ffc_frames=[] if dont_filter else clip.ffc_frames, repeats=1, segment_frames=segment_frames, - segment_type=self.params.segment_type, + segment_types=self.params.segment_types, from_last=predict_from_last, max_segments=max_segments, dont_filter=dont_filter, diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py index 4e5acf8a..45cbb466 100644 --- a/src/ml_tools/kerasmodel.py +++ b/src/ml_tools/kerasmodel.py @@ -1083,7 +1083,6 @@ def plot_confusion_matrix(cm, class_names): counts = cm.copy() threshold = counts.max() / 2.0 - 
print("Threshold is", threshold, " for ", cm.max()) # Normalize the confusion matrix. cm = np.around(cm.astype("float") / cm.sum(axis=1)[:, np.newaxis], decimals=2) diff --git a/src/ml_tools/tfwriter.py b/src/ml_tools/tfwriter.py index d40cf8ac..983308f2 100644 --- a/src/ml_tools/tfwriter.py +++ b/src/ml_tools/tfwriter.py @@ -12,31 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -from PIL import Image from pathlib import Path from multiprocessing import Process, Queue - -import collections -import hashlib -import io -import json -import multiprocessing import os -import time -from absl import app -from absl import flags from absl import logging import numpy as np -from PIL import Image, ImageOps - import tensorflow as tf -from . import tfrecord_util -from ml_tools import tools -from ml_tools.imageprocessing import normalize, rotate -from track.cliptracker import get_diff_back_filtered -import cv2 -import random -import math def process_job(queue, labels, base_dir, save_data, writer_i, extra_args): @@ -44,7 +25,6 @@ def process_job(queue, labels, base_dir, save_data, writer_i, extra_args): pid = os.getpid() - # writer_i = 1 name = f"{writer_i}-{pid}.tfrecord" logging.info("Writing to %s", name) options = tf.io.TFRecordOptions(compression_type="GZIP") @@ -66,15 +46,8 @@ def process_job(queue, labels, base_dir, save_data, writer_i, extra_args): saved += save_data(samples, writer, labels, extra_args) files += 1 del samples - # if saved > 250000 / num_frames: - # logging.info("Closing old writer") - # writer.close() - # writer_i += 1 - # name = f"{writer_i}-{pid}.tfrecord" - # logging.info("Opening %s", name) - # saved = 0 - # writer = tf.io.TFRecordWriter(str(base_dir / name), options=options) - if i % int(25000 / num_frames) == 0: + + if i % int(2500 / num_frames) == 0: logging.info("Saved %s ", files) gc.collect() writer.flush() diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py index 7a123460..891edb54 100644 --- a/src/ml_tools/thermalwriter.py +++ b/src/ml_tools/thermalwriter.py @@ -227,7 +227,7 @@ def get_data(clip_samples, extra_args): segment_frame_spacing=extra_args.get( "segment_frame_spacing", 9 ), - segment_type=extra_args.get("segment_type"), + segment_types=extra_args.get("segment_types"), segment_min_mass=extra_args.get("segment_min_avg_mass"), dont_filter=extra_args.get("dont_filter_segment", False), skip_ffc=extra_args.get("skip_ffc", True), diff --git a/src/ml_tools/tools.py b/src/ml_tools/tools.py index 38dd9e90..ce604906 100644 --- a/src/ml_tools/tools.py +++ b/src/ml_tools/tools.py @@ -15,6 +15,7 @@ from pathlib import Path from ml_tools.rectangle import Rectangle from dateutil import parser +from enum import Enum EPISON = 1e-5 @@ -54,7 +55,8 @@ def default(self, obj): return obj.meta_dictionary() elif isinstance(obj, Path): return str(obj) - + elif isinstance(obj, Enum): + return str(obj.name) # Let the base class default method raise the TypeError return json.JSONEncoder.default(self, obj) diff --git a/src/modelevaluate.py b/src/modelevaluate.py index 94ae85e9..fb07124f 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -463,14 +463,14 @@ def evaluate_dir( masses = np.array(data[4]) masses = masses[:, None] top_score = None - # if model.params.multi_label is True: - # # every label could be 1 for each prediction - # top_score = len(output) + if model.params.multi_label is True: + # # every 
label could be 1 for each prediction + top_score = np.sum(masses) # smoothed = output # else: smoothed = output * masses prediction.classified_clip( - output, smoothed, data[2], top_score=top_score + output, smoothed, data[2], masses, top_score=top_score ) y_true.append(label_mapping.get(label, label)) predicted_labels = [prediction.predicted_tag()] diff --git a/src/track/track.py b/src/track/track.py index 165ee39d..b8264c35 100644 --- a/src/track/track.py +++ b/src/track/track.py @@ -439,7 +439,7 @@ def get_segments( repeats=1, min_frames=0, segment_frames=None, - segment_type=SegmentType.ALL_RANDOM, + segment_types=[SegmentType.ALL_RANDOM], from_last=None, max_segments=None, ffc_frames=None, @@ -477,28 +477,22 @@ def get_segments( ) segments.append(segment) else: - all_segments = [] - for seg_type in [SegmentType.ALL_RANDOM, SegmentType.ALL_SECTIONS]: - segments, _ = get_segments( - self.clip_id, - self._id, - start_frame, - segment_frame_spacing=segment_frame_spacing, - segment_width=segment_width, - regions=regions, - ffc_frames=ffc_frames, - repeats=repeats, - # frame_temp_median=frame_temp_median, - min_frames=min_frames, - segment_frames=None, - segment_type=seg_type, - max_segments=max_segments, - dont_filter=dont_filter, - # segment_type=seg_type, - ) - all_segments.extend(segments) + segments, _ = get_segments( + self.clip_id, + self._id, + start_frame, + segment_frame_spacing=segment_frame_spacing, + segment_width=segment_width, + regions=regions, + ffc_frames=ffc_frames, + repeats=repeats, + min_frames=min_frames, + segment_types=segment_types, + max_segments=max_segments, + dont_filter=dont_filter, + ) - return all_segments + return segments @classmethod def from_region(cls, clip, region, tracker_version=None, tracking_config=None): From e9b51a977237cd848df3a58cf6048f6292240857 Mon Sep 17 00:00:00 2001 From: gferraro Date: Mon, 11 Nov 2024 18:19:42 +0100 Subject: [PATCH 110/117] try random section --- src/classify/clipclassifier.py | 2 ++ src/ml_tools/datasetstructures.py | 37 ++++++++++++++++++++++--------- src/ml_tools/interpreter.py | 1 + src/track/track.py | 2 ++ 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/classify/clipclassifier.py b/src/classify/clipclassifier.py index 9e7dd279..4de7ce0c 100644 --- a/src/classify/clipclassifier.py +++ b/src/classify/clipclassifier.py @@ -182,6 +182,8 @@ def classify_clip(self, clip, model, meta_data, reuse_frames=None): predictions.model_load_time = time.time() - start for i, track in enumerate(clip.tracks): + logging.info("Track id is %s", track.get_id()) + segment_frames = None if reuse_frames: tracks = meta_data.get("tracks") diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py index daa41741..554fc7d5 100644 --- a/src/ml_tools/datasetstructures.py +++ b/src/ml_tools/datasetstructures.py @@ -987,6 +987,7 @@ def get_segments( frame_min_mass=None, fp_frames=None, repeat_frame_indices=True, + min_segments=None, ): if min_frames is None: min_frames = segment_width / 4.0 @@ -1059,7 +1060,9 @@ def get_segments( segments.extend(new_segments) filtered_stats.merge(filtered) continue - if len(frame_indices) < min_frames: + if len(frame_indices) < min_frames and ( + min_segments == 0 or min_segments is None + ): filtered_stats["too short"] += 1 continue @@ -1069,7 +1072,6 @@ def get_segments( # probably only counts for all random if max_segments is not None and segment_type not in [SegmentType.ALL_SECTIONS]: segment_count = min(max_segments, segment_count) - # take any segment_width frames, 
this could be done each epoch
 whole_indices = frame_indices
 random_frames = segment_type in [
@@ -1079,17 +1081,31 @@ def get_segments(
 SegmentType.TOP_RANDOM,
 None,
 ]
+ random_mask = True
 for _ in range(repeats):
- frame_indices = whole_indices.copy()
- if random_frames:
- # random_frames and not random_sections:
- np.random.shuffle(frame_indices)
+ used_indices = []
+ if not random_mask:
+ frame_indices = whole_indices.copy()
+
+ if random_frames:
+ # random_frames and not random_sections:
+ np.random.shuffle(frame_indices)
+
 for i in range(segment_count):
+ if random_mask:
+ mask_start = i * 25
+ frame_indices = list(whole_indices[0:mask_start].copy())
+ frame_indices.extend(whole_indices[mask_start + 25 :].copy())
+ frame_indices = [f for f in frame_indices if f not in used_indices]
+ frame_indices = np.uint32(frame_indices)
+ np.random.shuffle(frame_indices)
+
 # always get at least one segment, not doing that anymore
- if (
- len(frame_indices) < segment_width / 2.0 and len(segments) > 1
- ) or len(frame_indices) < segment_width / 4:
- break
+ if len(frame_indices) == 0 or len(segments) >= min_segments:
+ if (
+ len(frame_indices) < segment_width / 2.0 and len(segments) > 1
+ ) or len(frame_indices) < segment_width / 4:
+ break

 if segment_type == SegmentType.ALL_SECTIONS:
 # random frames from section 2.2 * segment_width
@@ -1106,6 +1122,7 @@ def get_segments(
 elif random_frames:
 # frame indices already randomized so just need to grab some
 frames = frame_indices[:segment_width]
+ used_indices.extend(frames)
 frame_indices = frame_indices[segment_width:]
 else:
 segment_start = i * segment_frame_spacing
diff --git a/src/ml_tools/interpreter.py b/src/ml_tools/interpreter.py
index bdac4f53..b2dc166d 100644
--- a/src/ml_tools/interpreter.py
+++ b/src/ml_tools/interpreter.py
@@ -304,6 +304,7 @@ def preprocess_segments(
 max_segments=max_segments,
 dont_filter=dont_filter,
 filter_by_fp=False,
+ min_segments=1,
 )
 frame_indices = set()
 for segment in segments:
diff --git a/src/track/track.py b/src/track/track.py
index b8264c35..f265b014 100644
--- a/src/track/track.py
+++ b/src/track/track.py
@@ -445,6 +445,7 @@ def get_segments(
 ffc_frames=None,
 dont_filter=False,
 filter_by_fp=False,
+ min_segments=1,
 ):
 if from_last is not None:
 if from_last == 0:
@@ -490,6 +491,7 @@ def get_segments(
 segment_types=segment_types,
 max_segments=max_segments,
 dont_filter=dont_filter,
+ min_segments=min_segments,
 )

 return segments

From 25ae03b70aa43203dd120a26dafcfc9a7ec8d391 Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 11 Nov 2024 18:23:17 +0100
Subject: [PATCH 111/117] add min path

---
 src/ml_tools/datasetstructures.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py
index 554fc7d5..dcb6e200 100644
--- a/src/ml_tools/datasetstructures.py
+++ b/src/ml_tools/datasetstructures.py
@@ -379,6 +379,7 @@ def get_segments(
 from_last=None,
 frame_min_mass=None,
 filter_by_fp=True,
+ min_segments=None,
 ):
 if segment_frames is not None:
 raise Exception("Have not implement this path")
@@ -411,6 +412,7 @@ def get_segments(
 frame_min_mass=frame_min_mass,
 fp_frames=self.fp_frames if filter_by_fp else None,
 rec_time=self.start_time,
+ min_segments=min_segments,
 )
 self.filtered_stats.update(filtered_stats)
 # GP could get this from the tracks when writing

From 7cd7efc5ffd82c36c929b94ee834ac8df4791e8e Mon Sep 17 00:00:00 2001
From: gferraro
Date: Mon, 11 Nov 2024 18:37:24 +0100
Subject: [PATCH 112/117] fix too-small tracks

---
 src/ml_tools/datasetstructures.py | 15 
++++++++++----- src/ml_tools/interpreter.py | 4 +++- src/modelevaluate.py | 3 +-- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py index dcb6e200..ad4b1d1b 100644 --- a/src/ml_tools/datasetstructures.py +++ b/src/ml_tools/datasetstructures.py @@ -1095,11 +1095,16 @@ def get_segments( for i in range(segment_count): if random_mask: - mask_start = i * 25 - frame_indices = list(whole_indices[0:mask_start].copy()) - frame_indices.extend(whole_indices[mask_start + 25 :].copy()) - frame_indices = [f for f in frame_indices if f not in used_indices] - frame_indices = np.uint32(frame_indices) + if len(whole_indices) < 40: + frame_indices = whole_indices.copy() + else: + mask_start = i * 25 + frame_indices = list(whole_indices[0:mask_start].copy()) + frame_indices.extend(whole_indices[mask_start + 25 :].copy()) + frame_indices = [ + f for f in frame_indices if f not in used_indices + ] + frame_indices = np.uint32(frame_indices) np.random.shuffle(frame_indices) # always get atleast one segment, not doing annymore diff --git a/src/ml_tools/interpreter.py b/src/ml_tools/interpreter.py index b2dc166d..44426163 100644 --- a/src/ml_tools/interpreter.py +++ b/src/ml_tools/interpreter.py @@ -126,6 +126,7 @@ def preprocess(self, clip, track, **args): predict_from_last, segment_frames=segment_frames, dont_filter=args.get("dont_filter", False), + min_segments=args.get("min_segments"), ) else: frames, preprocessed, masses = self.preprocess_frames( @@ -290,6 +291,7 @@ def preprocess_segments( predict_from_last=None, segment_frames=None, dont_filter=False, + min_segments=None, ): from ml_tools.preprocess import preprocess_frame, preprocess_movement @@ -304,7 +306,7 @@ def preprocess_segments( max_segments=max_segments, dont_filter=dont_filter, filter_by_fp=False, - min_segments=1, + min_segments=min_segments, ) frame_indices = set() for segment in segments: diff --git a/src/modelevaluate.py b/src/modelevaluate.py index fb07124f..b665972c 100644 --- a/src/modelevaluate.py +++ b/src/modelevaluate.py @@ -381,9 +381,8 @@ def load_clip_data(cptv_file): for track in clip.tracks: try: frames, preprocessed, masses = worker_model.preprocess( - clip_db, track, frames_per_classify=25, dont_filter=True + clip_db, track, frames_per_classify=25, dont_filter=True, min_segments=1 ) - data.append( ( f"{track.clip_id}-{track.get_id()}", From 0ca2c9344b9a29d05a366cc0456310eeea0e8468 Mon Sep 17 00:00:00 2001 From: gferraro Date: Tue, 12 Nov 2024 09:52:15 +0100 Subject: [PATCH 113/117] added mask segment type as default --- src/ml_tools/dataset.py | 2 +- src/ml_tools/datasetstructures.py | 34 +++++++++++++++++++++---------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/ml_tools/dataset.py b/src/ml_tools/dataset.py index 7e633b32..87e8ccaa 100644 --- a/src/ml_tools/dataset.py +++ b/src/ml_tools/dataset.py @@ -83,7 +83,7 @@ def __init__( self.excluded_tags = config.build.excluded_tags self.min_frame_mass = config.build.min_frame_mass self.filter_by_lq = config.build.filter_by_lq - self.segment_types = [SegmentType.ALL_RANDOM] + self.segment_types = [SegmentType.ALL_RANDOM_MASKED] self.max_segments = config.build.max_segments self.country = config.build.country self.max_frames = config.build.max_frames diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py index ad4b1d1b..57d04dcc 100644 --- a/src/ml_tools/datasetstructures.py +++ b/src/ml_tools/datasetstructures.py @@ -30,6 +30,7 @@ class 
SegmentType(Enum):
 ALL_SECTIONS = 5
 TOP_RANDOM = 6
 ALL_RANDOM_NOMIN = 7
+ ALL_RANDOM_MASKED = 8


 class BaseSample(ABC):
@@ -1071,9 +1072,13 @@ def get_segments(
 frame_indices = np.array(frame_indices)
 segment_count = max(1, len(frame_indices) // segment_frame_spacing)
 segment_count = int(segment_count)
+ mask_length = 25
+
 # probably only counts for all random
 if max_segments is not None and segment_type not in [SegmentType.ALL_SECTIONS]:
 segment_count = min(max_segments, segment_count)
+ # adjust size of mask if we take fewer segments
+ mask_length = max(mask_length, len(frame_indices) // segment_count)
 # take any segment_width frames, this could be done each epoch
 whole_indices = frame_indices
 random_frames = segment_type in [
@@ -1081,12 +1086,13 @@ def get_segments(
 SegmentType.ALL_RANDOM,
 SegmentType.ALL_RANDOM_NOMIN,
 SegmentType.TOP_RANDOM,
+ SegmentType.ALL_RANDOM_MASKED,
 None,
 ]
- random_mask = True
+
 for _ in range(repeats):
 used_indices = []
- if not random_mask:
+ if segment_type != SegmentType.ALL_RANDOM_MASKED or len(whole_indices) < 40:
 frame_indices = whole_indices.copy()

 if random_frames:
@@ -1094,21 +1100,27 @@ def get_segments(
 np.random.shuffle(frame_indices)

 for i in range(segment_count):
- if random_mask:
- if len(whole_indices) < 40:
- frame_indices = whole_indices.copy()
- else:
- mask_start = i * 25
- frame_indices = list(whole_indices[0:mask_start].copy())
- frame_indices.extend(whole_indices[mask_start + 25 :].copy())
+ if segment_type == SegmentType.ALL_RANDOM_MASKED:
+ if len(whole_indices) > 40:
+ mask_start = i * mask_length
+ frame_indices = whole_indices[0:mask_start]
+ frame_indices = np.concatenate(
+ [frame_indices, whole_indices[mask_start + mask_length :]],
+ axis=0,
+ )
+ # maybe some faster way of doing this...
 frame_indices = [
 f for f in frame_indices if f not in used_indices
 ]
 frame_indices = np.uint32(frame_indices)
- np.random.shuffle(frame_indices)
+ np.random.shuffle(frame_indices)

 # always get at least one segment, not doing that anymore
- if len(frame_indices) == 0 or len(segments) >= min_segments:
+ if (
+ len(frame_indices) == 0
+ or min_segments is None
+ or len(segments) >= min_segments
+ ):
 if (
 len(frame_indices) < segment_width / 2.0 and len(segments) > 1
 ) or len(frame_indices) < segment_width / 4:
 break

From d39902ec044e1d1b6e427c9b00baac99afffdadd Mon Sep 17 00:00:00 2001
From: gferraro
Date: Tue, 12 Nov 2024 10:03:12 +0100
Subject: [PATCH 114/117] tidy up

---
 src/classify/clipclassifier.py | 13 -------------
 src/ml_tools/hyperparams.py | 1 -
 src/ml_tools/tools.py | 14 +++++++++++---
 src/rebuildDate.py | 2 +-
 4 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/src/classify/clipclassifier.py b/src/classify/clipclassifier.py
index 4de7ce0c..9f6945a7 100644
--- a/src/classify/clipclassifier.py
+++ b/src/classify/clipclassifier.py
@@ -182,8 +182,6 @@ def classify_clip(self, clip, model, meta_data, reuse_frames=None):
 predictions.model_load_time = time.time() - start

 for i, track in enumerate(clip.tracks):
- logging.info("Track id is %s", track.get_id())
-
 segment_frames = None
 if reuse_frames:
 tracks = meta_data.get("tracks")
@@ -247,17 +245,6 @@ def save_metadata(
 prediction = predictions.prediction_for(track.get_id())
 if prediction is None:
 continue
- # DEBUGGING STUFF REMOVE ME
- # logging.info("Track predictions %s", track)
- # for p in prediction.predictions:
- # logging.info(
- # "Have %s sum %s smoothed %s mass %s",
- # p,
- # np.sum(p.prediction),
- # np.round(p.smoothed_prediction),
- # p.mass,
- # )
- # logging.info("smoothed 
%s", np.round(100 * prediction.class_best_score)) prediction_meta = prediction.get_metadata() prediction_meta["model_id"] = model_id prediction_info.append(prediction_meta) diff --git a/src/ml_tools/hyperparams.py b/src/ml_tools/hyperparams.py index b1868fd0..db558eff 100644 --- a/src/ml_tools/hyperparams.py +++ b/src/ml_tools/hyperparams.py @@ -90,7 +90,6 @@ def segment_width(self): @property def segment_types(self): - segment_types = self.get("segment_type", [SegmentType.ALL_RANDOM]) # convert string to enum type if isinstance(segment_types[0], str): diff --git a/src/ml_tools/tools.py b/src/ml_tools/tools.py index ce604906..cad64667 100644 --- a/src/ml_tools/tools.py +++ b/src/ml_tools/tools.py @@ -194,9 +194,17 @@ def saveclassify_image(data, filename): # saves image channels side by side, expected data to be values in the range of 0->1 Path(filename).parent.mkdir(parents=True, exist_ok=True) r = Image.fromarray(np.uint8(data[:, :, 0])) - g = Image.fromarray(np.uint8(data[:, :, 1])) - b = g - # b = Image.fromarray(np.uint8(data[:, :, 2])) + _, _, channels = data.shape + + if channels == 1: + g = r + else: + g = Image.fromarray(np.uint8(data[:, :, 1])) + + if channels == 2: + b = r + else: + b = Image.fromarray(np.uint8(data[:, :, 2])) concat = np.concatenate((r, g, b), axis=1) # horizontally img = Image.fromarray(np.uint8(concat)) img.save(filename + ".png") diff --git a/src/rebuildDate.py b/src/rebuildDate.py index 7693842d..661e2d60 100644 --- a/src/rebuildDate.py +++ b/src/rebuildDate.py @@ -9,7 +9,7 @@ from dateutil.parser import parse as parse_date parser = argparse.ArgumentParser() -parser.add_argument("data_dir", help="Directory of hdf5 files") +parser.add_argument("data_dir", help="Directory of cptv files") args = parser.parse_args() args.data_dir = Path(args.data_dir) latest_date = None From 48a25e3731fb9e5e98d1621abb0ee639cae729e4 Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 13 Nov 2024 16:07:36 +0100 Subject: [PATCH 115/117] add check for none --- src/ml_tools/interpreter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ml_tools/interpreter.py b/src/ml_tools/interpreter.py index 44426163..140aef35 100644 --- a/src/ml_tools/interpreter.py +++ b/src/ml_tools/interpreter.py @@ -334,7 +334,7 @@ def preprocess_segments( ) continue f = clip.get_frame(region.frame_number) - if region.blank or region.width <= 0 or region.height <= 0: + if region.blank or region.width <= 0 or region.height <= 0 or f is None: continue f.float_arrays() From 52bfd995dfec9cbcf73ad7436524a675afcfab50 Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 13 Nov 2024 16:22:00 +0100 Subject: [PATCH 116/117] tidy up --- src/config/buildconfig.py | 2 +- src/ml_tools/datasetstructures.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/config/buildconfig.py b/src/config/buildconfig.py index ae5e9baf..cf3812a9 100644 --- a/src/config/buildconfig.py +++ b/src/config/buildconfig.py @@ -43,7 +43,7 @@ class BuildConfig(DefaultConfig): max_frames = attr.ib() EXCLUDED_TAGS = ["poor tracking", "part", "untagged", "unidentified"] - + NO_MIN_FRAMES = ["stoat", "mustelid", "weasel", "ferret"] # country bounding boxs COUNTRY_LOCATIONS = { "AU": Rectangle.from_ltrb( diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py index 57d04dcc..9a96db8d 100644 --- a/src/ml_tools/datasetstructures.py +++ b/src/ml_tools/datasetstructures.py @@ -6,6 +6,7 @@ from track.region import Region from abc import ABC, abstractmethod from ml_tools.rectangle import 
Rectangle +from config.buildconfig import BuildConfig from ml_tools import imageprocessing from enum import Enum import attr @@ -385,9 +386,11 @@ def get_segments( if segment_frames is not None: raise Exception("Have not implement this path") min_frames = segment_width / 4.0 - if self.label in ["stoat", "mustelid", "weasel", "ferret"]: + if self.label in BuildConfig.NO_MIN_FRAMES: # try and always get one for these min_frames = 0 + if min_segments is None: + min_segments = 1 # in python3.7+ can just take the values and it guarantees order it was added to dict regions = self.bounds_history From 092280fdde4ddb7e98bd4543044ab57a2666ac56 Mon Sep 17 00:00:00 2001 From: gferraro Date: Wed, 13 Nov 2024 16:36:56 +0100 Subject: [PATCH 117/117] fix segment type load for old meta --- src/ml_tools/hyperparams.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ml_tools/hyperparams.py b/src/ml_tools/hyperparams.py index db558eff..6ed2ba1d 100644 --- a/src/ml_tools/hyperparams.py +++ b/src/ml_tools/hyperparams.py @@ -92,7 +92,10 @@ def segment_width(self): def segment_types(self): segment_types = self.get("segment_type", [SegmentType.ALL_RANDOM]) # convert string to enum type - if isinstance(segment_types[0], str): + if isinstance(segment_types, str): + # old metadata + segment_types = [SegmentType[segment_types]] + elif isinstance(segment_types[0], str): for i in range(len(segment_types)): segment_types[i] = SegmentType[segment_types[i]] return segment_types
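
A note on the sampling scheme introduced in patches 110-113: with ALL_RANDOM_MASKED, each candidate segment draws its frames at random from the track, but with a different contiguous window of frame indices masked out and with frames consumed by earlier draws excluded, so repeated segments from the same track overlap less. Below is a minimal, self-contained sketch of that loop, not the pipeline's implementation: it assumes a plain array of already-filtered frame indices and omits the mass/FFC filtering, repeats, and per-label minimums that the real get_segments applies.

    import numpy as np

    def masked_random_segments(whole_indices, segment_width=25, spacing=9, mask_min=25):
        # whole_indices: sorted frame numbers with usable regions (assumed
        # pre-filtered; the real code also drops FFC, blank and low-mass frames)
        whole_indices = np.asarray(whole_indices, dtype=np.uint32)
        segment_count = max(1, len(whole_indices) // spacing)
        # widen the mask when fewer segments are taken than the track allows
        mask_length = max(mask_min, len(whole_indices) // segment_count)
        used, segments = [], []
        for i in range(segment_count):
            if len(whole_indices) > 40:
                # mask out the i-th contiguous window, then drop frames that
                # earlier segments already consumed
                mask_start = i * mask_length
                pool = np.concatenate(
                    [whole_indices[:mask_start], whole_indices[mask_start + mask_length:]]
                )
                pool = np.uint32([f for f in pool if f not in used])
            else:
                # too short to spare a window: sample from the whole track
                pool = whole_indices.copy()
            np.random.shuffle(pool)
            if len(pool) < segment_width / 4:
                break
            frames = np.sort(pool[:segment_width])
            used.extend(frames.tolist())
            segments.append(frames)
        return segments

    # e.g. a 200-frame track starting at frame 10:
    # segs = masked_random_segments(np.arange(10, 210))

Masking a different window per segment is what keeps repeated samples of a long track from converging on the same high-mass frames; the len > 40 guard simply skips masking on tracks too short to spare one.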