diff --git a/pirequirements.txt b/pirequirements.txt
index c47dd8d7..0614859f 100644
--- a/pirequirements.txt
+++ b/pirequirements.txt
@@ -9,7 +9,7 @@ scipy==1.9.3
 python-dateutil
 scikit-learn==1.1.3
 tables==3.8.0
-h5py==3.8.0
+h5py==3.10.0
 pyyaml==6.0
 pillow==10.0.1
 attrs==24.2.0
@@ -26,4 +26,4 @@ dbus-python==1.3.2
 importlib_resources==5.10.2
 opencv-python==4.8.0.76
 inotify_simple==1.3.5
-python-cptv==0.0.3
\ No newline at end of file
+python-cptv==0.0.5
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index e038aaa7..1ac05980 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ dependencies = [
   "importlib_resources==5.10.2",
   "opencv-python==4.8.0.76",
   "inotify_simple==1.3.5",
-  "python-cptv==0.0.3"
+  "python-cptv==0.0.5"
 ]
 
 [project.scripts]
diff --git a/requirements.txt b/requirements.txt
index 38b432c0..8971715d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-tensorflow~=2.14.0
+tensorflow~=2.17.0
 matplotlib~=3.0
 pytz
 cptv~=1.5.4
@@ -7,7 +7,7 @@ scipy
 python-dateutil
 scikit-learn
 tables~=3.8.0
-h5py~=3.9.0
+h5py~=3.10.0
 pyyaml>=4.2b1
 pillow~=10.0.1
 attrs~=24.2.0
@@ -26,4 +26,4 @@ joblib
 #requires sudo apt-get install libopencv-dev used for ir track extraction on server
 # pybgs==3.2.0.post1 this was used for ir
 inotify_simple==1.3.5
-python-cptv==0.0.3
\ No newline at end of file
+python-cptv==0.0.5
\ No newline at end of file
diff --git a/src/autobuild-cron b/src/autobuild-cron
new file mode 100644
index 00000000..083f5c69
--- /dev/null
+++ b/src/autobuild-cron
@@ -0,0 +1,5 @@
+#run the first of every month
+SHELL=/bin/bash
+BASH_ENV=~/.bashrc_conda
+
+* * 1 * * cp ( cd /home/cp/cacophony/classifier-pipeline/src && ./autobuild.sh /data2/cptv-files) 2>&1 | logger --tag classifier-auto-build
\ No newline at end of file
diff --git a/src/autobuild.sh b/src/autobuild.sh
index ca7360a4..21f0c3b0 100755
--- a/src/autobuild.sh
+++ b/src/autobuild.sh
@@ -1,11 +1,15 @@
-#!/bin/sh
-
+#!/bin/bash
 set -e
 set -x
+conda init bash
+conda activate tf
 config="classifier-thermal.yaml"
-month_ago=$(python3 rebuildDate.py -c $config)
+echo "Saving into $1"
+month_ago=$(python3 rebuildDate.py $1)
 echo $month_ago
-python3 ../../cptv-download/cptv-download.py -l 0 -i 'poor tracking' -i 'untagged' -i 'part' -i 'untagged-by-humans' -i 'unknown' -i 'unidentified' -m 'human-tagged' --start-date "$month_ago" "../clips$month_ago" useremail@email.com userpassword
-echo "Downloading into ../clips$month_ago"
-python3 load.py -target "../clips$month_ago"  -c $config
-python3 build.py -c $config
+python3 ../../cptv-download/cptv-download.py -l 0 -i 'poor tracking' -i 'untagged' -i 'part' -i 'untagged-by-humans' -i 'unknown' -i 'unidentified' -m 'human-tagged' --start-date "$month_ago" "$1" useremail@email.com userpassword
+echo "Downloading into $1"
+python3 build.py -c $config --ext ".cptv" $1
+dt=$(date '+%d%m%Y-%H%M%S');
+export XLA_FLAGS=--xla_gpu_cuda_data_dir=/home/cp/miniconda3/envs/tf/lib/
+python3 train.py -c $config $dt 
\ No newline at end of file
diff --git a/src/build.py b/src/build.py
index 5c48a8c2..3c766af5 100644
--- a/src/build.py
+++ b/src/build.py
@@ -19,8 +19,8 @@
 from ml_tools.tfwriter import create_tf_records
 from ml_tools.irwriter import save_data as save_ir_data
 from ml_tools.thermalwriter import save_data as save_thermal_data
-
-
+from ml_tools.tools import CustomJSONEncoder
+import attrs
 import numpy as np
 
 from pathlib import Path
@@ -57,7 +57,7 @@ def parse_args():
     )
     parser.add_argument("--split-file", help="Json file defining a split")
     parser.add_argument(
-        "--ext", default=".hdf5", help="Extension of files to load .mp4,.cptv,.hdf5"
+        "--ext", default=".cptv", help="Extension of files to load .mp4,.cptv,.hdf5"
     )
 
     parser.add_argument("-c", "--config-file", help="Path to config file to use")
@@ -571,7 +571,7 @@ def add_samples(
     dataset.add_samples(samples)
 
 
-def validate_datasets(datasets, test_bins, date):
+def validate_datasets(datasets, test_bins, after_date):
     # check that clips are only in one dataset
     # that only test set has clips after date
     # that test set is the only dataset with test_clips
@@ -580,7 +580,7 @@ def validate_datasets(datasets, test_bins, date):
     #     for track in dataset.tracks:
     #         assert track.start_time < date
 
-    for i, dataset in enumerate(datasets):
+    for i, dataset in enumerate(datasets[:2]):
         dont_check = set(
             [
                 sample.bin_id
@@ -608,6 +608,15 @@ def validate_datasets(datasets, test_bins, date):
                     if sample.label in split_by_clip
                 ]
             )
+            if other.name == "test" and after_date is not None:
+                dont_check_other = set(
+                    [
+                        sample.bin_id
+                        for sample in other.samples_by_id.values()
+                        if sample.rec_time > after_date
+                    ]
+                )
+                dont_check = dont_check | dont_check_other
             other_bins = set([sample.bin_id for sample in other.samples_by_id.values()])
             other_bins = other_bins - dont_check
             other_clips = set(
@@ -717,6 +726,42 @@ def dump_split_ids(datasets, out_file="datasplit.json"):
     return
 
 
+def rough_balance(datasets):
+    dev_threshold = 2000
+    logging.info("Roughly Balancing")
+    print_counts(*datasets)
+
+    for dataset in datasets:
+        lbl_counts = {}
+        counts = []
+        for label in dataset.labels:
+            label_count = len(dataset.samples_by_label.get(label, []))
+            lbl_counts[label] = label_count
+            counts.append(label_count)
+        counts.sort()
+        std_dev = np.std(counts)
+        logging.info("Counts are %s std dev %s", counts, std_dev)
+        if std_dev < dev_threshold or len(counts) <= 1:
+            logging.info("Not balancing")
+            continue
+        if len(counts) <= 2:
+            cap_at = counts[-2]
+        elif len(counts) < 7:
+            cap_at = counts[-2]
+        else:
+            cap_at = counts[-2]
+        logging.info("Capping dataset %s at %s", dataset.name, cap_at)
+        for lbl, count in lbl_counts.items():
+            if count <= cap_at:
+                continue
+            samples_to_remove = count - cap_at
+            by_labels = dataset.samples_by_label[lbl]
+            np.random.shuffle(by_labels)
+            for i in range(samples_to_remove):
+                dataset.remove_sample(by_labels[i])
+    print_counts(*datasets)
+
+
 def main():
     init_logging()
     args = parse_args()
@@ -782,6 +827,8 @@ def main():
         print("Splitting data set into train / validation")
 
         datasets = split_randomly(master_dataset, config, args.date, test_clips)
+
+        rough_balance(datasets)
         validate_datasets(datasets, test_clips, args.date)
         dump_split_ids(datasets, record_dir / "datasplit.json")
 
@@ -849,15 +896,20 @@ def main():
                     {
                         "segment_frame_spacing": master_dataset.segment_spacing * 9,
                         "segment_width": master_dataset.segment_length,
-                        "segment_type": master_dataset.segment_type,
+                        "segment_types": master_dataset.segment_types,
                         "segment_min_avg_mass": master_dataset.segment_min_avg_mass,
                         "max_segments": master_dataset.max_segments,
                         "dont_filter_segment": True,
                         "skip_ffc": True,
-                        "tag_precedence": config.load.tag_precedence,
+                        "tag_precedence": config.build.tag_precedence,
                         "min_mass": master_dataset.min_frame_mass,
+                        "thermal_diff_norm": config.build.thermal_diff_norm,
+                        "filter_by_lq": master_dataset.filter_by_lq,
+                        "max_frames": master_dataset.max_frames,
                     }
                 )
+            # dont filter the test set,
+            extra_args["filter_by_fp"] = dataset.name != "test"
             create_tf_records(
                 dataset,
                 dir,
@@ -879,10 +931,12 @@ def main():
         "type": config.train.type,
         "counts": dataset_counts,
         "by_label": False,
+        "config": attrs.asdict(config),
+        "segment_types": master_dataset.segment_types,
     }
 
     with open(meta_filename, "w") as f:
-        json.dump(meta_data, f, indent=4)
+        json.dump(meta_data, f, indent=4, cls=CustomJSONEncoder)
 
 
 if __name__ == "__main__":
diff --git a/src/classify/clipclassifier.py b/src/classify/clipclassifier.py
index 0600b49d..9f6945a7 100644
--- a/src/classify/clipclassifier.py
+++ b/src/classify/clipclassifier.py
@@ -11,13 +11,8 @@
 from track.clip import Clip
 from track.cliptrackextractor import ClipTrackExtractor, is_affected_by_ffc
 from ml_tools import tools
-from ml_tools.kerasmodel import KerasModel
 from track.irtrackextractor import IRTrackExtractor
 from ml_tools.previewer import Previewer
-from track.track import Track
-
-from cptv import CPTVReader
-from datetime import datetime
 from ml_tools.interpreter import get_interpreter
 
 
@@ -134,7 +129,7 @@ def process_file(self, filename, cache=None, reuse_frames=None):
         clip = Clip(track_extractor.config, filename)
         clip.load_metadata(
             meta_data,
-            self.config.load.tag_precedence,
+            self.config.build.tag_precedence,
         )
         track_extractor.parse_clip(clip)
 
@@ -250,7 +245,6 @@ def save_metadata(
                 prediction = predictions.prediction_for(track.get_id())
                 if prediction is None:
                     continue
-
                 prediction_meta = prediction.get_metadata()
                 prediction_meta["model_id"] = model_id
                 prediction_info.append(prediction_meta)
diff --git a/src/classify/trackprediction.py b/src/classify/trackprediction.py
index 464569d5..a9af5056 100644
--- a/src/classify/trackprediction.py
+++ b/src/classify/trackprediction.py
@@ -79,6 +79,9 @@ def clarity(self):
         best = np.argsort(self.prediction)
         return self.prediction[best[-1]] - self.prediction[best[-2]]
 
+    def __str__(self):
+        return f"{self.frames} conf: {np.round(100*self.prediction)}"
+
 
 class TrackPrediction:
     """
@@ -107,18 +110,23 @@ def __init__(self, track_id, labels, keep_all=True, start_frame=None):
         self.masses = []
 
     def classified_clip(
-        self, predictions, smoothed_predictions, prediction_frames, top_score=None
+        self,
+        predictions,
+        smoothed_predictions,
+        prediction_frames,
+        masses,
+        top_score=None,
     ):
         self.num_frames_classified = len(predictions)
-        for prediction, smoothed_prediction, frames in zip(
-            predictions, smoothed_predictions, prediction_frames
+        for prediction, smoothed_prediction, frames, mass in zip(
+            predictions, smoothed_predictions, prediction_frames, masses
         ):
             prediction = Prediction(
                 prediction,
                 smoothed_prediction,
                 frames,
                 np.amax(frames),
-                None,
+                mass,
             )
             self.predictions.append(prediction)
 
@@ -162,11 +170,10 @@ def classified_frames(self, frame_numbers, predictions, mass):
             self.class_best_score += smoothed_prediction
 
     def classified_frame(self, frame_number, predictions, mass):
-        self.prediction_frames.append([frame_number])
         self.last_frame_classified = frame_number
         self.num_frames_classified += 1
         self.masses.append(mass)
-        smoothed_prediction = prediction * prediction * mass
+        smoothed_prediction = predictions**2 * mass
 
         prediction = Prediction(
             predictions,
diff --git a/src/config/buildconfig.py b/src/config/buildconfig.py
index 0d203f95..cf3812a9 100644
--- a/src/config/buildconfig.py
+++ b/src/config/buildconfig.py
@@ -22,6 +22,7 @@
 import logging
 from os import path
 from .defaultconfig import DefaultConfig
+from ml_tools.rectangle import Rectangle
 
 
 @attr.s
@@ -34,6 +35,45 @@ class BuildConfig(DefaultConfig):
     min_frame_mass = attr.ib()
     filter_by_lq = attr.ib()
     max_segments = attr.ib()
+    thermal_diff_norm = attr.ib()
+    tag_precedence = attr.ib()
+    excluded_tags = attr.ib()
+    country = attr.ib()
+    use_segments = attr.ib()
+    max_frames = attr.ib()
+
+    EXCLUDED_TAGS = ["poor tracking", "part", "untagged", "unidentified"]
+    NO_MIN_FRAMES = ["stoat", "mustelid", "weasel", "ferret"]
+    # country bounding boxs
+    COUNTRY_LOCATIONS = {
+        "AU": Rectangle.from_ltrb(
+            113.338953078, -10.6681857235, 153.569469029, -43.6345972634
+        ),
+        "NZ": Rectangle.from_ltrb(
+            166.509144322, -34.4506617165, 178.517093541, -46.641235447
+        ),
+    }
+
+    DEFAULT_GROUPS = {
+        0: [
+            "bird",
+            "false-positive",
+            "hedgehog",
+            "possum",
+            "rodent",
+            "mustelid",
+            "cat",
+            "kiwi",
+            "dog",
+            "leporidae",
+            "human",
+            "insect",
+            "pest",
+        ],
+        1: ["unidentified", "other"],
+        2: ["part", "bad track"],
+        3: ["default"],
+    }
 
     @classmethod
     def load(cls, build):
@@ -46,6 +86,12 @@ def load(cls, build):
             min_frame_mass=build["min_frame_mass"],
             filter_by_lq=build["filter_by_lq"],
             max_segments=build["max_segments"],
+            thermal_diff_norm=build["thermal_diff_norm"],
+            tag_precedence=build["tag_precedence"],
+            excluded_tags=build["excluded_tags"],
+            country=build["country"],
+            use_segments=build["use_segments"],
+            max_frames=build["max_frames"],
         )
 
     @classmethod
@@ -58,7 +104,13 @@ def get_defaults(cls):
             segment_min_avg_mass=10,
             min_frame_mass=10,
             filter_by_lq=False,
-            max_segments=5,
+            max_segments=3,
+            thermal_diff_norm=False,
+            tag_precedence=BuildConfig.DEFAULT_GROUPS,
+            excluded_tags=BuildConfig.EXCLUDED_TAGS,
+            country=None,
+            use_segments=True,
+            max_frames=75,
         )
 
     def validate(self):
diff --git a/src/config/config.py b/src/config/config.py
index e78feb40..78ca23be 100644
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -5,7 +5,6 @@
 import logging
 import yaml
 
-from .loadconfig import LoadConfig
 from .trackingconfig import TrackingConfig
 from .trainconfig import TrainConfig
 from .classifyconfig import ClassifyConfig
@@ -31,7 +30,6 @@ class Config(DefaultConfig):
         "wallaby",
     ]
     base_folder = attr.ib()
-    load = attr.ib()
     labels = attr.ib()
     build = attr.ib()
     tracking = attr.ib()
@@ -66,7 +64,6 @@ def load_from_stream(cls, stream):
         return cls(
             base_folder=Path(base_folder),
             tracking=TrackingConfig.load(raw["tracking"]),
-            load=LoadConfig.load(raw["load"]),
             train=TrainConfig.load(raw["train"], base_folder),
             classify=ClassifyConfig.load(raw["classify"]),
             reprocess=raw["reprocess"],
@@ -89,7 +86,6 @@ def get_defaults(cls):
             worker_threads=0,
             build=BuildConfig.get_defaults(),
             tracking=TrackingConfig.get_defaults(),
-            load=LoadConfig.get_defaults(),
             train=TrainConfig.get_defaults(),
             classify=ClassifyConfig.get_defaults(),
             debug=False,
@@ -101,7 +97,6 @@ def validate(self):
         self.build.validate()
         for tracker in self.tracking.values():
             tracker.validate()
-        self.load.validate()
         self.train.validate()
         self.classify.validate()
         return True
diff --git a/src/config/loadconfig.py b/src/config/loadconfig.py
deleted file mode 100644
index bb28f7d3..00000000
--- a/src/config/loadconfig.py
+++ /dev/null
@@ -1,83 +0,0 @@
-"""
-classifier-pipeline - this is a server side component that manipulates cptv
-files and to create a classification model of animals present
-Copyright (C) 2018, The Cacophony Project
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-
-import attr
-
-from .defaultconfig import DefaultConfig
-
-
-@attr.s
-class LoadConfig(DefaultConfig):
-    EXCLUDED_TAGS = ["poor tracking", "part", "untagged", "unidentified"]
-
-    DEFAULT_GROUPS = {
-        0: [
-            "bird",
-            "false-positive",
-            "hedgehog",
-            "possum",
-            "rodent",
-            "mustelid",
-            "cat",
-            "kiwi",
-            "dog",
-            "leporidae",
-            "human",
-            "insect",
-            "pest",
-        ],
-        1: ["unidentified", "other"],
-        2: ["part", "bad track"],
-        3: ["default"],
-    }
-
-    enable_compression = attr.ib()
-    include_filtered_channel = attr.ib()
-    preview = attr.ib()
-    tag_precedence = attr.ib()
-    cache_to_disk = attr.ib()
-    high_quality_optical_flow = attr.ib()
-    excluded_tags = attr.ib()
-
-    @classmethod
-    def load(cls, config):
-        return cls(
-            enable_compression=config["enable_compression"],
-            include_filtered_channel=config["include_filtered_channel"],
-            preview=config["preview"],
-            tag_precedence=config["tag_precedence"],
-            cache_to_disk=config["cache_to_disk"],
-            high_quality_optical_flow=config["high_quality_optical_flow"],
-            excluded_tags=config["excluded_tags"],
-        )
-
-    @classmethod
-    def get_defaults(cls):
-        return cls(
-            enable_compression=False,
-            include_filtered_channel=True,
-            preview=None,
-            tag_precedence=LoadConfig.DEFAULT_GROUPS,
-            cache_to_disk=False,
-            high_quality_optical_flow=True,
-            excluded_tags=LoadConfig.EXCLUDED_TAGS,
-        )
-
-    def validate(self):
-        return True
diff --git a/src/ml_tools/dataset.py b/src/ml_tools/dataset.py
index b7690999..87e8ccaa 100644
--- a/src/ml_tools/dataset.py
+++ b/src/ml_tools/dataset.py
@@ -20,7 +20,7 @@
 from ml_tools import tools
 from track.region import Region
 import json
-from config.loadconfig import LoadConfig
+from config.buildconfig import BuildConfig
 from pathlib import Path
 
 
@@ -64,13 +64,13 @@ def __init__(
         self.label_caps = {}
         self.use_segments = True
         if config:
-            self.tag_precedence = config.load.tag_precedence
+            self.tag_precedence = config.build.tag_precedence
             self.type = config.train.type
             if config.train.type == "IR":
                 self.use_segments = False
                 self.segment_length = 1
             else:
-                self.use_segments = config.train.hyper_params.get("use_segments", True)
+                self.use_segments = config.build.use_segments
                 if self.use_segments:
                     self.segment_length = config.build.segment_length
                 else:
@@ -80,13 +80,16 @@ def __init__(
             self.banned_clips = config.build.banned_clips
             self.included_labels = config.labels
             self.segment_min_avg_mass = config.build.segment_min_avg_mass
-            self.excluded_tags = config.load.excluded_tags
+            self.excluded_tags = config.build.excluded_tags
             self.min_frame_mass = config.build.min_frame_mass
             self.filter_by_lq = config.build.filter_by_lq
-            self.segment_type = SegmentType.ALL_RANDOM
+            self.segment_types = [SegmentType.ALL_RANDOM_MASKED]
             self.max_segments = config.build.max_segments
+            self.country = config.build.country
+            self.max_frames = config.build.max_frames
         else:
-            self.tag_precedence = LoadConfig.DEFAULT_GROUPS
+            self.country = "NZ"
+            self.tag_precedence = BuildConfig.DEFAULT_GROUPS
             self.filter_by_lq = False
             # number of seconds each segment should be
             if self.use_segments:
@@ -97,7 +100,15 @@ def __init__(
             self.segment_spacing = 1
             self.segment_min_avg_mass = 10
             self.min_frame_mass = 16
-            self.segment_type = SegmentType.ALL_RANDOM
+            self.segment_types = [SegmentType.ALL_RANDOM]
+            self.max_frames = 75
+
+        self.country_rectangle = BuildConfig.COUNTRY_LOCATIONS.get(self.country)
+        logging.info(
+            "Filtering by country %s have boundying %s",
+            self.country,
+            self.country_rectangle,
+        )
         self.max_frame_mass = None
         self.filtered_stats = {
             "confidence": 0,
@@ -204,7 +215,12 @@ def load_clip(self, db_clip, dont_filter_segment=False):
         except:
             logging.error("Could not load %s", db_clip, exc_info=True)
             return 0
-        if clip_header is None or filter_clip(clip_header):
+        if clip_header is None or filter_clip(
+            clip_header,
+            clip_header.location,
+            self.country_rectangle,
+            self.filtered_stats,
+        ):
             return 0
         filtered = 0
         added = 0
@@ -228,7 +244,7 @@ def load_clip(self, db_clip, dont_filter_segment=False):
                 track_header.get_segments(
                     segment_width,
                     segment_frame_spacing,
-                    self.segment_type,
+                    self.segment_types,
                     self.segment_min_avg_mass,
                     max_segments=self.max_segments,
                     dont_filter=dont_filter_segment,
@@ -488,46 +504,6 @@ def regroup(
     def has_data(self):
         return len(self.samples_by_id) > 0
 
-    #
-    # def recalculate_segments(self, segment_type=SegmentType.ALL_RANDOM):
-    #     self.samples_by_bin.clear()
-    #     self.samples_by_label.clear()
-    #     del self.samples[:]
-    #     del self.samples
-    #     self.samples = []
-    #     self.samples_by_label = {}
-    #     self.samples_by_bin = {}
-    #     logging.info("%s generating segments  type %s", self.name, segment_type)
-    #     start = time.time()
-    #     empty_tracks = []
-    #     filtered_stats = 0
-    #
-    #     for track in self.tracks:
-    #         segment_frame_spacing = int(
-    #             round(self.segment_spacing * track.frames_per_second)
-    #         )
-    #         segment_width = self.segment_length
-    #         track.calculate_segments(
-    #             segment_frame_spacing,
-    #             segment_width,
-    #             segment_type,
-    #             segment_min_mass=segment_min_avg_mass,
-    #         )
-    #         filtered_stats = filtered_stats + track.filtered_stats["segment_mass"]
-    #         if len(track.segments) == 0:
-    #             empty_tracks.append(track)
-    #             continue
-    #         for sample in track.segments:
-    #             self.add_clip_sample_mappings(sample)
-    #
-    #     self.rebuild_cdf()
-    #     logging.info(
-    #         "%s #segments %s filtered stats are %s took  %s",
-    #         self.name,
-    #         len(self.samples),
-    #         filtered_stats,
-    #         time.time() - start,
-    #     )
     def remove_sample_by_id(self, id, bin_id):
         del self.samples_by_id[id]
         try:
@@ -585,7 +561,9 @@ def filter_track(track_header, excluded_tags, filtered_stats={}):
         return True
 
     if track_header.human_tags is not None:
-        found_tags = [tag for tag in track_header.human_tags if tag in excluded_tags]
+        found_tags = [
+            tag[0] for tag in track_header.human_tags if tag[0] in excluded_tags
+        ]
         if len(found_tags) > 0:
             filter_tags = filtered_stats.setdefault("tag_names", set())
             filter_tags |= set(found_tags)
@@ -614,12 +592,37 @@ def filter_track(track_header, excluded_tags, filtered_stats={}):
     return False
 
 
-def filter_clip(clip, filtered_stats={}):
+def filter_clip(clip, location, location_bounds, filtered_stats=None, after_date=None):
     # remove tracks of trapped animals
     if (clip.events is not None and "trap" in clip.events.lower()) or (
         clip.trap is not None and "trap" in clip.trap.lower()
     ):
-        self.filtered_stats["trap"] += 1
+        if filtered_stats is not None:
+            if "trap" in filtered_stats:
+                filtered_stats["trap"] += 1
+            else:
+                filtered_stats["trap"] = 1
         logging.info("Filtered because in trap")
         return True
+
+    if (
+        location is not None
+        and location_bounds is not None
+        and not location_bounds.contains(*location)
+    ):
+        if filtered_stats is not None:
+            if "location" in filtered_stats:
+                filtered_stats["location"] += 1
+            else:
+                filtered_stats["location"] = 1
+        return True
+
+    if after_date is not None and clip.rec_time <= after_date:
+        if filtered_stats is not None:
+            if "date" in filtered_stats:
+                filtered_stats["date"] += 1
+            else:
+                filtered_stats["date"] = 1
+        return True
+
     return False
diff --git a/src/ml_tools/datasetstructures.py b/src/ml_tools/datasetstructures.py
index a48e247f..9a96db8d 100644
--- a/src/ml_tools/datasetstructures.py
+++ b/src/ml_tools/datasetstructures.py
@@ -6,6 +6,7 @@
 from track.region import Region
 from abc import ABC, abstractmethod
 from ml_tools.rectangle import Rectangle
+from config.buildconfig import BuildConfig
 from ml_tools import imageprocessing
 from enum import Enum
 import attr
@@ -17,6 +18,9 @@
 FRAME_SIZE = 32
 MIN_SIZE = 4
 
+# hard coded for now
+FP_LABELS = ["other", "unidentified", "rain", "false-positive", "water", "insect"]
+
 
 class SegmentType(Enum):
     IMPORTANT_RANDOM = 0
@@ -27,6 +31,7 @@ class SegmentType(Enum):
     ALL_SECTIONS = 5
     TOP_RANDOM = 6
     ALL_RANDOM_NOMIN = 7
+    ALL_RANDOM_MASKED = 8
 
 
 class BaseSample(ABC):
@@ -114,6 +119,7 @@ class ClipHeader:
     trap = attr.ib()
     tracks = attr.ib()
     ffc_frames = attr.ib()
+    country_code = attr.ib()
     frame_temp_median = attr.ib(default=None)
 
     def get_samples(self):
@@ -137,14 +143,19 @@ def __init__(
         ffc_frames=None,
         sample_frames_indices=None,
         station_id=None,
-        rec_time=None,
+        start_time=None,
         source_file=None,
         camera=None,
         confidence=None,
         human_tags=None,
         remapped_lbl=None,
         mega_missed_regions=None,
+        skip_ffc=True,
+        fp_frames=None,
     ):
+
+        self.fp_frames = fp_frames
+        self.start_time = start_time
         # regions that megadetector found nothing in
         self.mega_missed_regions = mega_missed_regions
         self.station_id = station_id
@@ -173,12 +184,8 @@ def __init__(
         self.frame_crop = None
         self.num_frames = num_frames
         self.important_predicted = 0
-
-        mass_history = np.uint16(
-            [region.mass for region in self.regions_by_frame.values()]
-        )
         mass_history = [
-            region.frame_number
+            region.mass
             for region in self.regions_by_frame.values()
             if region.mass > 0
             and (
@@ -241,31 +248,62 @@ def add_sample(self, sample):
         self.samples.append(sample)
 
     def calculate_sample_frames(
-        self, min_mass=None, max_mass=None, ffc_frames=None, skip_last=None
+        self,
+        min_mass=None,
+        max_mass=None,
+        ffc_frames=None,
+        skip_last=None,
+        max_frames=None,
     ):
+        crop_rectangle = Rectangle(2, 2, 160 - 2 * 2, 140 - 2 * 2)
+
+        logging.debug(
+            "Calculating sample with min %s and max %s ffc %s and skip %s",
+            min_mass,
+            max_mass,
+            ffc_frames,
+            skip_last,
+        )
         frame_numbers = list(self.regions_by_frame.keys())
+        previous_mass = None
+
         if skip_last is not None:
             skip_x = int(len(frame_numbers) * skip_last)
             frame_numbers = frame_numbers[:-skip_x]
-        frame_numbers = [
-            frame
-            for frame in frame_numbers
-            if (ffc_frames is None or frame not in ffc_frames)
-            and (
-                self.mega_missed_regions is None
-                or frame not in self.mega_missed_regions
-            )
-        ]
-        frame_numbers.sort()
 
+        frame_numbers.sort()
         for frame_num in frame_numbers:
             region = self.regions_by_frame[frame_num]
-            if region.mass == 0 or region.blank:
+
+            if (
+                region.mass == 0
+                or region.blank
+                or region.width <= 0
+                or region.height <= 0
+            ):
+                continue
+            if ffc_frames is not None and frame_num in ffc_frames:
                 continue
+
+            if (
+                self.mega_missed_regions is not None
+                and frame_num in self.mega_missed_regions
+            ):
+                continue
+
             if min_mass is not None and region.mass < min_mass:
                 continue
             if max_mass is not None and region.mass > max_mass:
                 continue
+            # dont use regions on the edge if the mass deviates too much from the last known good mass
+            region.set_is_along_border(crop_rectangle)
+            if region.is_along_border:
+                if previous_mass is not None:
+                    previous_mass_thresh = previous_mass * 0.1
+                    if (abs(previous_mass - region.mass)) >= previous_mass_thresh:
+                        continue
+            else:
+                previous_mass = region.mass
             f = FrameSample(
                 self.clip_id,
                 self.track_id,
@@ -279,6 +317,8 @@ def calculate_sample_frames(
                 track_median_mass=self.median_mass,
             )
             self.samples.append(f)
+        if max_frames is not None and len(self.samples) > max_frames:
+            self.samples = np.random.choice(self.samples, max_frames, replace=False)
 
     def remove_sample(self, f):
         self.samples.remove(f)
@@ -329,7 +369,7 @@ def get_segments(
         self,
         segment_width,
         segment_frame_spacing=9,
-        segment_type=SegmentType.ALL_RANDOM,
+        segment_types=[SegmentType.ALL_RANDOM],
         segment_min_mass=None,
         repeats=1,
         max_segments=None,
@@ -339,16 +379,22 @@ def get_segments(
         location=None,
         segment_frames=None,
         from_last=None,
+        frame_min_mass=None,
+        filter_by_fp=True,
+        min_segments=None,
     ):
         if segment_frames is not None:
             raise Exception("Have not implement this path")
-        min_frames = segment_width
-        if self.label == "vehicle" or self.label == "human":
-            min_frames = segment_width / 4.0
+        min_frames = segment_width / 4.0
+        if self.label in BuildConfig.NO_MIN_FRAMES:
+            # try and always get one for these
+            min_frames = 0
+            if min_segments is None:
+                min_segments = 1
 
         # in python3.7+ can just take the values and it guarantees order it was added to dict
         regions = self.bounds_history
-        self.samples, self.filtered_stats = get_segments(
+        self.samples, filtered_stats = get_segments(
             self.clip_id,
             self.track_id,
             self.start_frame,
@@ -361,13 +407,18 @@ def get_segments(
             lower_mass=self.lower_mass,
             repeats=repeats,
             min_frames=min_frames,
-            segment_type=segment_type,
+            segment_types=segment_types,
             max_segments=max_segments,
             station_id=self.station_id,
             source_file=self.source_file,
             dont_filter=dont_filter,
             skip_ffc=skip_ffc,
+            frame_min_mass=frame_min_mass,
+            fp_frames=self.fp_frames if filter_by_fp else None,
+            rec_time=self.start_time,
+            min_segments=min_segments,
         )
+        self.filtered_stats.update(filtered_stats)
         # GP could get this from the tracks when writing
         # but might be best to keep samples independent for ease
         for s in self.samples:
@@ -922,16 +973,15 @@ def get_segments(
     track_id,
     start_frame,
     regions,
-    segment_frame_spacing=9,
     segment_width=25,
+    segment_frame_spacing=9,
     label=None,
     segment_min_mass=None,
     ffc_frames=[],
     lower_mass=0,
     repeats=1,
     min_frames=None,
-    segment_frames=None,
-    segment_type=SegmentType.ALL_RANDOM,
+    segment_types=[SegmentType.ALL_RANDOM],
     max_segments=None,
     location=None,
     station_id=None,
@@ -940,169 +990,234 @@ def get_segments(
     source_file=None,
     dont_filter=False,
     skip_ffc=True,
+    frame_min_mass=None,
+    fp_frames=None,
+    repeat_frame_indices=True,
+    min_segments=None,
 ):
-    if segment_type == SegmentType.ALL_RANDOM_NOMIN:
-        segment_min_mass = None
     if min_frames is None:
-        min_frames = 25
+        min_frames = segment_width / 4.0
     segments = []
     mass_history = np.uint16([region.mass for region in regions])
     filtered_stats = {"segment_mass": 0, "too short": 0}
 
     has_no_mass = np.sum(mass_history) == 0
-    frame_indices = [
-        region.frame_number
-        for region in regions
-        if (has_no_mass or region.mass > 0)
-        and (
-            ffc_frames is None
-            or skip_ffc is False
-            or region.frame_number not in ffc_frames
-        )
-        and not region.blank
-        and region.width > 0
-        and region.height > 0
-    ]
-    if len(frame_indices) == 0:
-        logging.warn("Nothing to load for %s - %s", clip_id, track_id)
-        return [], filtered_stats
-    if segment_min_mass is not None:
-        segment_min_mass = min(
-            segment_min_mass,
-            np.median(mass_history[frame_indices - start_frame]),
-        )
-    else:
-        segment_min_mass = 1
-        # remove blank frames
-
-    if segment_type == SegmentType.TOP_RANDOM:
-        # take top 50 mass frames
-        frame_indices = sorted(
-            frame_indices,
-            key=lambda f_i: mass_history[f_i - start_frame],
-            reverse=True,
-        )
-        frame_indices = frame_indices[:50]
-        frame_indices.sort()
-    if segment_type == SegmentType.TOP_SEQUENTIAL:
-        return get_top_mass_segments(
-            clip_id,
-            track_id,
-            label,
-            camera,
-            segment_width,
-            segment_frame_spacing,
-            mass_history,
-            ffc_frames,
-            regions,
-            start_frame,
-            lower_mass,
-            segment_min_mass,
-            source_file=source_file,
-        )
-    # if len(frame_indices) < min_frames:
-    # filtered_stats["too short"] += 1
-    # return segments, filtered_stats
-    frame_indices = np.array(frame_indices)
-    segment_count = max(1, len(frame_indices) // segment_frame_spacing)
-    segment_count = int(segment_count)
-    if max_segments is not None:
-        segment_count = min(max_segments, segment_count)
-    # take any segment_width frames, this could be done each epoch
-    whole_indices = frame_indices
-    random_frames = segment_type in [
-        SegmentType.IMPORTANT_RANDOM,
-        SegmentType.ALL_RANDOM,
-        SegmentType.ALL_RANDOM_NOMIN,
-        SegmentType.TOP_RANDOM,
-        None,
-    ]
-    for _ in range(repeats):
-        frame_indices = whole_indices.copy()
-        if random_frames:
-            # random_frames and not random_sections:
-            np.random.shuffle(frame_indices)
-        for i in range(segment_count):
-            # always get atleast one segmnet
-            if i > 0:
-                if (len(frame_indices) < segment_width and len(segments) > 1) or len(
-                    frame_indices
-                ) < (segment_width / 4.0):
-                    break
-
-            if segment_type == SegmentType.ALL_SECTIONS:
-                # random frames from section 2.2 * segment_width
-                section = frame_indices[: int(segment_width * 2.2)]
-                indices = np.random.choice(
-                    len(section),
-                    min(segment_width, len(section)),
-                    replace=False,
-                )
-                frames = section[indices]
-                frame_indices = frame_indices[segment_frame_spacing:]
-            elif random_frames:
-                # frame indices already randomized so just need to grab some
-                frames = frame_indices[:segment_width]
-                frame_indices = frame_indices[segment_width:]
-            else:
-                segment_start = i * segment_frame_spacing
-                segment_end = segment_start + segment_width
-                segment_end = min(len(frame_indices), segment_end)
-                frames = frame_indices[segment_start:segment_end]
-
-            remaining = segment_width - len(frames)
-            # sample another same frames again if need be
-            if remaining > 0:
-                extra_frames = np.random.choice(
-                    frames,
-                    min(remaining, len(frames)),
-                    replace=False,
-                )
-                frames = np.concatenate([frames, extra_frames])
-            frames.sort()
-            relative_frames = frames - start_frame
-            mass_slice = mass_history[relative_frames]
-            segment_mass = np.sum(mass_slice)
-            segment_avg_mass = segment_mass / len(mass_slice)
-            filtered = False
-            if segment_min_mass and segment_avg_mass < segment_min_mass:
-                if dont_filter:
-                    filtered = True
-                else:
-                    filtered_stats["segment_mass"] += 1
-                    continue
-
-            # temp_slice = frame_temp_median[relative_frames]
-            region_slice = regions[relative_frames]
-            movement_data = None
-            if segment_avg_mass < 50:
-                segment_weight_factor = 0.75
-            elif segment_avg_mass < 100:
-                segment_weight_factor = 1
-            else:
-                segment_weight_factor = 1.2
 
-            for z, f in enumerate(frames):
-                assert region_slice[z].frame_number == f
-            segment = SegmentHeader(
+    for segment_type in segment_types:
+        s_min_mass = segment_min_mass
+        if segment_type == SegmentType.ALL_RANDOM_NOMIN:
+            s_min_mass = None
+
+        frame_indices = [
+            region.frame_number
+            for region in regions
+            if (has_no_mass or region.mass > 0)
+            and (
+                ffc_frames is None
+                or skip_ffc is False
+                or region.frame_number not in ffc_frames
+            )
+            and not region.blank
+            and region.width > 0
+            and region.height > 0
+            and (
+                (has_no_mass or frame_min_mass is None) or region.mass >= frame_min_mass
+            )
+        ]
+        if fp_frames is not None and label not in FP_LABELS:
+            frame_indices = [f for f in frame_indices if f not in fp_frames]
+        if len(frame_indices) == 0:
+            logging.warn("Nothing to load for %s - %s", clip_id, track_id)
+            return [], filtered_stats
+        if s_min_mass is not None:
+            s_min_mass = min(
+                s_min_mass,
+                np.median(mass_history[frame_indices - start_frame]),
+            )
+        else:
+            s_min_mass = 1
+            # remove blank frames
+
+        if segment_type == SegmentType.TOP_RANDOM:
+            # take top 50 mass frames
+            frame_indices = sorted(
+                frame_indices,
+                key=lambda f_i: mass_history[f_i - start_frame],
+                reverse=True,
+            )
+            frame_indices = frame_indices[:50]
+            frame_indices.sort()
+        if segment_type == SegmentType.TOP_SEQUENTIAL:
+            new_segments, filtered = get_top_mass_segments(
                 clip_id,
                 track_id,
-                start_frame=start_frame,
-                frames=segment_width,
-                weight=segment_weight_factor,
-                mass=segment_mass,
-                label=label,
-                regions=region_slice,
-                frame_indices=frames,
-                movement_data=movement_data,
-                camera=camera,
-                location=location,
-                station_id=station_id,
-                rec_time=rec_time,
+                label,
+                camera,
+                segment_width,
+                segment_frame_spacing,
+                mass_history,
+                ffc_frames,
+                regions,
+                start_frame,
+                lower_mass,
+                s_min_mass,
                 source_file=source_file,
-                filtered=filtered,
             )
-            segments.append(segment)
+            segments.extend(new_segments)
+            filtered_stats.merge(filtered)
+            continue
+        if len(frame_indices) < min_frames and (
+            min_segments == 0 or min_segments is None
+        ):
+            filtered_stats["too short"] += 1
+            continue
+
+        frame_indices = np.array(frame_indices)
+        segment_count = max(1, len(frame_indices) // segment_frame_spacing)
+        segment_count = int(segment_count)
+        mask_length = 25
+
+        # probably only counts for all random
+        if max_segments is not None and segment_type not in [SegmentType.ALL_SECTIONS]:
+            segment_count = min(max_segments, segment_count)
+            # adjust size of mask if we take less segments
+            mask_length = max(mask_length, len(frame_indices) // segment_count)
+        # take any segment_width frames, this could be done each epoch
+        whole_indices = frame_indices
+        random_frames = segment_type in [
+            SegmentType.IMPORTANT_RANDOM,
+            SegmentType.ALL_RANDOM,
+            SegmentType.ALL_RANDOM_NOMIN,
+            SegmentType.TOP_RANDOM,
+            SegmentType.ALL_RANDOM_MASKED,
+            None,
+        ]
+
+        for _ in range(repeats):
+            used_indices = []
+            if segment_type != SegmentType.ALL_RANDOM_MASKED or len(whole_indices) < 40:
+                frame_indices = whole_indices.copy()
+
+                if random_frames:
+                    # random_frames and not random_sections:
+                    np.random.shuffle(frame_indices)
+
+            for i in range(segment_count):
+                if segment_type == SegmentType.ALL_RANDOM_MASKED:
+                    if len(whole_indices) > 40:
+                        mask_start = i * mask_length
+                        frame_indices = whole_indices[0:mask_start]
+                        frame_indices = np.concatenate(
+                            [frame_indices, whole_indices[mask_start + mask_length :]],
+                            axis=0,
+                        )
+                        # maybe some faster way of doing this...
+                        frame_indices = [
+                            f for f in frame_indices if f not in used_indices
+                        ]
+                        frame_indices = np.uint32(frame_indices)
+                        np.random.shuffle(frame_indices)
+
+                # always get atleast one segment, not doing annymore
+                if (
+                    len(frame_indices) == 0
+                    or min_segments is None
+                    or len(segments) >= min_segments
+                ):
+                    if (
+                        len(frame_indices) < segment_width / 2.0 and len(segments) > 1
+                    ) or len(frame_indices) < segment_width / 4:
+                        break
+
+                if segment_type == SegmentType.ALL_SECTIONS:
+                    # random frames from section 2.2 * segment_width
+                    section = frame_indices[: int(segment_width * 2.2)]
+
+                    indices = np.random.choice(
+                        len(section),
+                        min(segment_width, len(section)),
+                        replace=False,
+                    )
+                    frames = section[indices]
+                    # might need to change that gp 11/05 - 2024
+                    frame_indices = frame_indices[segment_width:]
+                elif random_frames:
+                    # frame indices already randomized so just need to grab some
+                    frames = frame_indices[:segment_width]
+                    used_indices.extend(frames)
+                    frame_indices = frame_indices[segment_width:]
+                else:
+                    segment_start = i * segment_frame_spacing
+                    segment_end = segment_start + segment_width
+                    segment_end = min(len(frame_indices), segment_end)
+                    frames = frame_indices[segment_start:segment_end]
+
+                remaining = segment_width - len(frames)
+                # sample another same frames again if need be
+                if remaining > 0:
+                    extra_frames = np.random.choice(
+                        frames,
+                        min(remaining, len(frames)),
+                        replace=False,
+                    )
+                    frames = np.concatenate([frames, extra_frames])
+                frames.sort()
+                relative_frames = frames - start_frame
+                mass_slice = mass_history[relative_frames]
+                segment_mass = np.sum(mass_slice)
+                segment_avg_mass = segment_mass / len(mass_slice)
+                filtered = False
+                if s_min_mass and segment_avg_mass < s_min_mass:
+                    if dont_filter:
+                        filtered = True
+                    else:
+                        filtered_stats["segment_mass"] += 1
+                        continue
+
+                # temp_slice = frame_temp_median[relative_frames]
+                region_slice = regions[relative_frames]
+                movement_data = None
+                if segment_avg_mass < 50:
+                    segment_weight_factor = 0.75
+                elif segment_avg_mass < 100:
+                    segment_weight_factor = 1
+                else:
+                    segment_weight_factor = 1.2
+
+                for z, f in enumerate(frames):
+                    assert region_slice[z].frame_number == f
+
+                if repeat_frame_indices:
+                    # i think this can be default, means we dont need to handle
+                    # short segments elsewhere
+                    if len(frames) < segment_width:
+                        extra_samples = np.random.choice(
+                            frames, segment_width - len(frames)
+                        )
+                        frames = list(frames)
+                        frames.extend(extra_samples)
+                        frames.sort()
+
+                segment = SegmentHeader(
+                    clip_id,
+                    track_id,
+                    start_frame=start_frame,
+                    frames=segment_width,
+                    weight=segment_weight_factor,
+                    mass=segment_mass,
+                    label=label,
+                    regions=region_slice,
+                    frame_indices=frames,
+                    movement_data=movement_data,
+                    camera=camera,
+                    location=location,
+                    station_id=station_id,
+                    rec_time=rec_time,
+                    source_file=source_file,
+                    filtered=filtered,
+                )
+                segments.append(segment)
+
     return segments, filtered_stats
 
 
diff --git a/src/ml_tools/forestmodel.py b/src/ml_tools/forestmodel.py
index a8ccd95a..c3e17a66 100644
--- a/src/ml_tools/forestmodel.py
+++ b/src/ml_tools/forestmodel.py
@@ -228,7 +228,7 @@ def forest_features(
 
     for i, frame in enumerate(track_frames):
         region = regions[i]
-        if region.blank or region.width == 0 or region.height == 0:
+        if region.blank or region.width > 0 or region.height > 0:
             prev_count = 0
             continue
 
diff --git a/src/ml_tools/hyperparams.py b/src/ml_tools/hyperparams.py
index 946f4454..6ed2ba1d 100644
--- a/src/ml_tools/hyperparams.py
+++ b/src/ml_tools/hyperparams.py
@@ -24,10 +24,11 @@ def insert_defaults(self):
         self["square_width"] = self.square_width
         self["frame_size"] = self.frame_size
         self["segment_width"] = self.segment_width
-
-        self["segment_type"] = self.segment_type
-        self["multi_label"] = False
+        self["segment_types"] = self.segment_types
+        self["multi_label"] = True
         self["diff_norm"] = self.diff_norm
+        self["thermal_diff_norm"] = self.thermal_diff_norm
+
         self["smooth_predictions"] = self.smooth_predictions
         self["channels"] = self.channels
 
@@ -51,6 +52,18 @@ def output_dim(self):
     def smooth_predictions(self):
         return self.get("smooth_predictions", True)
 
+    @property
+    def excluded_labels(self):
+        return self.get("excluded_labels", None)
+
+    @property
+    def remapped_labels(self):
+        return self.get("remapped_labels", None)
+
+    @property
+    def thermal_diff_norm(self):
+        return self.get("thermal_diff_norm", False)
+
     @property
     def diff_norm(self):
         return self.get("diff_norm", True)
@@ -76,12 +89,16 @@ def segment_width(self):
         return self.get("segment_width", 25 if self.use_segments else 1)
 
     @property
-    def segment_type(self):
-        segment_type = self.get("segment_type", SegmentType.ALL_RANDOM.name)
-        if isinstance(segment_type, str):
-            return SegmentType[segment_type]
-        else:
-            return segment_type
+    def segment_types(self):
+        segment_types = self.get("segment_type", [SegmentType.ALL_RANDOM])
+        # convert string to enum type
+        if isinstance(segment_types, str):
+            # old metadata
+            segment_types = [SegmentType[segment_types]]
+        elif isinstance(segment_types[0], str):
+            for i in range(len(segment_types)):
+                segment_types[i] = SegmentType[segment_types[i]]
+        return segment_types
 
     @property
     def mvm(self):
@@ -105,7 +122,7 @@ def label_smoothing(self):
 
     @property
     def base_training(self):
-        return self.get("base_training", False)
+        return self.get("base_training", True)
 
     @property
     def retrain_layer(self):
@@ -151,6 +168,13 @@ def square_width(self):
     def frame_size(self):
         return self.get("frame_size", 32)
 
+    def set_use_segments(self, use_segments):
+        self["use_segments"] = use_segments
+        if use_segments:
+            self["square_width"] = 5
+        else:
+            self["square_width"] = 1
+
     #
     # @property
     # def red_type(self):
diff --git a/src/ml_tools/imageprocessing.py b/src/ml_tools/imageprocessing.py
index 6b2e4fbf..42312dee 100644
--- a/src/ml_tools/imageprocessing.py
+++ b/src/ml_tools/imageprocessing.py
@@ -5,6 +5,7 @@
 from PIL import Image
 from scipy import ndimage
 from PIL import Image
+import logging
 
 
 def resize_and_pad(
@@ -19,18 +20,20 @@ def resize_and_pad(
     extra_v=0,
 ):
     scale_percent = (new_dim[:2] / np.array(frame.shape[:2])).min()
-    width = int(frame.shape[1] * scale_percent)
-    height = int(frame.shape[0] * scale_percent)
+    width = round(frame.shape[1] * scale_percent)
+    height = round(frame.shape[0] * scale_percent)
     width = max(width, 1)
     height = max(height, 1)
+
+    width = min(width, new_dim[0])
+    height = min(height, new_dim[1])
+
     if len(frame.shape) == 3:
         resize_dim = (width, height, frame.shape[2])
     else:
         resize_dim = (width, height)
     if pad is None:
         pad = np.min(frame)
-    else:
-        pad = 0
 
     resized = np.full(new_dim, pad, dtype=frame.dtype)
     offset_x = 0
@@ -40,16 +43,15 @@ def resize_and_pad(
     offset_x = (new_dim[1] - frame_width) // 2
     offset_y = (new_dim[0] - frame_height) // 2
     if keep_edge and crop_region is not None:
-        if region.left == crop_region.left:
+        if region.left <= crop_region.left:
             offset_x = 0
-
-        elif region.right == crop_region.right:
+        elif region.right >= crop_region.right:
             offset_x = new_dim[1] - frame_width
 
-        if region.top == crop_region.top:
+        if region.top <= crop_region.top:
             offset_y = 0
 
-        elif region.bottom == crop_region.bottom:
+        elif region.bottom >= crop_region.bottom:
             offset_y = new_dim[0] - frame_height
     if len(resized.shape) == 3:
         resized[
@@ -76,20 +78,14 @@ def resize_cv(image, dim, interpolation=cv2.INTER_LINEAR, extra_h=0, extra_v=0):
     )
 
 
-def square_clip(data, frames_per_row, tile_dim, normalize=True):
+def square_clip(data, frames_per_row, tile_dim, frame_samples, normalize=True):
     # lay each frame out side by side in rows
     new_frame = np.zeros((frames_per_row * tile_dim[0], frames_per_row * tile_dim[1]))
     i = 0
     success = False
     for x in range(frames_per_row):
         for y in range(frames_per_row):
-            if i >= len(data):
-                frame = data[-1]
-            else:
-                frame = data[i]
-
-            # cv2.imshow("frame", np.uint8(frame))
-            # cv2.waitKey(0)
+            frame = data[frame_samples[i]]
             if normalize:
                 frame, stats = normalize(frame, new_max=255)
                 if not stats[0]:
@@ -159,7 +155,6 @@ def normalize(data, min=None, max=None, new_max=1):
         max = np.amax(data)
     if min is None:
         min = np.amin(data)
-    # print("normalizing with", max, min, new_max)
     if max == min:
         if max == 0:
             return np.zeros((data.shape)), (False, max, min)
diff --git a/src/ml_tools/interpreter.py b/src/ml_tools/interpreter.py
index 2763957b..140aef35 100644
--- a/src/ml_tools/interpreter.py
+++ b/src/ml_tools/interpreter.py
@@ -18,9 +18,10 @@ def load_json(self, filename):
         filename = filename.with_suffix(".json")
         logging.info("Loading metadata from %s", filename)
         metadata = json.load(open(filename, "r"))
-
+        self.version = metadata.get("version", None)
         self.labels = metadata["labels"]
         self.params = HyperParams()
+        print("Hypers are ", metadata.get("hyperparams", {}))
         self.params.update(metadata.get("hyperparams", {}))
         self.data_type = metadata.get("type", "thermal")
 
@@ -49,35 +50,31 @@ def get_preprocess_fn(self):
         else:
             import tensorflow as tf
 
-            if pretrained_model == "resnet":
+            if model_name == "resnet":
                 return tf.keras.applications.resnet.preprocess_input
-            elif pretrained_model == "nasnet":
+            elif model_name == "nasnet":
                 return tf.keras.applications.nasnet.preprocess_input
-            elif pretrained_model == "resnetv2":
+            elif model_name == "resnetv2":
                 return tf.keras.applications.resnet_v2.preprocess_input
 
-            elif pretrained_model == "resnet152":
+            elif model_name == "resnet152":
                 return tf.keras.applications.resnet.preprocess_input
 
-            elif pretrained_model == "vgg16":
+            elif model_name == "vgg16":
                 return tf.keras.applications.vgg16.preprocess_input
 
-            elif pretrained_model == "vgg19":
+            elif model_name == "vgg19":
                 return tf.keras.applications.vgg19.preprocess_input
 
-            elif pretrained_model == "mobilenet":
+            elif model_name == "mobilenet":
                 return tf.keras.applications.mobilenet_v2.preprocess_input
 
-            elif pretrained_model == "densenet121":
+            elif model_name == "densenet121":
                 return tf.keras.applications.densenet.preprocess_input
 
-            elif pretrained_model == "inceptionresnetv2":
+            elif model_name == "inceptionresnetv2":
                 return tf.keras.applications.inception_resnet_v2.preprocess_input
-        logging.warn(
-            "pretrained model %s has no preprocessing function", pretrained_model
-        )
-        return None
-        logging.info("No preprocess defined for %s", model_name)
+        logging.warn("pretrained model %s has no preprocessing function", model_name)
         return None
 
     def preprocess(self, clip, track, **args):
@@ -129,6 +126,7 @@ def preprocess(self, clip, track, **args):
                 predict_from_last,
                 segment_frames=segment_frames,
                 dont_filter=args.get("dont_filter", False),
+                min_segments=args.get("min_segments"),
             )
         else:
             frames, preprocessed, masses = self.preprocess_frames(
@@ -150,6 +148,7 @@ def classify_track(self, clip, track, segment_frames=None):
         # self.model.predict(preprocessed)
         top_score = None
         smoothed_predictions = None
+
         if self.params.smooth_predictions:
             masses = np.array(masses)
             top_score = np.sum(masses)
@@ -159,6 +158,7 @@ def classify_track(self, clip, track, segment_frames=None):
             output,
             smoothed_predictions,
             prediction_frames,
+            masses,
             top_score=top_score,
         )
         track_prediction.classify_time = time.time() - start
@@ -183,9 +183,12 @@ def preprocess_frames(
         data = []
         frames_used = []
         filtered_norm_limits = None
-        if self.params.diff_norm:
+        thermal_norm_limits = None
+        if self.params.diff_norm or self.params.thermal_diff_norm:
             min_diff = None
             max_diff = 0
+            thermal_max_diff = None
+            thermal_min_diff = None
             for i, region in enumerate(reversed(track.bounds_history)):
                 if region.blank:
                     continue
@@ -201,16 +204,32 @@ def preprocess_frames(
                     continue
 
                 f.float_arrays()
-                diff_frame = region.subimage(f.thermal) - region.subimage(
-                    clip.background
-                )
-                new_max = np.amax(diff_frame)
-                new_min = np.amin(diff_frame)
-                if min_diff is None or new_min < min_diff:
-                    min_diff = new_min
-                if new_max > max_diff:
-                    max_diff = new_max
-            filtered_norm_limits = (min_diff, max_diff)
+
+                if self.params.thermal_diff_norm:
+                    diff_frame = f.thermal - np.median(f.thermal)
+                    new_max = np.amax(diff_frame)
+                    new_min = np.amin(diff_frame)
+                    if thermal_min_diff is None or new_min < thermal_min_diff:
+                        thermal_min_diff = new_min
+                    if thermal_max_diff is None or new_max > thermal_max_diff:
+                        thermal_max_diff = new_max
+                if self.params.diff_norm:
+                    diff_frame = region.subimage(f.thermal) - region.subimage(
+                        clip.background
+                    )
+
+                    new_max = np.amax(diff_frame)
+                    new_min = np.amin(diff_frame)
+                    if min_diff is None or new_min < min_diff:
+                        min_diff = new_min
+                    if new_max > max_diff:
+                        max_diff = new_max
+            if self.params.thermal_diff_norm:
+                thermal_norm_limits = (thermal_min_diff, thermal_max_diff)
+
+            if self.params.diff_norm:
+                filtered_norm_limits = (min_diff, max_diff)
+
         for i, region in enumerate(reversed(track.bounds_history)):
             if region.blank:
                 continue
@@ -249,6 +268,7 @@ def preprocess_frames(
                 clip.background,
                 clip.crop_rectangle,
                 filtered_norm_limits=filtered_norm_limits,
+                thermal_norm_limits=thermal_norm_limits,
             )
             preprocessed = preprocess_single_frame(
                 cropped_frame,
@@ -271,6 +291,7 @@ def preprocess_segments(
         predict_from_last=None,
         segment_frames=None,
         dont_filter=False,
+        min_segments=None,
     ):
         from ml_tools.preprocess import preprocess_frame, preprocess_movement
 
@@ -280,10 +301,12 @@ def preprocess_segments(
             ffc_frames=[] if dont_filter else clip.ffc_frames,
             repeats=1,
             segment_frames=segment_frames,
-            segment_type=self.params.segment_type,
+            segment_types=self.params.segment_types,
             from_last=predict_from_last,
             max_segments=max_segments,
             dont_filter=dont_filter,
+            filter_by_fp=False,
+            min_segments=min_segments,
         )
         frame_indices = set()
         for segment in segments:
@@ -293,30 +316,52 @@ def preprocess_segments(
 
         # should really be over whole track buts let just do the indices we predict of
         #  seems to make little different to just doing a min max normalization
+        thermal_norm_limits = None
         filtered_norm_limits = None
-        if self.params.diff_norm:
+        if self.params.diff_norm or self.params.thermal_diff_norm:
             min_diff = None
             max_diff = 0
-            for frame_index in frame_indices:
-                region = track.bounds_history[frame_index - track.start_frame]
-                f = clip.get_frame(region.frame_number)
-                if f is None:
-                    logging.warn("Could not get frame {}", region.frame_number)
+            thermal_max_diff = None
+            thermal_min_diff = None
+            for i, region in enumerate(reversed(track.bounds_history)):
+                if region.blank:
                     continue
-                if region.blank or region.width <= 0 or region.height <= 0:
+                if region.width == 0 or region.height == 0:
+                    logging.warn(
+                        "No width or height for frame %s regoin %s",
+                        region.frame_number,
+                        region,
+                    )
+                    continue
+                f = clip.get_frame(region.frame_number)
+                if region.blank or region.width <= 0 or region.height <= 0 or f is None:
                     continue
 
                 f.float_arrays()
-                diff_frame = region.subimage(f.thermal) - region.subimage(
-                    clip.background
-                )
-                new_max = np.amax(diff_frame)
-                new_min = np.amin(diff_frame)
-                if min_diff is None or new_min < min_diff:
-                    min_diff = new_min
-                if new_max > max_diff:
-                    max_diff = new_max
-            filtered_norm_limits = (min_diff, max_diff)
+
+                if self.params.thermal_diff_norm:
+                    diff_frame = f.thermal - np.median(f.thermal)
+                    new_max = np.amax(diff_frame)
+                    new_min = np.amin(diff_frame)
+                    if thermal_min_diff is None or new_min < thermal_min_diff:
+                        thermal_min_diff = new_min
+                    if thermal_max_diff is None or new_max > thermal_max_diff:
+                        thermal_max_diff = new_max
+                if self.params.diff_norm:
+                    diff_frame = region.subimage(f.thermal) - region.subimage(
+                        clip.background
+                    )
+                    new_max = np.amax(diff_frame)
+                    new_min = np.amin(diff_frame)
+                    if min_diff is None or new_min < min_diff:
+                        min_diff = new_min
+                    if new_max > max_diff:
+                        max_diff = new_max
+            if self.params.thermal_diff_norm:
+                thermal_norm_limits = (thermal_min_diff, thermal_max_diff)
+
+            if self.params.diff_norm:
+                filtered_norm_limits = (min_diff, max_diff)
         for frame_index in frame_indices:
             region = track.bounds_history[frame_index - track.start_frame]
 
@@ -341,6 +386,7 @@ def preprocess_segments(
                 clip.background,
                 clip.crop_rectangle,
                 filtered_norm_limits=filtered_norm_limits,
+                thermal_norm_limits=thermal_norm_limits,
             )
             track_data[frame.frame_number] = cropped_frame
         features = None
@@ -365,6 +411,7 @@ def preprocess_segments(
                 self.params.frame_size,
                 self.params.channels,
                 self.preprocess_fn,
+                sample=f"{clip.get_id()}-{track.get_id()}",
             )
             if frames is None:
                 logging.warn("No frames to predict on")
diff --git a/src/ml_tools/kerasmodel.py b/src/ml_tools/kerasmodel.py
index 2313c789..45cbb466 100644
--- a/src/ml_tools/kerasmodel.py
+++ b/src/ml_tools/kerasmodel.py
@@ -66,8 +66,8 @@ def __init__(self, train_config=None, labels=None, data_dir=None):
         self.label_probabilities = None
         self.class_weights = None
         self.ds_by_label = True
-        self.excluded_labels = []
-        self.remapped_labels = []
+        self.excluded_labels = None
+        self.remapped_labels = None
         self.orig_labels = None
 
     def load_training_meta(self, base_dir):
@@ -81,6 +81,9 @@ def load_training_meta(self, base_dir):
         self.ds_by_label = meta.get("by_label", True)
         self.excluded_labels = meta.get("excluded_labels")
         self.remapped_labels = meta.get("remapped_labels")
+        self.params.set_use_segments(
+            meta.get("config").get("build", {}).get("use_segments", True)
+        )
 
     def shape(self):
         if self.model is None:
@@ -362,6 +365,59 @@ def build_model(
             ],
         )
 
+    def adjust_final_layer(self):
+        # Adjust final layer to a new set of labels, by removing it and re adding
+        # new_model = tf.keras.models.Sequential(self.model.layers[:-3])
+        self.model = tf.keras.Model(
+            inputs=self.model.input, outputs=self.model.layers[-2].output
+        )
+
+        # model = tf.keras.Model(inputs=self.model.input, outputs=x)
+
+        activation = "softmax"
+        if self.params.multi_label:
+            activation = "sigmoid"
+
+        retrain_from = self.params.retrain_layer
+        if retrain_from:
+            for i, layer in enumerate(self.model.layers):
+                if isinstance(layer, tf.keras.layers.BatchNormalization):
+                    # apparently this shouldn't matter as we set base_training = False
+                    layer.trainable = False
+                    logging.info("dont train %s %s", i, layer.name)
+                else:
+                    layer.trainable = i >= retrain_from
+        else:
+            self.model.trainable = self.params.base_training
+
+        # add final layer after as always want this trainable
+        logging.info(
+            "Adding new final layer with %s activation and %s labels ",
+            activation,
+            len(self.labels),
+        )
+        preds = tf.keras.layers.Dense(
+            len(self.labels), activation=activation, name="prediction"
+        )(self.model.output)
+
+        self.model = tf.keras.models.Model(self.model.inputs, outputs=preds)
+        if self.params.multi_label:
+            acc = tf.metrics.binary_accuracy
+        else:
+            acc = tf.metrics.categorical_accuracy
+        logging.info("Using acc %s", acc)
+        self.model.summary()
+        self.model.compile(
+            optimizer=optimizer(self.params),
+            loss=loss(self.params),
+            metrics=[
+                acc,
+                tf.keras.metrics.AUC(),
+                tf.keras.metrics.Recall(),
+                tf.keras.metrics.Precision(),
+            ],
+        )
+
     def load_model(self, model_file, training=False, weights=None):
         model_file = Path(model_file)
         super().__init__(model_file)
@@ -374,7 +430,7 @@ def load_model(self, model_file, training=False, weights=None):
         self.model.trainable = training
 
         if weights is not None:
-            self.model.load_weights(weights).expect_partial()
+            self.model.load_weights(weights)
             logging.info("Loaded weight %s", weights)
         # print(self.model.summary())
 
@@ -450,41 +506,40 @@ def close(self):
         gc.collect()
 
     def train_model(
-        self, epochs, run_name, weights=None, rebalance=False, resample=False
+        self,
+        epochs,
+        run_name,
+        weights=None,
+        rebalance=False,
+        resample=False,
+        fine_tune=None,
     ):
         logging.info(
             "%s Training model for %s epochs with weights %s", run_name, epochs, weights
         )
-        self.excluded_labels, self.remapped_labels = get_excluded(
-            self.data_type, self.params.multi_label
-        )
+        if self.params.excluded_labels is not None:
+            self.excluded_labels = self.params.excluded_labels
+        else:
+            self.excluded_labels, self.remapped_labels = get_excluded(
+                self.data_type, self.params.multi_label
+            )
+        if self.params.remapped_labels is not None:
+            self.remapped_labels = self.params.remapped_labels
+        else:
+            self.remapped_labels, self.remapped_labels = get_excluded(
+                self.data_type, self.params.multi_label
+            )
         train_files = self.data_dir / "train"
         validate_files = self.data_dir / "validation"
         logging.info(
             "Excluding %s remapping %s", self.excluded_labels, self.remapped_labels
         )
 
-        if self.params.multi_label:
+        if self.params.multi_label and "land-bird" not in self.labels:
             self.labels.append("land-bird")
         self.orig_labels = self.labels.copy()
-        for l in self.excluded_labels:
-            if l in self.labels:
-                self.labels.remove(l)
-        for l in self.remapped_labels.keys():
-            if l in self.labels:
-                self.labels.remove(l)
-        self.log_dir = self.log_base / run_name
-        self.log_dir.mkdir(parents=True, exist_ok=True)
-
-        if not self.model:
-            self.build_model(
-                dense_sizes=self.params.dense_sizes,
-                retrain_from=self.params.retrain_layer,
-                dropout=self.params.dropout,
-                run_name=run_name,
-            )
-        self.model.summary()
 
+        self.preprocess_fn = self.get_preprocess_fn()
         self.train, remapped, new_labels, epoch_size = get_dataset(
             train_files,
             self.data_type,
@@ -503,6 +558,31 @@ def train_model(
             num_frames=self.params.square_width**2,
             channels=self.params.channels,
         )
+        self.labels = new_labels
+
+        self.log_dir = self.log_base / run_name
+        self.log_dir.mkdir(parents=True, exist_ok=True)
+        if fine_tune is not None:
+            self.load_model(fine_tune, weights=weights, training=True)
+            # load model loads old labels
+            self.labels = new_labels
+
+            self.adjust_final_layer()
+        else:
+
+            if not self.model:
+                self.build_model(
+                    dense_sizes=self.params.dense_sizes,
+                    retrain_from=self.params.retrain_layer,
+                    dropout=self.params.dropout,
+                    run_name=run_name,
+                )
+
+            if weights is not None:
+                self.model.load_weights(weights)
+
+        self.model.summary()
+
         self.remapped = remapped
         self.validate, remapped, _, _ = get_dataset(
             validate_files,
@@ -519,18 +599,14 @@ def train_model(
             multi_label=self.params.multi_label,
             num_frames=self.params.square_width**2,
             channels=self.params.channels,
-            # dist=self.dataset_counts["validation"],
         )
-
-        if weights is not None:
-            self.model.load_weights(weights)
         if rebalance:
             self.class_weights = get_weighting(self.train, self.labels)
-            logging.info(
-                "Training on %s  with class weights %s",
-                self.labels,
-                self.class_weights,
-            )
+        logging.info(
+            "Training on %s  with class weights %s",
+            self.labels,
+            self.class_weights,
+        )
 
         self.save_metadata(run_name)
         self.save(run_name)
@@ -577,12 +653,12 @@ def train_model(
         self.save(run_name, history=history, test_results=test_accuracy)
 
     def checkpoints(self, run_name):
-        checkpoint_file = self.checkpoint_folder / run_name / "cp.ckpt"
+        checkpoint_file = self.checkpoint_folder / run_name / "cp.weights.h5"
 
         cp_callback = tf.keras.callbacks.ModelCheckpoint(
             filepath=checkpoint_file, save_weights_only=True, verbose=1
         )
-        val_loss = self.checkpoint_folder / run_name / "val_loss"
+        val_loss = self.checkpoint_folder / run_name / "val_loss.weights.h5"
 
         checkpoint_loss = tf.keras.callbacks.ModelCheckpoint(
             val_loss,
@@ -592,7 +668,7 @@ def checkpoints(self, run_name):
             save_weights_only=True,
             mode="auto",
         )
-        val_acc = self.checkpoint_folder / run_name / "val_acc"
+        val_acc = self.checkpoint_folder / run_name / "val_acc.weights.h5"
 
         checkpoint_acc = tf.keras.callbacks.ModelCheckpoint(
             val_acc,
@@ -607,7 +683,7 @@ def checkpoints(self, run_name):
             mode="max",
         )
 
-        val_precision = self.checkpoint_folder / run_name / "val_recall"
+        val_precision = self.checkpoint_folder / run_name / "val_recall.weights.h5"
 
         checkpoint_recall = tf.keras.callbacks.ModelCheckpoint(
             val_precision,
@@ -624,6 +700,7 @@ def checkpoints(self, run_name):
                 if self.params.multi_label
                 else "val_categorical_accuracy"
             ),
+            mode="max",
         )
         # havent found much use in this just takes training time
         # file_writer_cm = tf.summary.create_file_writer(
@@ -648,6 +725,7 @@ def checkpoints(self, run_name):
                 if self.params.multi_label
                 else "val_categorical_accuracy"
             ),
+            mode="max",
             verbose=1,
         )
         return [
@@ -797,21 +875,15 @@ def confusion_tracks(self, dataset, filename, threshold=0.8):
         ]
         for y, pred in pred_per_track.values():
             pred.normalize_score()
-            no_smoothing = np.mean(pred.predictions, axis=0)
+            preds = np.array([p.prediction for p in pred.predictions])
+
+            no_smoothing = np.mean(preds, axis=0)
             masses = np.array(pred.masses)[:, None]
             old_smoothing = pred.class_best_score
-            new_smooth = pred.predictions * masses
+            new_smooth = preds * masses
             new_smooth = np.sum(new_smooth, axis=0)
             new_smooth /= np.sum(masses)
-            # logging.info(
-            #     "Smoothing %s with masses %s", np.round(100 * pred.predictions), masses
-            # )
-            # logging.info(
-            #     "N smooth %s old %s new %s",
-            #     np.round(100 * no_smoothing),
-            #     np.round(100 * old_smoothing),
-            #     np.round(100 * new_smooth),
-            # )
+
             for i, pred_type in enumerate([no_smoothing, old_smoothing, new_smooth]):
                 best_pred = np.argmax(pred_type)
                 confidence = pred_type[best_pred]
@@ -1011,7 +1083,6 @@ def plot_confusion_matrix(cm, class_names):
     counts = cm.copy()
     threshold = counts.max() / 2.0
 
-    print("Threshold is", threshold, " for ", cm.max())
     # Normalize the confusion matrix.
 
     cm = np.around(cm.astype("float") / cm.sum(axis=1)[:, np.newaxis], decimals=2)
diff --git a/src/ml_tools/preprocess.py b/src/ml_tools/preprocess.py
index b3186127..9ab61c8f 100644
--- a/src/ml_tools/preprocess.py
+++ b/src/ml_tools/preprocess.py
@@ -61,6 +61,7 @@ def preprocess_frame(
     crop_rectangle=None,
     calculate_filtered=True,
     filtered_norm_limits=None,
+    thermal_norm_limits=None,
 ):
     median = np.median(frame.thermal)
     cropped_frame = frame.crop_by_region(region, only_thermal=True)
@@ -79,7 +80,8 @@ def preprocess_frame(
         True,
     )
     cropped_frame.thermal -= median
-    np.clip(cropped_frame.thermal, 0, None, out=cropped_frame.thermal)
+    if thermal_norm_limits is None:
+        np.clip(cropped_frame.thermal, 0, None, out=cropped_frame.thermal)
     if calculate_filtered and filtered_norm_limits is not None:
         cropped_frame.filtered, stats = imageprocessing.normalize(
             cropped_frame.filtered,
@@ -88,8 +90,12 @@ def preprocess_frame(
             new_max=255,
         )
         if frame.thermal is not None:
+            thermal_min = None
+            thermal_max = None
+            if thermal_norm_limits is not None:
+                thermal_min, thermal_max = thermal_norm_limits
             cropped_frame.thermal, _ = imageprocessing.normalize(
-                cropped_frame.thermal, new_max=255
+                cropped_frame.thermal, min=thermal_min, max=thermal_max, new_max=255
             )
     else:
         cropped_frame.normalize()
@@ -115,6 +121,13 @@ def preprocess_single_frame(
         data,
         axis=2,
     )
+    # global index
+    # index += 1
+    # tools.saveclassify_image(
+    #     image,
+    #     f"samples/{save_info}-{index}",
+    # )
+
     if preprocess_fn:
         image = preprocess_fn(image)
     return image
@@ -134,6 +147,13 @@ def preprocess_movement(
 ):
     frame_types = {}
     data = []
+    frame_samples = list(np.arange(len(preprocess_frames)))
+    if len(preprocess_frames) < frames_per_row * 5:
+        extra_samples = np.random.choice(
+            frame_samples, frames_per_row * 5 - len(preprocess_frames)
+        )
+        frame_samples.extend(extra_samples)
+        frame_samples.sort()
     for channel in channels:
         if isinstance(channel, str):
             channel = TrackChannels[channel]
@@ -145,6 +165,7 @@ def preprocess_movement(
             channel_segment,
             frames_per_row,
             (frame_size, frame_size),
+            frame_samples,
             normalize=False,
         )
         # already done normalization
@@ -161,7 +182,7 @@ def preprocess_movement(
     # index += 1
     # tools.saveclassify_image(
     #     data,
-    #     f"samples/{index}",
+    #     f"samples/{sample}-{index}",
     # )
 
     if preprocess_fn:
diff --git a/src/ml_tools/previewer.py b/src/ml_tools/previewer.py
index ddd203ea..34255047 100644
--- a/src/ml_tools/previewer.py
+++ b/src/ml_tools/previewer.py
@@ -91,8 +91,8 @@ def export_clip_preview(self, filename, clip: Clip, predictions=None):
         if self.debug:
             footer = Previewer.stats_footer(clip.stats)
         if predictions and (
-            self.preview_type == self.PREVIEW_CLASSIFIED
-            or self.preview_type == self.PREVIEW_TRACKING
+            self.preview_type == PREVIEW_CLASSIFIED
+            or self.preview_type == PREVIEW_TRACKING
         ):
             self.create_track_descriptions(clip, predictions)
 
@@ -103,14 +103,14 @@ def export_clip_preview(self, filename, clip: Clip, predictions=None):
 
         res_x = clip.res_x
         res_y = clip.res_y
-        if self.preview_type == self.PREVIEW_TRACKING:
+        if self.preview_type == PREVIEW_TRACKING:
             res_x *= 2
             res_y *= 2
 
         mpeg = MPEGCreator(str(filename))
         frame_scale = 4
         for frame_number, frame in enumerate(clip.frame_buffer):
-            if self.preview_type == self.PREVIEW_RAW:
+            if self.preview_type == PREVIEW_RAW:
                 image = self.convert_and_resize(
                     frame.thermal, clip.stats.min_temp, clip.stats.max_temp, clip.type
                 )
diff --git a/src/ml_tools/rawdb.py b/src/ml_tools/rawdb.py
index 27175e23..e99200f6 100644
--- a/src/ml_tools/rawdb.py
+++ b/src/ml_tools/rawdb.py
@@ -20,8 +20,10 @@
 from ml_tools.datasetstructures import TrackHeader, ClipHeader
 from track.track import Track
 from track.cliptrackextractor import is_affected_by_ffc
-from cptv import CPTVReader
+from cptv_rs_python_bindings import CptvReader
 from ml_tools.rectangle import Rectangle
+from config.buildconfig import BuildConfig
+from datetime import timedelta
 
 special_datasets = [
     "tag_frames",
@@ -31,6 +33,8 @@
     "overlay",
 ]
 
+FPS = 9
+
 
 class RawDatabase:
     def __init__(self, database_filename):
@@ -62,19 +66,22 @@ def load_frames(self):
         background = None
         tracker_version = self.meta_data.get("tracker_version")
         frame_i = 0
-        with open(self.file, "rb") as f:
-            reader = CPTVReader(f)
-            for frame in reader:
-                if frame.background_frame:
-                    background = frame.pix
-                    # bug in previous tracker version where background was first frame
-                    if tracker_version >= 10:
-                        continue
-                ffc = is_affected_by_ffc(frame)
-                if ffc:
-                    ffc_frames.append(frame_i)
-                cptv_frames.append(frame.pix)
-                frame_i += 1
+        reader = CptvReader(str(self.file))
+        header = reader.get_header()
+        while True:
+            frame = reader.next_frame()
+            if frame is None:
+                break
+            if frame.background_frame:
+                background = frame.pix
+                # bug in previous tracker version where background was first frame
+                if tracker_version >= 10:
+                    continue
+            ffc = is_affected_by_ffc(frame)
+            if ffc:
+                ffc_frames.append(frame_i)
+            cptv_frames.append(frame.pix)
+            frame_i += 1
         frames = np.uint16(cptv_frames)
         if background is None:
             background = np.mean(frames, axis=0)
@@ -108,12 +115,28 @@ def get_clip_tracks(self, tag_precedence):
         self.crop_rectangle = Rectangle(
             edge_pixels, edge_pixels, resx - edge_pixels * 2, resy - edge_pixels * 2
         )
+        location = metadata.get("location")
+        lat = None
+        lng = None
+        country_code = None
+        if location is not None:
+            try:
+                lat = location.get("lat")
+                lng = location.get("lng")
+                if lat is not None and lng is not None:
+                    for country, location in BuildConfig.COUNTRY_LOCATIONS.items():
+                        if location.contains(lng, lat):
+                            country_code = country
+                            break
+            except:
+                logging.error("Could not parse lat lng", exc_info=True)
+                pass
 
         clip_header = ClipHeader(
             clip_id=int(metadata["id"]),
             station_id=metadata.get("stationId"),
             source_file=self.file,
-            location=metadata.get("location"),
+            location=None if lat is None or lng is None else (lng, lat),
             camera=metadata.get("deviceId"),
             rec_time=parse_date(metadata["recordingDateTime"]),
             frames_per_second=10 if self.file.suffix == "mp4" else 9,
@@ -121,8 +144,13 @@ def get_clip_tracks(self, tag_precedence):
             trap=metadata.get("trap", ""),
             tracks=[],
             ffc_frames=self.ffc_frames,
+            country_code=country_code,
         )
         tracks = metadata.get("Tracks", [])
+        fp_labels = metadata.get("fp_model_labels")
+        fp_index = None
+        if fp_labels is not None:
+            fp_index = fp_labels.index("false-positive")
         meta = []
         for track_meta in tracks:
             tags = track_meta.get("tags", [])
@@ -179,6 +207,21 @@ def get_clip_tracks(self, tag_precedence):
                 if start is None:
                     start = region.frame_number
                 end = region.frame_number
+
+            fp_meta = track_meta.get("fp_model_predictions")
+            fp_frames = None
+            if fp_meta is not None:
+                fp_frames = []
+                for pred in fp_meta.get("predictions", []):
+                    scores = pred["prediction"]
+                    best_arg = np.argmax(scores)
+                    confidence = scores[best_arg]
+                    if best_arg == fp_index and confidence > 75:
+                        frame_i = pred["frames"]
+                        if isinstance(frame_i, int):
+                            fp_frames.append(frame_i)
+                        else:
+                            fp_frames.append(frame_i[0])
             header = TrackHeader(
                 clip_id=clip_header.clip_id,
                 track_id=int(track_meta["id"]),
@@ -190,11 +233,17 @@ def get_clip_tracks(self, tag_precedence):
                 human_tags=human_tags,
                 source_file=self.file,
                 mega_missed_regions=track_meta.get("mega_missed_regions"),
+                station_id=clip_header.station_id,
+                fp_frames=fp_frames,
+                start_time=clip_header.rec_time + timedelta(seconds=start / FPS),
                 # frame_temp_median=frame_temp_median,
             )
             clip_header.tracks.append(header)
         return clip_header
 
+    def get_id(self):
+        return self.meta_data_file
+
     def get_clip_meta(self, tag_precedence):
         return self.get_clip_tracks(tag_precedence)
         #
diff --git a/src/ml_tools/rectangle.py b/src/ml_tools/rectangle.py
index 225a754f..4191654c 100644
--- a/src/ml_tools/rectangle.py
+++ b/src/ml_tools/rectangle.py
@@ -106,6 +106,10 @@ def enlarge(self, border, max=None):
         if max:
             self.crop(max)
 
+    def contains(self, x, y):
+        """Is this point contained in the rectangle"""
+        return self.left <= x and self.right >= x and self.top >= y and self.bottom <= y
+
     @property
     def area(self):
         return int(self.width) * self.height
diff --git a/src/ml_tools/tfdataset.py b/src/ml_tools/tfdataset.py
index f791ba07..4914ea5b 100644
--- a/src/ml_tools/tfdataset.py
+++ b/src/ml_tools/tfdataset.py
@@ -61,47 +61,71 @@ def get_distribution(dataset, num_labels, batched=True, one_hot=True, extra_meta
 
 
 def get_dataset(load_function, base_dir, labels, **args):
-    land_birds = [
-        "pukeko",
-        "california quail",
-        "brown quail",
-        "black swan",
-        "quail",
-        "pheasant",
-        "penguin",
-        "duck",
-        "chicken",
-        "rooster",
-    ]
+    model_labels = args.get("model_labels")
+
     excluded_labels = args.get("excluded_labels", [])
     to_remap = args.get("remapped_labels", {})
-    logging.info("Excluding %s", excluded_labels)
     remapped = {}
     keys = []
     values = []
-    # excluded_labels.append("insect")
-    # excluded_labels.append("cat")
-    new_labels = labels.copy()
-    for excluded in excluded_labels:
-        if excluded in labels:
-            new_labels.remove(excluded)
-    for remapped_lbl in to_remap.keys():
-        if remapped_lbl in labels:
-            new_labels.remove(remapped_lbl)
-    for l in labels:
-        keys.append(labels.index(l))
-        if l not in new_labels:
-            remapped[l] = -1
-            values.append(-1)
-            logging.info("Excluding %s", l)
-        else:
-            remapped[l] = [l]
-            values.append(new_labels.index(l))
-    for k, v in to_remap.items():
-        if k in labels and v in labels:
-            remapped[v].append(k)
-            values[labels.index(k)] = new_labels.index(v)
-            del remapped[k]
+    shuffle_size = 4096
+    if args.get("num_frames", 25) == 1:
+        shuffle_size *= 20
+    if model_labels is not None:
+        new_labels = model_labels
+
+        logging.info("Mapping DS labels %s to model labels %s", labels, model_labels)
+        # if we are loading a model with different labels we need to map the dataset labels
+        # to the equivalent model labels
+        for l_i, og_lbl in enumerate(labels):
+            keys.append(l_i)
+            try:
+                lbl = og_lbl
+                if lbl in to_remap:
+                    lbl = to_remap[lbl]
+
+                mdl_i = model_labels.index(lbl)
+                if lbl not in remapped:
+                    remapped[lbl] = []
+                remapped[lbl].append(og_lbl)
+                values.append(mdl_i)
+            except:
+                remapped[og_lbl] = -1
+                values.append(-1)
+
+    else:
+
+        logging.info("Excluding %s", excluded_labels)
+
+        # get new labels after excluding and removing remapped labels
+        new_labels = labels.copy()
+        for excluded in excluded_labels:
+            if excluded in new_labels:
+                new_labels.remove(excluded)
+        for remapped_lbl in to_remap.keys():
+            if remapped_lbl in new_labels:
+                new_labels.remove(remapped_lbl)
+
+        # initialize remapped dictionary, setting labels that have been removed to -1, these values will be filtered later
+        for l in labels:
+            keys.append(labels.index(l))
+            if l not in new_labels:
+                remapped[l] = [-1]
+                values.append(-1)
+                logging.info("Excluding %s", l)
+            else:
+                remapped[l] = [l]
+                values.append(new_labels.index(l))
+
+        # add the remapped labels to the correct place
+        for k, v in to_remap.items():
+            if k in excluded_labels:
+                continue
+            if k in labels and v in new_labels:
+                remapped[v].append(k)
+                values[labels.index(k)] = new_labels.index(v)
+                del remapped[k]
+
     remap_lookup = tf.lookup.StaticHashTable(
         initializer=tf.lookup.KeyValueTensorInitializer(
             keys=tf.constant(keys),
@@ -111,7 +135,7 @@ def get_dataset(load_function, base_dir, labels, **args):
         name="remapped_y",
     )
     num_labels = len(new_labels)
-    logging.info("New labels are %s", new_labels)
+    logging.info("New labels are %s from original %s", new_labels, labels)
     for k, v in zip(keys, values):
         logging.info(
             "Mapping %s to %s", labels[k], new_labels[v] if v >= 0 else "nothing"
@@ -144,6 +168,8 @@ def get_dataset(load_function, base_dir, labels, **args):
         logging.info("RESAMPLING")
         # seems the only way to get even distribution
         label_ds = []
+        unbalanced_ds = []
+        dont_balance = ["vehicle"]
         for i, l in enumerate(new_labels):
             l_mask = np.zeros((len(new_labels)))
             l_mask[i] = 1
@@ -151,15 +177,27 @@ def get_dataset(load_function, base_dir, labels, **args):
 
             l_filter = lambda x, y: tf.math.reduce_all(tf.math.equal(y, l_mask))
             l_dataset = dataset.filter(l_filter)
-            l_dataset = l_dataset.shuffle(40096, reshuffle_each_iteration=True)
-
-            label_ds.append(l_dataset)
+            l_dataset = l_dataset.shuffle(shuffle_size, reshuffle_each_iteration=True)
+            if l in dont_balance:
+                unbalanced_ds.append(l_dataset)
+            else:
+                label_ds.append(l_dataset)
         dataset = tf.data.Dataset.sample_from_datasets(
             label_ds,
             # weights=[1 / len(new_labels)] * len(new_labels),
             stop_on_empty_dataset=True,
             rerandomize_each_iteration=True,
         )
+        dont_balance.append(dataset)
+        dataset = tf.data.Dataset.sample_from_datasets(
+            dont_balance,
+            # weights=[1 / len(new_labels)] * len(new_labels),
+            stop_on_empty_dataset=False,
+            rerandomize_each_iteration=True,
+        )
+    if args.get("epoch_size") is not None:
+        dataset = dataset.take(args.get("epoch_size"))
+        logging.info("Setting dataset to %s", args.get("epoch_size"))
     if args.get("cache", False):
         dataset = dataset.cache()
     if (
@@ -167,9 +205,9 @@ def get_dataset(load_function, base_dir, labels, **args):
         and args.get("shuffle", True)
         and not args.get("resample")
     ):
-        logging.info("shuffling data")
+        logging.info("shuffling data with buffer %s", shuffle_size)
         dataset = dataset.shuffle(
-            4096, reshuffle_each_iteration=args.get("reshuffle", True)
+            shuffle_size, reshuffle_each_iteration=args.get("reshuffle", True)
         )
     # tf refues to run if epoch sizes change so we must decide a costant epoch size even though with reject res
     # it will chang eeach epoch, to ensure this take this repeat data and always take epoch_size elements
@@ -187,13 +225,13 @@ def get_dataset(load_function, base_dir, labels, **args):
         logging.info("Setting dataset size to %s", epoch_size)
         if not args.get("only_features", False):
             dataset = dataset.repeat(2)
+        dataset = dataset.take(epoch_size)
         scale_epoch = args.get("scale_epoch", None)
         if scale_epoch:
             epoch_size = epoch_size // scale_epoch
             dataset = dataset.take(epoch_size)
     else:
         epoch_size = 1
-    dataset = dataset.prefetch(buffer_size=AUTOTUNE)
     batch_size = args.get("batch_size", None)
     if batch_size is not None:
         dataset = dataset.batch(batch_size)
diff --git a/src/ml_tools/tfwriter.py b/src/ml_tools/tfwriter.py
index 3d9129ad..983308f2 100644
--- a/src/ml_tools/tfwriter.py
+++ b/src/ml_tools/tfwriter.py
@@ -12,41 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from PIL import Image
 from pathlib import Path
 from multiprocessing import Process, Queue
-
-import collections
-import hashlib
-import io
-import json
-import multiprocessing
 import os
-import time
-from absl import app
-from absl import flags
 from absl import logging
 import numpy as np
-from PIL import Image, ImageOps
-
 import tensorflow as tf
-from . import tfrecord_util
-from ml_tools import tools
-from ml_tools.imageprocessing import normalize, rotate
-from track.cliptracker import get_diff_back_filtered
-import cv2
-import random
-import math
 
 
-def process_job(queue, labels, base_dir, save_data, extra_args):
+def process_job(queue, labels, base_dir, save_data, writer_i, extra_args):
     import gc
 
     pid = os.getpid()
 
-    writer_i = 1
     name = f"{writer_i}-{pid}.tfrecord"
-
+    logging.info("Writing to %s", name)
     options = tf.io.TFRecordOptions(compression_type="GZIP")
     writer = tf.io.TFRecordWriter(str(base_dir / name), options=options)
     i = 0
@@ -66,15 +46,8 @@ def process_job(queue, labels, base_dir, save_data, extra_args):
                 saved += save_data(samples, writer, labels, extra_args)
                 files += 1
                 del samples
-                if saved > 250000 / num_frames:
-                    logging.info("Closing old writer")
-                    writer.close()
-                    writer_i += 1
-                    name = f"{writer_i}-{pid}.tfrecord"
-                    logging.info("Opening %s", name)
-                    saved = 0
-                    writer = tf.io.TFRecordWriter(str(base_dir / name), options=options)
-                if i % int(25000 / num_frames) == 0:
+
+                if i % int(2500 / num_frames) == 0:
                     logging.info("Saved %s ", files)
                     gc.collect()
                     writer.flush()
@@ -101,43 +74,51 @@ def create_tf_records(
     samples_by_source = dataset.get_samples_by_source()
     source_files = list(samples_by_source.keys())
     np.random.shuffle(source_files)
-
     num_labels = len(dataset.labels)
     logging.info(
         "writing to output path: %s for %s samples", output_path, len(samples_by_source)
     )
     num_processes = 8
+    writer_i = 0
+    index = 0
+    jobs_per_process = 600 * num_processes
     try:
-        job_queue = Queue()
-        processes = []
-        for i in range(num_processes):
-            p = Process(
-                target=process_job,
-                args=(job_queue, labels, output_path, save_data, extra_args),
-            )
-            processes.append(p)
-            p.start()
-            added = 0
-        for source_file in source_files:
-            job_queue.put((samples_by_source[source_file]))
-            added += 1
-            while job_queue.qsize() > num_processes * 10:
-                logging.info("Sleeping for %s", 10)
-                # give it a change to catch up
-                time.sleep(10)
+        while index < len(source_files):
+            job_queue = Queue()
+            processes = []
+            for i in range(num_processes):
+                p = Process(
+                    target=process_job,
+                    args=(
+                        job_queue,
+                        labels,
+                        output_path,
+                        save_data,
+                        writer_i,
+                        extra_args,
+                    ),
+                )
+                processes.append(p)
+                p.start()
+                added = 0
+            writer_i += 1
+            for source_file in source_files[index : index + jobs_per_process]:
+                job_queue.put((samples_by_source[source_file]))
+                added += 1
 
-        logging.info("Processing %d", job_queue.qsize())
-        for i in range(len(processes)):
-            job_queue.put(("DONE"))
-        for process in processes:
-            try:
-                process.join()
-            except KeyboardInterrupt:
-                logging.info("KeyboardInterrupt, terminating.")
-                for process in processes:
-                    process.terminate()
-                exit()
-        logging.info("Saved %s", len(dataset.samples_by_id))
+            index += jobs_per_process
+            logging.info("Processing %d", job_queue.qsize())
+            for i in range(len(processes)):
+                job_queue.put(("DONE"))
+            for process in processes:
+                try:
+                    process.join()
+                except KeyboardInterrupt:
+                    logging.info("KeyboardInterrupt, terminating.")
+                    for process in processes:
+                        process.terminate()
+                    exit()
+            logging.info("Saved %s", len(dataset.samples_by_id))
 
     except:
         logging.error("Error saving track info", exc_info=True)
diff --git a/src/ml_tools/thermaldataset.py b/src/ml_tools/thermaldataset.py
index 728b95f1..bd4da773 100644
--- a/src/ml_tools/thermaldataset.py
+++ b/src/ml_tools/thermaldataset.py
@@ -12,6 +12,7 @@
 
 from ml_tools.featurenorms import mean_v, std_v
 from ml_tools.frame import TrackChannels
+from pathlib import Path
 
 # seed = 1341
 # tf.random.set_seed(seed)
@@ -33,12 +34,30 @@ def get_excluded():
         "pest",
         "pig",
         "sealion",
+        "bat",
+        "mammal",
+        "frog",
+        "cow",
+        # added gp forretrain
+        "wombat",
+        "gray kangaroo",
+        "echidna",
+        "fox",
+        "deer",
+        "sheep",
+        # "wombat",
     ]
 
 
 def get_remapped(multi_label=False):
     land_bird = "land-bird" if multi_label else "bird"
     return {
+        "echidna": "hedgehog",
+        "grey kangaroo": "wallaby",
+        "sambar deer": "deer",
+        "mouse": "rodent",
+        "rat": "rodent",
+        "rain": "false-positive",
         "water": "false-positive",
         "insect": "false-positive",
         "allbirds": "bird",
@@ -117,7 +136,9 @@ def load_dataset(filenames, remap_lookup, labels, args):
             extra_label_map=extra_label_map,
             include_track=args.get("include_track", False),
             num_frames=args.get("num_frames", 25),
-            channels=args.get("channels", [TrackChannels.thermal.name]),
+            channels=args.get(
+                "channels", [TrackChannels.thermal.name, TrackChannels.filtered.name]
+            ),
         ),
         num_parallel_calls=AUTOTUNE,
         deterministic=deterministic,
@@ -170,7 +191,7 @@ def read_tfrecord(
     channels=[TrackChannels.thermal.name, TrackChannels.filtered.name],
 ):
     logging.info(
-        "Read tf record with image %s lbls %s labeld %s aug  %s  prepr %s only features %s one hot %s include fetures %s",
+        "Read tf record with image %s lbls %s labeld %s aug  %s  prepr %s only features %s one hot %s include fetures %s num frames %s",
         image_size,
         num_labels,
         labeled,
@@ -179,6 +200,7 @@ def read_tfrecord(
         only_features,
         one_hot,
         include_features,
+        num_frames,
     )
     load_images = not only_features
     tfrecord_format = {
@@ -197,7 +219,6 @@ def read_tfrecord(
     if include_track:
         tfrecord_format["image/track_id"] = tf.io.FixedLenFeature((), tf.int64, -1)
         tfrecord_format["image/avg_mass"] = tf.io.FixedLenFeature((), tf.int64, -1)
-
     if include_features or only_features:
         tfrecord_format["image/features"] = tf.io.FixedLenSequenceFeature(
             [36 * 5 + 8], dtype=tf.float32, allow_missing=True
@@ -252,6 +273,7 @@ def read_tfrecord(
             if extra_label_map is not None:
                 label = tf.reduce_max(label, axis=0)
         if include_track:
+
             track_id = tf.cast(example["image/track_id"], tf.int32)
             avg_mass = tf.cast(example["image/avg_mass"], tf.int32)
             label = (label, track_id, avg_mass)
@@ -268,17 +290,6 @@ def read_tfrecord(
     return rgb_image
 
 
-def decode_image(thermals, filtereds, image_size):
-    deoced_thermals = []
-    decoded_filtered = []
-    for thermal, filtered in zip(thermals, filtereds):
-        image = tf.image.decode_png(image, channels=1)
-        filtered = tf.image.decode_png(filtered, channels=1)
-        decoded_thermal.append(image)
-        decoded_filtered.append(filtered)
-    return decoded_thermal, decoded_filtered
-
-
 def tile_images(images):
     index = 0
     image = None
@@ -299,12 +310,13 @@ def tile_images(images):
 # test stuff
 def main():
     init_logging()
-    config = Config.load_from_file()
+    config = Config.load_from_file("classifier-thermal.yaml")
     from .tfdataset import get_dataset, get_distribution
 
     # file = "/home/gp/cacophony/classifier-data/thermal-training/cp-training/training-meta.json"
-    file = f"{config.tracks_folder}/training-meta.json"
-    with open(file, "r") as f:
+    training_folder = Path(config.base_folder) / "training-data"
+    meta_f = training_folder / "training-meta.json"
+    with open(meta_f, "r") as f:
         meta = json.load(f)
     labels = meta.get("labels", [])
     datasets = []
@@ -312,7 +324,7 @@ def main():
     resampled_ds, remapped, labels, epoch_size = get_dataset(
         # dir,
         load_dataset,
-        f"{config.tracks_folder}/training-data/test",
+        training_folder / "test",
         labels,
         batch_size=32,
         image_size=(160, 160),
@@ -322,25 +334,31 @@ def main():
         include_features=False,
         remapped_labels=get_remapped(),
         excluded_labels=get_excluded(),
-        include_track=False,
+        include_track=True,
         num_frames=1,
     )
     print("Ecpoh size is", epoch_size)
-    print(get_distribution(resampled_ds, len(labels), extra_meta=False))
+    # print(get_distribution(resampled_ds, len(labels), extra_meta=False))
     # return
     #
-    for e in range(2):
+    save_dir = Path("./test-images")
+    save_dir.mkdir(exist_ok=True)
+    for e in range(1):
+        batch_i = 0
         print("epoch", e)
         for x, y in resampled_ds:
-            show_batch(x, y, labels)
-
+            show_batch(x, y, labels, save=save_dir / f"{batch_i}.jpg", tracks=True)
+            batch_i += 1
     # return
 
 
-def show_batch(image_batch, label_batch, labels):
+def show_batch(image_batch, label_batch, labels, save=None, tracks=False):
     plt.figure(figsize=(10, 10))
     print("images in batch", len(image_batch), len(label_batch))
     num_images = min(len(image_batch), 25)
+    if tracks:
+        track_batch = label_batch[1]
+        label_batch = label_batch[0]
     for n in range(num_images):
         ax = plt.subplot(5, 5, n + 1)
         img = np.uint8(image_batch[n])
@@ -352,10 +370,15 @@ def show_batch(image_batch, label_batch, labels):
         # if repeat > 0:
         # print(img.shape, " repeating", repeat)
         plt.imshow(img)
-        plt.title("C-" + str(image_batch[n]))
-        plt.title(labels[np.argmax(label_batch[n])])
+        if tracks:
+            plt.title(f"{labels[np.argmax(label_batch[n])]}-{track_batch[n]}")
+        else:
+            plt.title(labels[np.argmax(label_batch[n])])
+
         plt.axis("off")
     # return
+    if save:
+        plt.savefig(save)
     plt.show()
 
 
diff --git a/src/ml_tools/thermalwriter.py b/src/ml_tools/thermalwriter.py
index 8a3a290b..891edb54 100644
--- a/src/ml_tools/thermalwriter.py
+++ b/src/ml_tools/thermalwriter.py
@@ -23,6 +23,7 @@
       --output_file_prefix="${OUTPUT_DIR/FILE_PREFIX}" \
       --num_shards=100
 """
+import cv2
 from PIL import Image
 from pathlib import Path
 import time
@@ -54,7 +55,7 @@
 from functools import lru_cache
 
 
-def create_tf_example(sample, data, features, labels, num_frames):
+def create_tf_example(sample, data, features, labels, num_frames, country_code):
     """Converts image and annotations to a tf.Example proto.
 
     Args:
@@ -89,7 +90,7 @@ def create_tf_example(sample, data, features, labels, num_frames):
     average_dim = int(round(np.mean(average_dim) ** 0.5))
     thermals = list(data[0])
     filtereds = list(data[1])
-    image_id = sample.unique_track_id
+    image_id = sample.unique_id
     image_height, image_width = thermals[0].shape
     while len(thermals) < num_frames:
         # ensure 25 frames even if 0s
@@ -127,6 +128,9 @@ def create_tf_example(sample, data, features, labels, num_frames):
         "image/format": tfrecord_util.bytes_feature("jpeg".encode("utf8")),
         "image/class/text": tfrecord_util.bytes_feature(sample.label.encode("utf8")),
         "image/class/label": tfrecord_util.int64_feature(labels.index(sample.label)),
+        "image/country_id": tfrecord_util.bytes_feature(
+            str(country_code).encode("utf8")
+        ),
     }
 
     example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
@@ -156,9 +160,11 @@ def save_data(samples, writer, labels, extra_args):
         return 0
     saved = 0
     try:
-        for data in sample_data:
+        country_code = sample_data[1]
+        sample_data = sample_data[0]
+        for sample, images, features in sample_data:
             tf_example = create_tf_example(
-                data[0], data[1], data[2], labels, extra_args["num_frames"]
+                sample, images, features, labels, extra_args["num_frames"], country_code
             )
             writer.write(tf_example.SerializeToString())
             saved += 1
@@ -175,13 +181,14 @@ def get_data(clip_samples, extra_args):
         return None
     data = []
     crop_rectangle = tools.Rectangle(2, 2, 160 - 2 * 2, 140 - 2 * 2)
+
     if clip_samples[0].source_file.suffix == ".hdf5":
         db = TrackDatabase(clip_samples[0].source_file)
     else:
         db = RawDatabase(clip_samples[0].source_file)
         db.load_frames()
-        # going to redo segments to get rid of ffc segments
 
+    # going to redo segments to get rid of ffc segments
     clip_id = clip_samples[0].clip_id
     try:
         background = db.get_clip_background()
@@ -216,14 +223,18 @@ def get_data(clip_samples, extra_args):
                 # GP All assumes we dont have a track over multiple bins (Whcih we probably never want)
                 if extra_args.get("use_segments", True):
                     track.get_segments(
-                        extra_args.get("segment_frame_spacing", 9),
-                        extra_args.get("segment_width", 25),
-                        extra_args.get("segment_type"),
-                        extra_args.get("segment_min_avg_mass"),
-                        max_segments=extra_args.get("max_segments"),
+                        segment_width=extra_args.get("segment_width", 25),
+                        segment_frame_spacing=extra_args.get(
+                            "segment_frame_spacing", 9
+                        ),
+                        segment_types=extra_args.get("segment_types"),
+                        segment_min_mass=extra_args.get("segment_min_avg_mass"),
                         dont_filter=extra_args.get("dont_filter_segment", False),
                         skip_ffc=extra_args.get("skip_ffc", True),
                         ffc_frames=clip_meta.ffc_frames,
+                        max_segments=len(samples),
+                        frame_min_mass=extra_args.get("min_mass"),
+                        filter_by_fp=extra_args.get("filter_by_fp"),
                     )
                 else:
                     filter_by_lq = extra_args.get("filter_by_lq", False)
@@ -239,6 +250,7 @@ def get_data(clip_samples, extra_args):
                             else track.upper_mass
                         ),
                         ffc_frames=clip_meta.ffc_frames,
+                        max_frames=extra_args.get("max_frames"),
                     )
                 samples = track.samples
                 frame_temp_median = {}
@@ -270,8 +282,13 @@ def get_data(clip_samples, extra_args):
             )
 
             by_frame_number = {}
-            max_diff = 0
-            min_diff = 0
+            thermal_max_diff = None
+            thermal_min_diff = None
+            max_diff = None
+            min_diff = None
+
+            thermal_diff_norm = extra_args.get("thermal_diff_norm", False)
+
             for f in track_frames:
                 if f.region.blank or f.region.width <= 0 or f.region.height <= 0:
                     continue
@@ -281,10 +298,19 @@ def get_data(clip_samples, extra_args):
                 diff_frame = f.thermal - f.region.subimage(background)
                 new_max = np.amax(diff_frame)
                 new_min = np.amin(diff_frame)
-                if new_min < min_diff:
+                if min_diff is None or new_min < min_diff:
                     min_diff = new_min
-                if new_max > max_diff:
+                    # min_diff = max(0, new_min)
+                if max_diff is None or new_max > max_diff:
                     max_diff = new_max
+                if thermal_diff_norm:
+                    diff_frame = f.thermal - frame_temp_median[f.frame_number]
+                    new_max = np.amax(diff_frame)
+                    new_min = np.amin(diff_frame)
+                    if thermal_min_diff is None or new_min < thermal_min_diff:
+                        thermal_min_diff = new_min
+                    if thermal_max_diff is None or new_max > thermal_max_diff:
+                        thermal_max_diff = new_max
 
             # normalize by maximum difference between background and tracked region
             # probably only need to use difference on the frames used for this record
@@ -304,24 +330,38 @@ def get_data(clip_samples, extra_args):
                         frame.resize_with_aspect(
                             (32, 32), crop_rectangle, keep_edge=True
                         )
-                        frame.thermal -= temp_median
-                        np.clip(frame.thermal, a_min=0, a_max=None, out=frame.thermal)
+                        if (
+                            np.amax(frame.thermal) > 50000
+                            or np.amin(frame.thermal) < 1000
+                        ):
+                            logging.error(
+                                "Strange values for %s max %s min %s",
+                                clip_id,
+                                np.amax(frame.thermal),
+                                np.amin(frame.thermal),
+                            )
+                            raise Exception(
+                                f"Strange values for {clip_id} - {track_id} #{frame_number}"
+                            )
 
+                        frame.thermal -= temp_median
+                        if not thermal_diff_norm:
+                            np.clip(
+                                frame.thermal, a_min=0, a_max=None, out=frame.thermal
+                            )
                         frame.thermal, stats = imageprocessing.normalize(
-                            frame.thermal, new_max=255
+                            frame.thermal,
+                            min=thermal_min_diff,
+                            max=thermal_max_diff,
+                            new_max=255,
                         )
                         if not stats[0]:
                             frame.thermal = np.zeros((frame.thermal.shape))
-                            # continue
-                        # f2 = frame.filtered.copy()
-                        # frame.filtered, stats = imageprocessing.normalize(
-                        #     frame.filtered, new_max=255
-                        # )
-                        # np.clip(frame.filtered, a_min=min_diff, a_max=None, out=frame.filtered)
 
                         frame.filtered, stats = imageprocessing.normalize(
                             frame.filtered, min=min_diff, max=max_diff, new_max=255
                         )
+                        np.clip(frame.filtered, a_min=0, a_max=None, out=frame.filtered)
 
                         if not stats[0]:
                             frame.filtered = np.zeros((frame.filtered.shape))
@@ -337,4 +377,4 @@ def get_data(clip_samples, extra_args):
             "Cant get Samples for %s", clip_samples[0].source_file, exc_info=True
         )
         return None
-    return data
+    return (data, clip_meta.country_code)
diff --git a/src/ml_tools/tools.py b/src/ml_tools/tools.py
index 73355ca4..cad64667 100644
--- a/src/ml_tools/tools.py
+++ b/src/ml_tools/tools.py
@@ -6,7 +6,6 @@
 import numpy as np
 import pickle
 import json
-import dateutil
 import datetime
 import glob
 import cv2
@@ -15,6 +14,8 @@
 from PIL import Image, ImageFont, ImageDraw
 from pathlib import Path
 from ml_tools.rectangle import Rectangle
+from dateutil import parser
+from enum import Enum
 
 EPISON = 1e-5
 
@@ -52,6 +53,10 @@ def default(self, obj):
             return obj.isoformat()
         elif isinstance(obj, Rectangle):
             return obj.meta_dictionary()
+        elif isinstance(obj, Path):
+            return str(obj)
+        elif isinstance(obj, Enum):
+            return str(obj.name)
         # Let the base class default method raise the TypeError
         return json.JSONEncoder.default(self, obj)
 
@@ -92,7 +97,7 @@ def load_clip_metadata(filename):
         # add in some metadata stats
         meta = json.load(t)
     if meta.get("recordingDateTime"):
-        meta["recordingDateTime"] = dateutil.parser.parse(meta["recordingDateTime"])
+        meta["recordingDateTime"] = parser.parse(meta["recordingDateTime"])
     if meta.get("tracks") is None and meta.get("Tracks"):
         meta["tracks"] = meta["Tracks"]
     return meta
@@ -189,8 +194,17 @@ def saveclassify_image(data, filename):
     # saves image channels side by side, expected data to be values in the range of 0->1
     Path(filename).parent.mkdir(parents=True, exist_ok=True)
     r = Image.fromarray(np.uint8(data[:, :, 0]))
-    g = Image.fromarray(np.uint8(data[:, :, 1]))
-    b = Image.fromarray(np.uint8(data[:, :, 2]))
+    _, _, channels = data.shape
+
+    if channels == 1:
+        g = r
+    else:
+        g = Image.fromarray(np.uint8(data[:, :, 1]))
+
+    if channels == 2:
+        b = r
+    else:
+        b = Image.fromarray(np.uint8(data[:, :, 2]))
     concat = np.concatenate((r, g, b), axis=1)  # horizontally
     img = Image.fromarray(np.uint8(concat))
     img.save(filename + ".png")
diff --git a/src/mldataset/makedataset.py b/src/mldataset/makedataset.py
index 65368843..cbb7e75a 100644
--- a/src/mldataset/makedataset.py
+++ b/src/mldataset/makedataset.py
@@ -131,7 +131,7 @@ def process_file(self, filename, out_dir, config):
         clip = Clip(config.tracking["thermal"], filename)
         clip.load_metadata(
             metadata,
-            config.load.tag_precedence,
+            config.build.tag_precedence,
         )
 
         with h5py.File(out_file, "w") as f:
@@ -263,7 +263,7 @@ def process_file(self, filename, out_dir, config):
                     node_attrs["id"] = track_id
                     tags = track.get("tags", [])
                     tag = Track.get_best_human_tag(
-                        tags, self.config.load.tag_precedence, 0
+                        tags, self.config.build.tag_precedence, 0
                     )
 
                     master_tag = [
diff --git a/src/modelevaluate.py b/src/modelevaluate.py
index 397b2446..b665972c 100644
--- a/src/modelevaluate.py
+++ b/src/modelevaluate.py
@@ -44,9 +44,10 @@
 from ml_tools.frame import Frame
 from ml_tools import imageprocessing
 import cv2
-from config.loadconfig import LoadConfig
+from config.buildconfig import BuildConfig
 from sklearn.metrics import confusion_matrix
 from multiprocessing import Pool
+from dateutil.parser import parse as parse_date
 
 
 root_logger = logging.getLogger()
@@ -72,8 +73,9 @@
 def model_score(cm, labels):
     cm = np.around(cm.astype("float") / cm.sum(axis=1)[:, np.newaxis], decimals=2)
     cm = np.nan_to_num(cm)
-
-    fp_index = labels.index("false-positive")
+    fp_index = None
+    if "false-positive" in labels:
+        fp_index = labels.index("false-positive")
     none_index = None
     unid_index = None
     if "None" in labels:
@@ -82,7 +84,9 @@ def model_score(cm, labels):
         unid_index = labels.index("unidentified")
     score = 0
     for l_i, l in enumerate(labels):
-        fp_acc = cm[l_i][fp_index]
+        fp_acc = 0
+        if fp_index is not None:
+            fp_acc = cm[l_i][fp_index]
         none_acc = 0
         unid_acc = 0
         accuracy = cm[l_i][l_i]
@@ -157,12 +161,20 @@ def load_args():
         "--evaluate-dir",
         help="Evalute directory of cptv files",
     )
-
+    parser.add_argument(
+        "--model-metadata",
+        help="Meta data file for model, used with confusion from meta",
+    )
     parser.add_argument("-c", "--config-file", help="Path to config file to use")
 
     parser.add_argument("-d", "--date", help="Use clips after this")
 
     parser.add_argument("--split-file", help="Use split for evaluation")
+    parser.add_argument(
+        "--confusion-from-meta",
+        action="count",
+        help="Use metadata to produce a confusion matrix",
+    )
 
     parser.add_argument(
         "confusion",
@@ -213,69 +225,113 @@ def filter_diffs(track_frames, background):
     return min_diff, max_diff
 
 
-def evalute_prod_confusion(dir, confusion_file):
+# evaluate a confusion matrix from metadata of files, already evaluated by our current model on browse
+
+
+def metadata_confusion(dir, confusion_file, after_date=None, model_metadata=None):
     with open("label_paths.json", "r") as f:
         label_paths = json.load(f)
     label_mapping = get_mappings(label_paths)
-
-    labels = [
-        "bird",
-        "cat",
-        "deer",
-        "dog",
-        "false-positive",
-        "hedgehog",
-        "human",
-        "kiwi",
-        "leporidae",
-        "mustelid",
-        "penguin",
-        "possum",
-        "rodent",
-        "sheep",
-        "vehicle",
-        "wallaby",
-        "land-bird",
-        "None",
-        "unidentified",
-    ]
+    if model_metadata is not None and Path(model_metadata).exists():
+        with open(model_metadata, "r") as t:
+            # add in some metadata stats
+            model_meta = json.load(t)
+        labels = model_meta.get("labels", [])
+        excluded_labels = model_meta.get("excluded_labels", {})
+        remapped_labels = model_meta.get("remapped_labels", {})
+        for k, v in remapped_labels.items():
+            if v == "land-bird":
+                remapped_labels[k] = "bird"
+        if "None" not in labels:
+            labels.append("None")
+        if "unidentified" not in labels:
+            labels.append("unidentified")
+    else:
+        labels = [
+            "bird",
+            "cat",
+            "deer",
+            "dog",
+            "falsepositive",
+            "hedgehog",
+            "human",
+            "kiwi",
+            "leporidae",
+            "mustelid",
+            "penguin",
+            "possum",
+            "rodent",
+            "sheep",
+            "vehicle",
+            "wallaby",
+            "landbird",
+            "None",
+            "unidentified",
+        ]
+        excluded_labels, remapped_labels = get_excluded("thermal")
+    logging.info(
+        "Labels are %s excluded %s remapped %s",
+        labels,
+        excluded_labels,
+        remapped_labels,
+    )
     y_true = []
     y_pred = []
     dir = Path(dir)
     for cptv_file in dir.glob(f"**/*cptv"):
         meta_f = cptv_file.with_suffix(".txt")
+        if not meta_f.exists():
+            continue
         meta_data = None
         with open(meta_f, "r") as t:
             # add in some metadata stats
             meta_data = json.load(t)
-
+        rec_time = parse_date(meta_data["recordingDateTime"])
+        if after_date is not None and rec_time <= after_date:
+            continue
         for track in meta_data.get("Tracks", []):
             tags = track.get("tags", [])
             human_tags = [
-                tag.get("what")
-                for tag in tags
-                if tag.get("automatic") == False
-                # and tag.get("what", "") not in LoadConfig.EXCLUDED_TAGS
+                tag.get("what") for tag in tags if tag.get("automatic") == False
             ]
             human_tags = set(human_tags)
             if len(human_tags) > 1:
                 print("Conflicting tags for ", track.get("id"), cptv_file)
             if len(human_tags) == 0:
-                print("No humans in ", tags)
+                print("No humans in ", meta_f)
                 continue
             human_tag = human_tags.pop()
             human_tag = label_mapping.get(human_tag, human_tag)
-            ai_tag = [
-                tag.get("what")
-                for tag in tags
-                if tag.get("automatic") is True
-                and tag.get("data", {}).get("name") == "Inc3 RF"
-            ]
+            if human_tag in excluded_labels:
+                logging.info("Excluding %s", human_tag)
+                continue
+            if human_tag in remapped_labels:
+                logging.info(
+                    "Remapping %s to %s", human_tag, remapped_labels[human_tag]
+                )
+                human_tag = remapped_labels[human_tag]
+            # if human_tag not in labels:
+            # logging.info("Excluding %s", human_tag)
+
+            ai_tags = []
+            for tag in tags:
+                if tag.get("automatic") is True:
+                    data = tag.get("data", {})
+                    if isinstance(data, str):
+                        if data == "Master":
+                            ai_tags.append(tag["what"])
+                    elif data.get("name") == "Master":
+                        ai_tags.append(tag["what"])
+
             y_true.append(human_tag)
-            if len(ai_tag) == 0:
+            if human_tag not in labels:
+                labels.append(human_tag)
+            if len(ai_tags) == 0:
                 y_pred.append("None")
             else:
-                y_pred.append(ai_tag[0])
+                y_pred.append(ai_tags[0])
+                if ai_tags[0] not in labels:
+                    labels.append(ai_tags[0])
 
     cm = confusion_matrix(y_true, y_pred, labels=labels)
     # Log the confusion matrix as an image summary.
@@ -288,24 +344,26 @@ def evalute_prod_confusion(dir, confusion_file):
 
 EXCLUDED_TAGS = ["poor tracking", "part", "untagged", "unidentified"]
 worker_model = None
+after_date = None
 
 
-def init_worker(model):
-    global worker_model
+def init_worker(model, date):
+    global worker_model, after_date
     worker_model = model
+    after_date = date
 
 
 def load_clip_data(cptv_file):
     # for clip in dataset.clips:
     reason = {}
     clip_db = RawDatabase(cptv_file)
-    clip = clip_db.get_clip_tracks(LoadConfig.DEFAULT_GROUPS)
+    clip = clip_db.get_clip_tracks(BuildConfig.DEFAULT_GROUPS)
     if clip is None:
         logging.warn("No clip for %s", cptv_file)
         return None
 
-    if filter_clip(clip, reason):
-        logging.info("Filtering %s", cptv_file)
+    if filter_clip(clip, None, None, reason, after_date=after_date):
+        # logging.info("Filtering %s", cptv_file)
         return None
     clip.tracks = [
         track for track in clip.tracks if not filter_track(track, EXCLUDED_TAGS, reason)
@@ -321,18 +379,21 @@ def load_clip_data(cptv_file):
     thermal_medians = np.uint16(thermal_medians)
     data = []
     for track in clip.tracks:
-        frames, preprocessed, masses = worker_model.preprocess(
-            clip_db, track, frames_per_classify=25, dont_filter=True
-        )
-        data.append(
-            (
-                f"{track.clip_id}-{track.get_id()}",
-                track.label,
-                frames,
-                preprocessed,
-                masses,
+        try:
+            frames, preprocessed, masses = worker_model.preprocess(
+                clip_db, track, frames_per_classify=25, dont_filter=True, min_segments=1
             )
-        )
+            data.append(
+                (
+                    f"{track.clip_id}-{track.get_id()}",
+                    track.label,
+                    frames,
+                    preprocessed,
+                    masses,
+                )
+            )
+        except:
+            logging.error("Could not load %s", clip.clip_id, exc_info=True)
     return data
 
 
@@ -350,6 +411,7 @@ def evaluate_dir(
     split_file=None,
     split_dataset="test",
     threshold=0.5,
+    after_date=None,
 ):
     logging.info("Evaluating cptv files in %s with threshold %s", dir, threshold)
 
@@ -375,25 +437,40 @@ def evaluate_dir(
     # files = files[:8]
     start = time.time()
     # quite faster with just one process for loading and using main process for predicting
-    with Pool(processes=1, initializer=init_worker, initargs=(model,)) as pool:
+    with Pool(
+        processes=1,
+        initializer=init_worker,
+        initargs=(
+            model,
+            after_date,
+        ),
+    ) as pool:
         for clip_data in pool.imap_unordered(load_clip_data, files):
             if clip_data is None:
                 continue
             for data in clip_data:
                 label = data[1]
                 preprocessed = data[3]
+                if len(preprocessed) == 0:
+                    logging.info("No data found for %s", data[0])
+                    y_true.append(label_mapping.get(label, label))
+                    y_pred.append("None")
+                    continue
                 output = model.predict(preprocessed)
+
                 prediction = TrackPrediction(data[0], model.labels)
                 masses = np.array(data[4])
                 masses = masses[:, None]
                 top_score = None
-                # if model.params.multi_label is True:
-                #     # every label could be 1 for each prediction
-                #     top_score = len(output)
+                if model.params.multi_label is True:
+                    #     # every label could be 1 for each prediction
+                    top_score = np.sum(masses)
                 #     smoothed = output
                 # else:
-                # smoothed = output * output * masses
-                prediction.classified_clip(output, output, data[2], top_score=top_score)
+                smoothed = output * masses
+                prediction.classified_clip(
+                    output, smoothed, data[2], masses, top_score=top_score
+                )
                 y_true.append(label_mapping.get(label, label))
                 predicted_labels = [prediction.predicted_tag()]
                 confidence = prediction.max_score
@@ -407,16 +484,13 @@ def evaluate_dir(
                     predicted_tag = ",".join(predicted_labels)
                     y_pred.append(predicted_tag)
                 if y_pred[-1] != y_true[-1]:
-                    print(
+                    logging.info(
+                        "%s predicted %s but should be %s with confidence %s",
                         data[0],
-                        "Got a prediction of",
                         y_pred[-1],
-                        " should be ",
                         label,
                         np.round(100 * prediction.class_best_score),
                     )
-                # if predicted_tag not in model.labels:
-                # model.labels.append(predicted_tag)
     model.labels.append("None")
     model.labels.append("unidentified")
     cm = confusion_matrix(y_true, y_pred, labels=model.labels)
@@ -466,53 +540,69 @@ def main():
     if args.weights:
         weights = model_file / args.weights
     base_dir = Path(config.base_folder) / "training-data"
-
-    model = KerasModel(train_config=config.train)
-    model.load_model(model_file, training=False, weights=weights)
-
-    if args.evaluate_dir:
-        evaluate_dir(
-            model,
-            Path(args.evaluate_dir),
-            config,
-            args.confusion,
-            args.split_file,
-            args.dataset,
-            threshold=args.threshold,
-        )
-    elif args.dataset:
-        model.load_training_meta(base_dir)
-        if model.params.multi_label:
-            model.labels.append("land-bird")
-        excluded, remapped = get_excluded(model.data_type)
-        files = base_dir / args.dataset
-        dataset, _, new_labels, _ = get_dataset(
-            files,
-            model.data_type,
-            model.labels,
-            batch_size=64,
-            image_size=model.params.output_dim[:2],
-            preprocess_fn=model.preprocess_fn,
-            augment=False,
-            resample=False,
-            include_features=model.params.mvm,
-            one_hot=True,
-            deterministic=True,
-            shuffle=False,
-            excluded_labels=excluded,
-            remapped_labels=remapped,
-            multi_label=model.params.multi_label,
-            include_track=True,
-            cache=True,
-            channels=model.params.channels,
+    if args.evaluate_dir and args.confusion_from_meta:
+        metadata_confusion(
+            Path(args.evaluate_dir), args.confusion, args.date, args.model_metadata
         )
-        model.labels = new_labels
-        logging.info(
-            "Dataset loaded %s, using labels %s",
-            args.dataset,
-            model.labels,
-        )
-        model.confusion_tracks(dataset, args.confusion, threshold=args.threshold)
+    else:
+
+        model = KerasModel(train_config=config.train)
+        model.load_model(model_file, training=False, weights=weights)
+        if args.evaluate_dir:
+            evaluate_dir(
+                model,
+                Path(args.evaluate_dir),
+                config,
+                args.confusion,
+                args.split_file,
+                args.dataset,
+                threshold=args.threshold,
+                after_date=args.date,
+            )
+        elif args.dataset:
+            model_labels = model.labels.copy()
+            model.load_training_meta(base_dir)
+            # model.labels = model_labels
+            if model.params.multi_label:
+                model.labels.append("land-bird")
+            excluded, remapped = get_excluded(model.data_type)
+
+            if model.params.excluded_labels is not None:
+                excluded = model.params.excluded_labels
+
+            if model.params.remapped_labels is not None:
+                remapped = model.params.remapped_labels
+
+            files = base_dir / args.dataset
+            dataset, _, new_labels, _ = get_dataset(
+                files,
+                model.data_type,
+                model.labels,
+                model_labels=model_labels,
+                batch_size=64,
+                image_size=model.params.output_dim[:2],
+                preprocess_fn=model.preprocess_fn,
+                augment=False,
+                resample=False,
+                include_features=model.params.mvm,
+                one_hot=True,
+                deterministic=True,
+                shuffle=False,
+                excluded_labels=excluded,
+                remapped_labels=remapped,
+                multi_label=model.params.multi_label,
+                include_track=True,
+                cache=True,
+                channels=model.params.channels,
+                num_frames=model.params.square_width**2,
+            )
+            model.labels = new_labels
+            logging.info(
+                "Dataset loaded %s, using labels %s",
+                args.dataset,
+                model.labels,
+            )
+            model.confusion_tracks(dataset, args.confusion, threshold=args.threshold)
 
 
 if __name__ == "__main__":
diff --git a/src/rebuildDate.py b/src/rebuildDate.py
index dadf1a6a..661e2d60 100644
--- a/src/rebuildDate.py
+++ b/src/rebuildDate.py
@@ -5,15 +5,23 @@
 from config.config import Config
 from datetime import timedelta
 from datetime import date
+from pathlib import Path
+from dateutil.parser import parse as parse_date
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-c", "--config-file", help="Path to config file to use")
+parser.add_argument("data_dir", help="Directory of cptv files")
 args = parser.parse_args()
+args.data_dir = Path(args.data_dir)
+latest_date = None
+for db_clip in args.data_dir.glob(f"**/*.cptv"):
+    file_name = db_clip.name
+    hyphen = file_name.index("-")
+    date_s = file_name[hyphen + 1 : hyphen + 16]
+    cptv_dt = parse_date(date_s)
+    if latest_date is None or cptv_dt > latest_date:
+        latest_date = cptv_dt
 
-config = Config.load_from_file(args.config_file)
-db_file = os.path.join(config.tracks_folder, "dataset.hdf5")
-db = TrackDatabase(db_file)
-latest_date = db.latest_date()
-month_ago = latest_date - timedelta(days=30)
+
+month_ago = latest_date - timedelta(days=30 * 6)
 month_ago = month_ago.strftime("%Y-%m-%d 00:00:00")
 print(month_ago)
diff --git a/src/track/clip.py b/src/track/clip.py
index e21ccbc8..72cc489d 100644
--- a/src/track/clip.py
+++ b/src/track/clip.py
@@ -185,7 +185,6 @@ def calculate_background(self, frame_reader):
             self.update_background(frame.pix)
             self._background_calculated()
             return
-
         first_frame = frame
         initial_frames = None
         initial_diff = None
diff --git a/src/track/track.py b/src/track/track.py
index 5550391c..f265b014 100644
--- a/src/track/track.py
+++ b/src/track/track.py
@@ -439,11 +439,13 @@ def get_segments(
         repeats=1,
         min_frames=0,
         segment_frames=None,
-        segment_type=SegmentType.ALL_RANDOM,
+        segment_types=[SegmentType.ALL_RANDOM],
         from_last=None,
         max_segments=None,
         ffc_frames=None,
         dont_filter=False,
+        filter_by_fp=False,
+        min_segments=1,
     ):
         if from_last is not None:
             if from_last == 0:
@@ -485,13 +487,13 @@ def get_segments(
                 regions=regions,
                 ffc_frames=ffc_frames,
                 repeats=repeats,
-                # frame_temp_median=frame_temp_median,
                 min_frames=min_frames,
-                segment_frames=None,
-                segment_type=segment_type,
+                segment_types=segment_types,
                 max_segments=max_segments,
                 dont_filter=dont_filter,
+                min_segments=min_segments,
             )
+
         return segments
 
     @classmethod
diff --git a/src/train.py b/src/train.py
index 5091b78b..16677575 100644
--- a/src/train.py
+++ b/src/train.py
@@ -45,6 +45,7 @@ def load_config():
     parser.add_argument("-w", "--weights", help="Fine tune using these weights")
     parser.add_argument("-i", "--ignore", help="Ignore clips in this file")
     parser.add_argument("-e", "--epochs", type=int, help="Epochs to train")
+    parser.add_argument("-f", "--fine_tune", help="Model to fine tune")
 
     parser.add_argument(
         "name",
@@ -67,6 +68,7 @@ def main():
         weights=args.weights,
         ignore=args.ignore,
         epochs=args.epochs,
+        fine_tune=args.fine_tune,
     )
 
 
diff --git a/src/train/train.py b/src/train/train.py
index 880678b0..60af5ca8 100644
--- a/src/train/train.py
+++ b/src/train/train.py
@@ -28,7 +28,7 @@ def remove_fp_segments(datasets, ignore_file):
                 print("deleting segment", segment.unique_track_id)
         for delete in delete_me:
             try:
-                datset.remove_track(delete.track_id)
+                dataset.remove_track(delete.track_id)
             except:
                 pass
             dataset.segments.remove(delete)
@@ -44,6 +44,7 @@ def train_model(
     do_grid_search=None,
     ignore=None,
     epochs=None,
+    fine_tune=None,
 ):
     init_logging()
     """Trains a model with the given hyper parameters."""
@@ -77,6 +78,7 @@ def train_model(
             weights=weights,
             resample=False,
             rebalance=False,
+            fine_tune=fine_tune,
         )
     except KeyboardInterrupt:
         pass