From da69671c0dba5b801e13d21fee52fc1e586b654c Mon Sep 17 00:00:00 2001
From: Mike Walmsley <walmsleymk1@gmail.com>
Date: Fri, 1 Oct 2021 18:44:51 +0100
Subject: [PATCH] Add a few warnings about uint vs float

---
 make_predictions.py                    | 2 +-
 train_model.py                         | 2 +-
 zoobot/data_utils/tfrecord_datasets.py | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/make_predictions.py b/make_predictions.py
index 80496359..00cc8141 100755
--- a/make_predictions.py
+++ b/make_predictions.py
@@ -46,7 +46,7 @@
         label_cols=[],  # no labels are needed, we're only doing predictions
         input_size=initial_size,
         make_greyscale=True,
-        normalise_from_uint8=True
+        normalise_from_uint8=True  # False for tfrecords with 0-1 floats, True for png/jpg with 0-255 uints
     )
     image_ds = preprocess.preprocess_dataset(raw_image_ds, preprocessing_config)
     # image_ds will give batches of (images, paths) when label_cols=[]
diff --git a/train_model.py b/train_model.py
index 383a9165..1c075d73 100755
--- a/train_model.py
+++ b/train_model.py
@@ -103,7 +103,7 @@
         label_cols=schema.label_cols,
         input_size=initial_size,
         make_greyscale=greyscale,
-        normalise_from_uint8=False
+        normalise_from_uint8=False  # False for tfrecords with 0-1 floats, True for png/jpg with 0-255 uints
     )
     train_dataset = preprocess.preprocess_dataset(raw_train_dataset, preprocess_config)
     test_dataset = preprocess.preprocess_dataset(raw_test_dataset, preprocess_config)
diff --git a/zoobot/data_utils/tfrecord_datasets.py b/zoobot/data_utils/tfrecord_datasets.py
index 1c3fd1a3..41bb4d14 100755
--- a/zoobot/data_utils/tfrecord_datasets.py
+++ b/zoobot/data_utils/tfrecord_datasets.py
@@ -144,4 +144,5 @@ def construct_feature_spec(expected_features: Dict) -> Dict:
 def cast_bytes_of_uint8_to_float32(some_bytes):
     # bytes are uint of range 0-255 (i.e. pixels)
     # floats are 0-1 by convention (and may be clipped if not)
+    # images decoded from tfrecords by this function are already 0-1 floats, so do NOT divide by 255 again downstream (see preprocess.py, normalise_from_uint8 should be False)
     return tf.cast(tf.io.decode_raw(some_bytes, out_type=tf.uint8), tf.float32) / 255.