From a3271fa99cc045cf2f0fcb79068a65c658fed0c1 Mon Sep 17 00:00:00 2001 From: Campbell Allen Date: Thu, 30 Mar 2023 15:41:58 +1000 Subject: [PATCH] add note on avoiding low data training runs --- app/sidekiq/retrain_zoobot_job.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/app/sidekiq/retrain_zoobot_job.rb b/app/sidekiq/retrain_zoobot_job.rb index 86c9b38..3774b49 100644 --- a/app/sidekiq/retrain_zoobot_job.rb +++ b/app/sidekiq/retrain_zoobot_job.rb @@ -26,6 +26,12 @@ def perform(context_id) Export::TrainingData.new(training_data_export).run end + # NOTE: this is where we could intercept the training job submission + # to avoid a training run when there isn't enough data for a viable model. + # One idea is to count the rows in the file attached to the training data export, + # or, even better, to store the number of exported rows on the training data export model. + # See https://github.com/zooniverse/kade/issues/62 + # create a new training job record to track the batch training job training_job = create_training_job(training_data_export.storage_path, training_context.workflow_id) # submit the export training job to the batch training service