diff --git a/bajor/batch/predictions.py b/bajor/batch/predictions.py index 6644096..bd23a4a 100644 --- a/bajor/batch/predictions.py +++ b/bajor/batch/predictions.py @@ -88,6 +88,12 @@ def create_batch_job(job_id, manifest_url, pool_id, checkpoint_target='ZOOBOT_CH job.job_preparation_task = batchmodels.JobPreparationTask( command_line=f'/bin/bash -c \"set -ex; {create_results_dir}; {copy_code_to_shared_dir}\"', constraints=batchmodels.TaskConstraints(max_task_retry_count=3), + user_identity = batchmodels.UserIdentity( + auto_user=batchmodels.AutoUserSpecification( + scope=batchmodels.AutoUserScope.task, + elevation_level=batchmodels.ElevationLevel.admin + ) + ), # # A busted preparation task means the main task won't launch...ever! # and leave the node in a scaled state costing $$ ££ diff --git a/bajor/batch/train_finetuning.py b/bajor/batch/train_finetuning.py index 6902698..fd146d3 100644 --- a/bajor/batch/train_finetuning.py +++ b/bajor/batch/train_finetuning.py @@ -116,6 +116,12 @@ def create_batch_job(job_id, manifest_container_path, pool_id, checkpoint_target job.job_preparation_task = batchmodels.JobPreparationTask( command_line=f'/bin/bash -c \"set -ex; {setup_pytorch_kernel_cache_dir}; {create_results_dir}; {copy_code_to_shared_dir}\"', constraints=batchmodels.TaskConstraints(max_task_retry_count=3), + user_identity = batchmodels.UserIdentity( + auto_user=batchmodels.AutoUserSpecification( + scope=batchmodels.AutoUserScope.task, + elevation_level=batchmodels.ElevationLevel.admin + ) + ), # # A busted preparation task means the main task won't launch...ever! # and leave the node in a scaled state costing $$ ££