From 2253eb6814456e46fa7db6f08ea3e00592e57c3f Mon Sep 17 00:00:00 2001 From: NovicaINS Date: Fri, 20 Dec 2024 17:25:21 +0100 Subject: [PATCH] fix: fixing delete trained model to prevent deleting base trained model, fix check in processing jobs before delete training/detection run, fix map controller active class not set, fix conda and root directory path error in start detection run, fixing x unit tests feat: clean up training run files --- .../TrainingServices/TrainingRunService.cs | 58 +++++++++++-- .../Controllers/DetectionController.cs | 15 ++++ .../Controllers/TrainingRunsController.cs | 87 +++++++++++++++++-- .../Views/Shared/_Sidebar.cshtml | 2 +- .../Services/DatasetServiceTests.cs | 2 +- .../TrainingRunsControllerTests.cs | 18 ++++ 6 files changed, 167 insertions(+), 15 deletions(-) diff --git a/MainApp.BL/Services/TrainingServices/TrainingRunService.cs b/MainApp.BL/Services/TrainingServices/TrainingRunService.cs index ba63dbd..0cf7608 100644 --- a/MainApp.BL/Services/TrainingServices/TrainingRunService.cs +++ b/MainApp.BL/Services/TrainingServices/TrainingRunService.cs @@ -298,12 +298,20 @@ public async Task StartTrainingRun(Guid trainingRunId) Directory.CreateDirectory(detectionCliLogsAbsPath); } - BufferedCommandResult powerShellResults = await Cli.Wrap(_MMDetectionConfiguration.GetCondaExeAbsPath()) - .WithWorkingDirectory(_MMDetectionConfiguration.GetRootDirAbsPath()) + string? condaExeAbsPath = _MMDetectionConfiguration.GetCondaExeAbsPath(); + if (!File.Exists(condaExeAbsPath)) + return ResultDTO.Fail($"Conda.exe file does not exist on this path: {condaExeAbsPath}"); + + string? rootDirAbsPath = _MMDetectionConfiguration.GetRootDirAbsPath(); + if (!Directory.Exists(rootDirAbsPath)) + return ResultDTO.Fail($"Root directory does not exist on this path: {rootDirAbsPath}"); + + BufferedCommandResult powerShellResults = await Cli.Wrap(condaExeAbsPath) + .WithWorkingDirectory(rootDirAbsPath) .WithValidation(CommandResultValidation.None) .WithArguments(trainingCommand.ToLower()) - .WithStandardOutputPipe(PipeTarget.ToFile(Path.Combine(_MMDetectionConfiguration.GetTrainingRunCliOutDirAbsPath(), $"succ_{trainingRunId}.txt"))) - .WithStandardErrorPipe(PipeTarget.ToFile(Path.Combine(_MMDetectionConfiguration.GetTrainingRunCliOutDirAbsPath(), $"error_{trainingRunId}.txt"))) + .WithStandardOutputPipe(PipeTarget.ToFile(Path.Combine(trainingCliLogsAbsPath, $"succ_{trainingRunId}.txt"))) + .WithStandardErrorPipe(PipeTarget.ToFile(Path.Combine(trainingCliLogsAbsPath, $"error_{trainingRunId}.txt"))) .ExecuteBufferedAsync(); if (powerShellResults.IsSuccess == false) @@ -643,6 +651,34 @@ public async Task DeleteTrainingRun(Guid trainingRunId, string wwwroo } } + //delete config folder for training run from mmdetection + string? configTrainingRunFolder = _MMDetectionConfiguration.GetTrainingRunConfigDirAbsPathByRunId(trainingRunId); + if (Directory.Exists(configTrainingRunFolder)) + { + try + { + Directory.Delete(configTrainingRunFolder, recursive: true); + } + catch (Exception ex) + { + return ResultDTO.Fail($"Failed to delete folder: {ex.Message}"); + } + } + + //delete data folder for training run from mmdetection + string datasetTrainingRunFolder = _MMDetectionConfiguration.GetTrainingRunDatasetDirAbsPath(trainingRunId); + if(Directory.Exists(datasetTrainingRunFolder)) + { + try + { + Directory.Delete(datasetTrainingRunFolder, recursive: true); + } + catch (Exception ex) + { + return ResultDTO.Fail($"Failed to delete folder: {ex.Message}"); + } + } + //get trained model entity ResultDTO? resultGetTrainedModel = await _trainedModelsRepository.GetById(resultGetEntity.Data.TrainedModelId!.Value, track: true); if (!resultGetTrainedModel.IsSuccess && resultGetTrainedModel.HandleError()) @@ -661,10 +697,16 @@ public async Task DeleteTrainingRun(Guid trainingRunId, string wwwroo List? trainingModelIdsList = resultGetAllTrainingRuns.Data.Where(x => x.Id != trainingRunId).Select(x => x.TrainedModelId).ToList(); if (!trainingModelIdsList.Contains(resultGetTrainedModel.Data.Id)) { - //detele trained model from db if it is not contained in other training runs - ResultDTO? resultDeleteTrainedModel = await _trainedModelsRepository.Delete(resultGetTrainedModel.Data); - if (!resultDeleteTrainedModel.IsSuccess && resultDeleteTrainedModel.HandleError()) - return ResultDTO.Fail(resultDeleteTrainedModel.ErrMsg!); + //check if the trained model is not base model to prevent deleting base trained model + if(resultGetTrainedModel.Data.BaseModelId != null) + { + //detele trained model from db if it is not contained in other training runs + ResultDTO? resultDeleteTrainedModel = await _trainedModelsRepository.Delete(resultGetTrainedModel.Data); + if (!resultDeleteTrainedModel.IsSuccess && resultDeleteTrainedModel.HandleError()) + return ResultDTO.Fail(resultDeleteTrainedModel.ErrMsg!); + } + + } //delete training run from db diff --git a/MainApp.MVC/Areas/IntranetPortal/Controllers/DetectionController.cs b/MainApp.MVC/Areas/IntranetPortal/Controllers/DetectionController.cs index 8b9f300..61ee1c6 100644 --- a/MainApp.MVC/Areas/IntranetPortal/Controllers/DetectionController.cs +++ b/MainApp.MVC/Areas/IntranetPortal/Controllers/DetectionController.cs @@ -531,6 +531,21 @@ public async Task DeleteDetectionRun(Guid detectionRunId) } } + JobList? processingJobs = monitoringApi.ProcessingJobs(0, int.MaxValue); + if (processingJobs == null) + return ResultDTO.Fail("Processing jobs not found"); + + foreach (KeyValuePair job in processingJobs) + { + string jobId = job.Key; + using (IStorageConnection connection = JobStorage.Current.GetConnection()) + { + string storedKey = connection.GetJobParameter(jobId, "detectionRunId"); + if (storedKey == detectionRunId.ToString()) + return ResultDTO.Fail("Can not delete detection run because it is in process"); + } + } + ResultDTO resultDeleteEntity = await _detectionRunService.DeleteDetectionRun(detectionRunId, _webHostEnvironment.WebRootPath); if (!resultDeleteEntity.IsSuccess && resultDeleteEntity.HandleError()) return ResultDTO.Fail(resultDeleteEntity.ErrMsg!); diff --git a/MainApp.MVC/Areas/IntranetPortal/Controllers/TrainingRunsController.cs b/MainApp.MVC/Areas/IntranetPortal/Controllers/TrainingRunsController.cs index e0ee855..383cde9 100644 --- a/MainApp.MVC/Areas/IntranetPortal/Controllers/TrainingRunsController.cs +++ b/MainApp.MVC/Areas/IntranetPortal/Controllers/TrainingRunsController.cs @@ -58,7 +58,7 @@ public TrainingRunsController(ITrainingRunService trainingRunService, _configuration = configuration; _mapper = mapper; } - + [HttpGet] [HasAuthClaim(nameof(SD.AuthClaims.ViewTrainingRuns))] public async Task Index() @@ -81,7 +81,7 @@ public async Task Index() { return HandleErrorRedirect("ErrorViewsPath:Error", 400); } - + } [HttpGet] @@ -181,6 +181,7 @@ public async Task ScheduleTrainingRun(TrainingRunViewModel viewModel) [HasAuthClaim(nameof(SD.AuthClaims.ScheduleTrainingRun))] public async Task ExecuteTrainingRunProcess(TrainingRunDTO trainingRunDTO, TrainingRunTrainParamsDTO trainingRunTrainParamsDTO) { + bool result = false; try { //int numEpochs = 1; @@ -238,6 +239,7 @@ public async Task ExecuteTrainingRunProcess(TrainingRunDTO trainingRu if (updateTrainRunResultSuccess.IsSuccess == false && updateTrainRunResultSuccess.HandleError()) return ResultDTO.Fail(updateTrainRunResultSuccess.ErrMsg!); + result = true; return ResultDTO.Ok(); } catch (Exception ex) @@ -246,7 +248,67 @@ public async Task ExecuteTrainingRunProcess(TrainingRunDTO trainingRu } finally { - // TODO: Clean Up Training Run Files, Later + //Clean Up Training Run Files + + //1. delete data folder for training run + string? datasetTrainingRunFolder = _MMDetectionConfiguration.GetTrainingRunDatasetDirAbsPath(trainingRunDTO.Id!.Value); + if (Directory.Exists(datasetTrainingRunFolder)) + Directory.Delete(datasetTrainingRunFolder, recursive: true); + + //2. delete config folder for training run from mmdetection + string? configTrainingRunFolder = _MMDetectionConfiguration.GetTrainingRunConfigDirAbsPathByRunId(trainingRunDTO.Id!.Value); + if (Directory.Exists(configTrainingRunFolder)) + Directory.Delete(configTrainingRunFolder, recursive: true); + + //3. delete epoches all if failed, except best if successfull (.pth only) + string? trainingRunFolderPath = Path.Combine(_MMDetectionConfiguration.GetTrainingRunsBaseOutDirAbsPath(), trainingRunDTO.Id!.Value.ToString()); + if (Directory.Exists(trainingRunFolderPath)) + { + //get all .pth files + string[]? pthFiles = Directory.GetFiles(trainingRunFolderPath, "*.pth", SearchOption.TopDirectoryOnly); + if (pthFiles != null && pthFiles.Length > 0) + { + //check the result of try catch block + if (result) + { + //get best epoch + ResultDTO? resultGetBestEpoch = _trainingRunService.GetBestEpochForTrainingRun(trainingRunDTO.Id!.Value); + if (resultGetBestEpoch.IsSuccess && resultGetBestEpoch.Data != null) + { + int bestEpoch = resultGetBestEpoch.Data.BestEpochMetrics.Epoch; + foreach (string? file in pthFiles) + { + if (file != null) + { + string? fileName = Path.GetFileNameWithoutExtension(file); + if (fileName != null && fileName.StartsWith("epoch_")) + { + string? numberPart = fileName.Substring("epoch_".Length); + if (numberPart != null && int.TryParse(numberPart, out int epochNumber)) + { + //delete all .pth files except best epoch + if (epochNumber != bestEpoch && System.IO.File.Exists(file)) + { + System.IO.File.Delete(file); + } + } + + } + } + } + } + } + else + { + //result is failed so delete all .pth files + foreach (string? file in pthFiles) + { + if (System.IO.File.Exists(file)) + System.IO.File.Delete(file); + } + } + } + } } } @@ -418,6 +480,21 @@ public async Task DeleteTrainingRun(Guid trainingRunId) } } + JobList? processingJobs = monitoringApi.ProcessingJobs(0, int.MaxValue); + if (processingJobs == null) + return ResultDTO.Fail("Processing jobs not found"); + + foreach (KeyValuePair job in processingJobs) + { + string jobId = job.Key; + using (IStorageConnection connection = JobStorage.Current.GetConnection()) + { + string storedKey = connection.GetJobParameter(jobId, "trainingRunId"); + if (storedKey == trainingRunId.ToString()) + return ResultDTO.Fail("Can not delete training run because it is in process"); + } + } + ResultDTO? resultDeleteEntity = await _trainingRunService.DeleteTrainingRun(trainingRunId, _webHostEnvironment.WebRootPath); if (!resultDeleteEntity.IsSuccess && resultDeleteEntity.HandleError()) return ResultDTO.Fail(resultDeleteEntity.ErrMsg!); @@ -513,9 +590,9 @@ private async Task CreateErrMsgFile(Guid trainingRunId, string errMsg return ResultDTO.Fail("Directory path not found"); string? filePath = System.IO.Path.Combine(_webHostEnvironment.WebRootPath, trainingRunsErrorLogsFolder.Data); - if (!Directory.Exists(filePath)) + if (!Directory.Exists(filePath)) Directory.CreateDirectory(filePath); - + string fileName = $"{trainingRunId}_errMsg.txt"; string? fullFilePath = System.IO.Path.Combine(filePath, fileName); if (fullFilePath == null) diff --git a/MainApp.MVC/Areas/IntranetPortal/Views/Shared/_Sidebar.cshtml b/MainApp.MVC/Areas/IntranetPortal/Views/Shared/_Sidebar.cshtml index 5be5e5b..0652f2b 100644 --- a/MainApp.MVC/Areas/IntranetPortal/Views/Shared/_Sidebar.cshtml +++ b/MainApp.MVC/Areas/IntranetPortal/Views/Shared/_Sidebar.cshtml @@ -13,7 +13,7 @@