From 9ec7d0aa4fc76ea5f110e832b9b606a7d1b8a128 Mon Sep 17 00:00:00 2001 From: JulioJerez Date: Mon, 16 Sep 2024 02:48:56 -0700 Subject: [PATCH] using base name --- .../demos/ndAdvancedIndustrialRobot.cpp | 5 +- .../ndSandbox/demos/ndCartpoleContinue.cpp | 88 +++++++++++++------ .../ndSandbox/demos/ndCartpoleDiscrete.cpp | 63 ++++++++++--- 3 files changed, 112 insertions(+), 44 deletions(-) diff --git a/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp b/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp index ccaeed7bf..4310d677c 100644 --- a/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp +++ b/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp @@ -1106,6 +1106,7 @@ namespace ndAdvancedRobot m_master = ndSharedPtr(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters)); m_bestActor = ndSharedPtr(new ndBrain(*m_master->GetActor())); + snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME); m_master->SetName(name); @@ -1331,8 +1332,10 @@ void ndAdvancedIndustrialRobot(ndDemoEntityManager* const scene) ndSharedPtr fixJoint(new ndJointFix6dof(model->GetRoot()->m_body->GetMatrix(), model->GetRoot()->m_body->GetAsBodyKinematic(), floor)); world->AddJoint(fixJoint); + char name[256]; char fileName[256]; - ndGetWorkingFileName(CONTROLLER_NAME, fileName); + snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); ndSharedPtr brain(ndBrainLoad::Load(fileName)); model->SetNotifyCallback(new RobotModelNotify(brain, model, true)); diff --git a/newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp b/newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp index efa732000..291bb43f2 100644 --- a/newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp +++ b/newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp @@ -24,7 +24,9 @@ namespace ndCarpole_1 { //#define ND_TRAIN_AGENT - #define CONTROLLER_NAME 
"cartpoleContinue-vpg.dnn" + #define CONTROLLER_NAME "cartpoleContinue" + + #define CONTROLLER_RESUME_TRANING #define D_PUSH_ACCEL ndBrainFloat (15.0f) #define D_REWARD_MIN_ANGLE ndBrainFloat (20.0f * ndDegreeToRad) @@ -137,7 +139,6 @@ namespace ndCarpole_1 }; public: - RobotModelNotify(ndSharedPtr& master, ndModelArticulation* const robot) :ndModelNotify() ,m_controller(nullptr) @@ -166,6 +167,14 @@ namespace ndCarpole_1 ndAssert(0); } + //RobotModelNotify(const RobotModelNotify& src) + // :ndModelNotify(src) + // ,m_controller(src.m_controller) + //{ + // //Init(robot); + // ndAssert(0); + //} + ~RobotModelNotify() { if (m_controller) @@ -299,17 +308,19 @@ namespace ndCarpole_1 ,m_outFile(nullptr) ,m_timer(ndGetTimeInMicroseconds()) ,m_maxScore(ndFloat32(-1.0e10f)) + ,m_saveScore(m_maxScore) ,m_discountFactor(0.99f) - ,m_horizon(ndFloat32(1.0f) / (ndFloat32(1.0f) - m_discountFactor)) + //,m_horizon(ndFloat32(1.0f) / (ndFloat32(1.0f) - m_discountFactor)) ,m_lastEpisode(0xfffffff) ,m_stopTraining(100 * 1000000) ,m_modelIsTrained(false) { - ndWorld* const world = scene->GetWorld(); - - m_outFile = fopen("cartpole-vpg.csv", "wb"); + char name[256]; + snprintf(name, sizeof(name), "%s-vpg.csv", CONTROLLER_NAME); + m_outFile = fopen(name, "wb"); fprintf(m_outFile, "vpg\n"); + m_horizon = ndFloat32(1.0f) / (ndFloat32(1.0f) - m_discountFactor); ndBrainAgentContinuePolicyGradient_TrainerMaster::HyperParameters hyperParameters; hyperParameters.m_extraTrajectorySteps = 256; @@ -321,14 +332,26 @@ namespace ndCarpole_1 m_master = ndSharedPtr(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters)); m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetActor())); - m_master->SetName(CONTROLLER_NAME); + snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME); + m_master->SetName(name); - ndModelArticulation* const visualModel = CreateModel(scene, matrix); - visualModel->AddToWorld(world); + #ifdef CONTROLLER_RESUME_TRANING + char fileName[256]; + 
snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + ndSharedPtr critic(ndBrainLoad::Load(fileName)); + m_master->GetCritic()->CopyFrom(**critic); - #ifdef ND_TRAIN_AGENT - visualModel->SetNotifyCallback(new RobotModelNotify(m_master, visualModel)); + snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + ndSharedPtr actor(ndBrainLoad::Load(fileName)); + m_master->GetActor()->CopyFrom(**actor); #endif + + ndWorld* const world = scene->GetWorld(); + ndModelArticulation* const visualModel = CreateModel(scene, matrix); + visualModel->AddToWorld(world); + visualModel->SetNotifyCallback(new RobotModelNotify(m_master, visualModel)); SetMaterial(visualModel); // add a hidden battery of model to generate trajectories in parallel @@ -337,25 +360,12 @@ namespace ndCarpole_1 { ndMatrix location(matrix); location.m_posit.m_x += 3.0f * (ndRand() - 0.5f); - #if 0 - //ndSharedPtr agent(new ndRobot::ndControllerTrainer(m_master)); - //ndSharedPtr model(CreateModel(scene, location, agent)); - ndModelArticulation* const model = (ndModelArticulation*)visualModel->Clone(); - - //world->AddModel(model); - ////HideModel(model); - //SetMaterial(model); - #else ndModelArticulation* const model = CreateModel(scene, location); model->AddToWorld(world); - #ifdef ND_TRAIN_AGENT - model->SetNotifyCallback(new RobotModelNotify(m_master, model)); - #endif + model->SetNotifyCallback(new RobotModelNotify(m_master, model)); SetMaterial(model); - #endif - - scene->SetAcceleratedUpdate(); } + scene->SetAcceleratedUpdate(); } ~TrainingUpdata() @@ -467,6 +477,24 @@ namespace ndCarpole_1 } } + if (rewardTrajectory > m_saveScore) + { + char fileName[256]; + m_saveScore = ndFloor(rewardTrajectory) + 2.0f; + + // save partial controller in case of crash + ndBrain* const actor = m_master->GetActor(); + char name[256]; + snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME); + 
ndGetWorkingFileName(name, fileName); + actor->SaveToFile(fileName); + + ndBrain* const critic = m_master->GetCritic(); + snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + critic->SaveToFile(fileName); + } + if (episodeCount && !m_master->IsSampling()) { ndExpandTraceMessage("steps: %d\treward: %g\t trajectoryFrames: %g\n", m_master->GetFramesCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames()); @@ -490,8 +518,6 @@ namespace ndCarpole_1 ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer; ndExpandTraceMessage("training time: %g seconds\n", ndFloat32(ndFloat64(timer) * ndFloat32(1.0e-6f))); - //ndGetWorkingFileName(CRITIC_NAME, fileName); - //m_master->GetCritic()->SaveToFile(fileName); manager->Terminate(); } } @@ -501,6 +527,7 @@ namespace ndCarpole_1 FILE* m_outFile; ndUnsigned64 m_timer; ndFloat32 m_maxScore; + ndFloat32 m_saveScore; ndFloat32 m_discountFactor; ndFloat32 m_horizon; ndUnsigned32 m_lastEpisode; @@ -529,8 +556,11 @@ void ndCartpoleContinue(ndDemoEntityManager* const scene) model->AddToWorld(world); // add the deep learning controller + char name[256]; char fileName[256]; - ndGetWorkingFileName(CONTROLLER_NAME, fileName); + snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + ndSharedPtr brain(ndBrainLoad::Load(fileName)); model->SetNotifyCallback(new RobotModelNotify(brain, model)); #endif diff --git a/newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp b/newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp index 3b47503fb..30b967bb2 100644 --- a/newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp +++ b/newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp @@ -24,8 +24,9 @@ namespace ndCarpole_0 { //#define ND_TRAIN_AGENT - #define CONTROLLER_NAME "cartpoleDiscrete-vpg.dnn" - //#define CRITIC_NAME "cartpoleDiscreteCritic-vpg.dnn" + #define CONTROLLER_NAME 
"cartpoleDiscrete" + + #define CONTROLLER_RESUME_TRANING #define D_PUSH_ACCEL ndFloat32 (15.0f) #define D_REWARD_MIN_ANGLE ndFloat32 (20.0f * ndDegreeToRad) @@ -94,7 +95,6 @@ namespace ndCarpole_0 ndUrdfFile urdf; char fileName[256]; ndGetWorkingFileName("cartpole.urdf", fileName); - //ndGetWorkingFileName("cartpoleHandMade.urdf", fileName); ndModelArticulation* const cartPole = urdf.Import(fileName); SetModelVisualMesh(scene, cartPole); @@ -190,7 +190,6 @@ namespace ndCarpole_0 ,m_controller(nullptr) ,m_controllerTrainer(nullptr) { - //m_timestep = 0.0f; m_controllerTrainer = new ndControllerTrainer(master); m_controllerTrainer->m_robot = this; Init(robot); @@ -208,6 +207,7 @@ namespace ndCarpole_0 RobotModelNotify(const RobotModelNotify& src) :ndModelNotify(src) + ,m_controller(src.m_controller) { //Init(robot); ndAssert(0); @@ -348,17 +348,18 @@ namespace ndCarpole_0 ,m_outFile(nullptr) ,m_timer(ndGetTimeInMicroseconds()) ,m_maxScore(ndFloat32(-1.0e10f)) + ,m_saveScore(m_maxScore) ,m_discountFactor(0.99f) - ,m_horizon(ndFloat32(1.0f) / (ndFloat32(1.0f) - m_discountFactor)) ,m_lastEpisode(0xffffffff) ,m_stopTraining(200 * 1000000) ,m_modelIsTrained(false) { - ndWorld* const world = scene->GetWorld(); - - m_outFile = fopen("cartpole-vpg.csv", "wb"); + char name[256]; + snprintf(name, sizeof(name), "%s-vpg.csv", CONTROLLER_NAME); + m_outFile = fopen(name, "wb"); fprintf(m_outFile, "vpg\n"); - + + m_horizon = ndFloat32(1.0f) / (ndFloat32(1.0f) - m_discountFactor); ndBrainAgentDiscretePolicyGradient_TrainerMaster::HyperParameters hyperParameters; hyperParameters.m_extraTrajectorySteps = 256; @@ -370,8 +371,23 @@ namespace ndCarpole_0 m_master = ndSharedPtr(new ndBrainAgentDiscretePolicyGradient_TrainerMaster(hyperParameters)); m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetActor())); - m_master->SetName(CONTROLLER_NAME); + snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME); + m_master->SetName(name); + + #ifdef CONTROLLER_RESUME_TRANING + char 
fileName[256]; + snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + ndSharedPtr critic(ndBrainLoad::Load(fileName)); + m_master->GetCritic()->CopyFrom(**critic); + + snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + ndSharedPtr actor(ndBrainLoad::Load(fileName)); + m_master->GetActor()->CopyFrom(**actor); + #endif + ndWorld* const world = scene->GetWorld(); ndModelArticulation* const visualModel = CreateModel(scene, matrix); visualModel->AddToWorld(world); visualModel->SetNotifyCallback(new RobotModelNotify(m_master, visualModel)); @@ -388,7 +404,6 @@ namespace ndCarpole_0 model->SetNotifyCallback(new RobotModelNotify(m_master, model)); SetMaterial(model); } - scene->SetAcceleratedUpdate(); } @@ -501,6 +516,24 @@ namespace ndCarpole_0 } } + if (rewardTrajectory > m_saveScore) + { + char fileName[256]; + m_saveScore = ndFloor(rewardTrajectory) + 2.0f; + + // save partial controller in case of crash + ndBrain* const actor = m_master->GetActor(); + char name[256]; + snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + actor->SaveToFile(fileName); + + ndBrain* const critic = m_master->GetCritic(); + snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + critic->SaveToFile(fileName); + } + if (episodeCount && !m_master->IsSampling()) { ndExpandTraceMessage("steps: %d\treward: %g\t trajectoryFrames: %g\n", m_master->GetFramesCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames()); @@ -524,8 +557,6 @@ namespace ndCarpole_0 ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer; ndExpandTraceMessage("training time: %g seconds\n", ndFloat32(ndFloat64(timer) * ndFloat32(1.0e-6f))); - //ndGetWorkingFileName(CRITIC_NAME, fileName); - //m_master->GetCritic()->SaveToFile(fileName); manager->Terminate(); } } @@ -535,6 +566,7 @@ 
namespace ndCarpole_0 FILE* m_outFile; ndUnsigned64 m_timer; ndFloat32 m_maxScore; + ndFloat32 m_saveScore; ndFloat32 m_discountFactor; ndFloat32 m_horizon; ndUnsigned32 m_lastEpisode; @@ -564,8 +596,11 @@ void ndCartpoleDiscrete(ndDemoEntityManager* const scene) model->AddToWorld(world); // add the deep learning controller + char name[256]; char fileName[256]; - ndGetWorkingFileName(CONTROLLER_NAME, fileName); + snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + ndSharedPtr brain(ndBrainLoad::Load(fileName)); model->SetNotifyCallback(new RobotModelNotify(brain, model)); #endif