From 9ec7d0aa4fc76ea5f110e832b9b606a7d1b8a128 Mon Sep 17 00:00:00 2001 From: JulioJerez Date: Mon, 16 Sep 2024 02:48:56 -0700 Subject: [PATCH] using base name --- .../demos/ndAdvancedIndustrialRobot.cpp | 5 +- .../ndSandbox/demos/ndCartpoleContinue.cpp | 88 +++++++++++++------ .../ndSandbox/demos/ndCartpoleDiscrete.cpp | 63 ++++++++++--- 3 files changed, 112 insertions(+), 44 deletions(-) diff --git a/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp b/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp index ccaeed7bf..4310d677c 100644 --- a/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp +++ b/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp @@ -1106,6 +1106,7 @@ namespace ndAdvancedRobot m_master = ndSharedPtr(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters)); m_bestActor = ndSharedPtr(new ndBrain(*m_master->GetActor())); + snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME); m_master->SetName(name); @@ -1331,8 +1332,10 @@ void ndAdvancedIndustrialRobot(ndDemoEntityManager* const scene) ndSharedPtr fixJoint(new ndJointFix6dof(model->GetRoot()->m_body->GetMatrix(), model->GetRoot()->m_body->GetAsBodyKinematic(), floor)); world->AddJoint(fixJoint); + char name[256]; char fileName[256]; - ndGetWorkingFileName(CONTROLLER_NAME, fileName); + snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); ndSharedPtr brain(ndBrainLoad::Load(fileName)); model->SetNotifyCallback(new RobotModelNotify(brain, model, true)); diff --git a/newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp b/newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp index efa732000..291bb43f2 100644 --- a/newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp +++ b/newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp @@ -24,7 +24,9 @@ namespace ndCarpole_1 { //#define ND_TRAIN_AGENT - #define CONTROLLER_NAME 
"cartpoleContinue-vpg.dnn" + #define CONTROLLER_NAME "cartpoleContinue" + + #define CONTROLLER_RESUME_TRANING #define D_PUSH_ACCEL ndBrainFloat (15.0f) #define D_REWARD_MIN_ANGLE ndBrainFloat (20.0f * ndDegreeToRad) @@ -137,7 +139,6 @@ namespace ndCarpole_1 }; public: - RobotModelNotify(ndSharedPtr& master, ndModelArticulation* const robot) :ndModelNotify() ,m_controller(nullptr) @@ -166,6 +167,14 @@ namespace ndCarpole_1 ndAssert(0); } + //RobotModelNotify(const RobotModelNotify& src) + // :ndModelNotify(src) + // ,m_controller(src.m_controller) + //{ + // //Init(robot); + // ndAssert(0); + //} + ~RobotModelNotify() { if (m_controller) @@ -299,17 +308,19 @@ namespace ndCarpole_1 ,m_outFile(nullptr) ,m_timer(ndGetTimeInMicroseconds()) ,m_maxScore(ndFloat32(-1.0e10f)) + ,m_saveScore(m_maxScore) ,m_discountFactor(0.99f) - ,m_horizon(ndFloat32(1.0f) / (ndFloat32(1.0f) - m_discountFactor)) + //,m_horizon(ndFloat32(1.0f) / (ndFloat32(1.0f) - m_discountFactor)) ,m_lastEpisode(0xfffffff) ,m_stopTraining(100 * 1000000) ,m_modelIsTrained(false) { - ndWorld* const world = scene->GetWorld(); - - m_outFile = fopen("cartpole-vpg.csv", "wb"); + char name[256]; + snprintf(name, sizeof(name), "%s-vpg.csv", CONTROLLER_NAME); + m_outFile = fopen(name, "wb"); fprintf(m_outFile, "vpg\n"); + m_horizon = ndFloat32(1.0f) / (ndFloat32(1.0f) - m_discountFactor); ndBrainAgentContinuePolicyGradient_TrainerMaster::HyperParameters hyperParameters; hyperParameters.m_extraTrajectorySteps = 256; @@ -321,14 +332,26 @@ namespace ndCarpole_1 m_master = ndSharedPtr(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters)); m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetActor())); - m_master->SetName(CONTROLLER_NAME); + snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME); + m_master->SetName(name); - ndModelArticulation* const visualModel = CreateModel(scene, matrix); - visualModel->AddToWorld(world); + #ifdef CONTROLLER_RESUME_TRANING + char fileName[256]; + 
snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + ndSharedPtr critic(ndBrainLoad::Load(fileName)); + m_master->GetCritic()->CopyFrom(**critic); - #ifdef ND_TRAIN_AGENT - visualModel->SetNotifyCallback(new RobotModelNotify(m_master, visualModel)); + snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + ndSharedPtr actor(ndBrainLoad::Load(fileName)); + m_master->GetActor()->CopyFrom(**actor); #endif + + ndWorld* const world = scene->GetWorld(); + ndModelArticulation* const visualModel = CreateModel(scene, matrix); + visualModel->AddToWorld(world); + visualModel->SetNotifyCallback(new RobotModelNotify(m_master, visualModel)); SetMaterial(visualModel); // add a hidden battery of model to generate trajectories in parallel @@ -337,25 +360,12 @@ namespace ndCarpole_1 { ndMatrix location(matrix); location.m_posit.m_x += 3.0f * (ndRand() - 0.5f); - #if 0 - //ndSharedPtr agent(new ndRobot::ndControllerTrainer(m_master)); - //ndSharedPtr model(CreateModel(scene, location, agent)); - ndModelArticulation* const model = (ndModelArticulation*)visualModel->Clone(); - - //world->AddModel(model); - ////HideModel(model); - //SetMaterial(model); - #else ndModelArticulation* const model = CreateModel(scene, location); model->AddToWorld(world); - #ifdef ND_TRAIN_AGENT - model->SetNotifyCallback(new RobotModelNotify(m_master, model)); - #endif + model->SetNotifyCallback(new RobotModelNotify(m_master, model)); SetMaterial(model); - #endif - - scene->SetAcceleratedUpdate(); } + scene->SetAcceleratedUpdate(); } ~TrainingUpdata() @@ -467,6 +477,24 @@ namespace ndCarpole_1 } } + if (rewardTrajectory > m_saveScore) + { + char fileName[256]; + m_saveScore = ndFloor(rewardTrajectory) + 2.0f; + + // save partial controller in case of crash + ndBrain* const actor = m_master->GetActor(); + char name[256]; + snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME); + 
ndGetWorkingFileName(name, fileName); + actor->SaveToFile(fileName); + + ndBrain* const critic = m_master->GetCritic(); + snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + critic->SaveToFile(fileName); + } + if (episodeCount && !m_master->IsSampling()) { ndExpandTraceMessage("steps: %d\treward: %g\t trajectoryFrames: %g\n", m_master->GetFramesCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames()); @@ -490,8 +518,6 @@ namespace ndCarpole_1 ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer; ndExpandTraceMessage("training time: %g seconds\n", ndFloat32(ndFloat64(timer) * ndFloat32(1.0e-6f))); - //ndGetWorkingFileName(CRITIC_NAME, fileName); - //m_master->GetCritic()->SaveToFile(fileName); manager->Terminate(); } } @@ -501,6 +527,7 @@ namespace ndCarpole_1 FILE* m_outFile; ndUnsigned64 m_timer; ndFloat32 m_maxScore; + ndFloat32 m_saveScore; ndFloat32 m_discountFactor; ndFloat32 m_horizon; ndUnsigned32 m_lastEpisode; @@ -529,8 +556,11 @@ void ndCartpoleContinue(ndDemoEntityManager* const scene) model->AddToWorld(world); // add the deep learning controller + char name[256]; char fileName[256]; - ndGetWorkingFileName(CONTROLLER_NAME, fileName); + snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + ndSharedPtr brain(ndBrainLoad::Load(fileName)); model->SetNotifyCallback(new RobotModelNotify(brain, model)); #endif diff --git a/newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp b/newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp index 3b47503fb..30b967bb2 100644 --- a/newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp +++ b/newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp @@ -24,8 +24,9 @@ namespace ndCarpole_0 { //#define ND_TRAIN_AGENT - #define CONTROLLER_NAME "cartpoleDiscrete-vpg.dnn" - //#define CRITIC_NAME "cartpoleDiscreteCritic-vpg.dnn" + #define CONTROLLER_NAME 
"cartpoleDiscrete" + + #define CONTROLLER_RESUME_TRANING #define D_PUSH_ACCEL ndFloat32 (15.0f) #define D_REWARD_MIN_ANGLE ndFloat32 (20.0f * ndDegreeToRad) @@ -94,7 +95,6 @@ namespace ndCarpole_0 ndUrdfFile urdf; char fileName[256]; ndGetWorkingFileName("cartpole.urdf", fileName); - //ndGetWorkingFileName("cartpoleHandMade.urdf", fileName); ndModelArticulation* const cartPole = urdf.Import(fileName); SetModelVisualMesh(scene, cartPole); @@ -190,7 +190,6 @@ namespace ndCarpole_0 ,m_controller(nullptr) ,m_controllerTrainer(nullptr) { - //m_timestep = 0.0f; m_controllerTrainer = new ndControllerTrainer(master); m_controllerTrainer->m_robot = this; Init(robot); @@ -208,6 +207,7 @@ namespace ndCarpole_0 RobotModelNotify(const RobotModelNotify& src) :ndModelNotify(src) + ,m_controller(src.m_controller) { //Init(robot); ndAssert(0); @@ -348,17 +348,18 @@ namespace ndCarpole_0 ,m_outFile(nullptr) ,m_timer(ndGetTimeInMicroseconds()) ,m_maxScore(ndFloat32(-1.0e10f)) + ,m_saveScore(m_maxScore) ,m_discountFactor(0.99f) - ,m_horizon(ndFloat32(1.0f) / (ndFloat32(1.0f) - m_discountFactor)) ,m_lastEpisode(0xffffffff) ,m_stopTraining(200 * 1000000) ,m_modelIsTrained(false) { - ndWorld* const world = scene->GetWorld(); - - m_outFile = fopen("cartpole-vpg.csv", "wb"); + char name[256]; + snprintf(name, sizeof(name), "%s-vpg.csv", CONTROLLER_NAME); + m_outFile = fopen(name, "wb"); fprintf(m_outFile, "vpg\n"); - + + m_horizon = ndFloat32(1.0f) / (ndFloat32(1.0f) - m_discountFactor); ndBrainAgentDiscretePolicyGradient_TrainerMaster::HyperParameters hyperParameters; hyperParameters.m_extraTrajectorySteps = 256; @@ -370,8 +371,23 @@ namespace ndCarpole_0 m_master = ndSharedPtr(new ndBrainAgentDiscretePolicyGradient_TrainerMaster(hyperParameters)); m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetActor())); - m_master->SetName(CONTROLLER_NAME); + snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME); + m_master->SetName(name); + + #ifdef CONTROLLER_RESUME_TRANING + char 
fileName[256]; + snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + ndSharedPtr critic(ndBrainLoad::Load(fileName)); + m_master->GetCritic()->CopyFrom(**critic); + + snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + ndSharedPtr actor(ndBrainLoad::Load(fileName)); + m_master->GetActor()->CopyFrom(**actor); + #endif + ndWorld* const world = scene->GetWorld(); ndModelArticulation* const visualModel = CreateModel(scene, matrix); visualModel->AddToWorld(world); visualModel->SetNotifyCallback(new RobotModelNotify(m_master, visualModel)); @@ -388,7 +404,6 @@ namespace ndCarpole_0 model->SetNotifyCallback(new RobotModelNotify(m_master, model)); SetMaterial(model); } - scene->SetAcceleratedUpdate(); } @@ -501,6 +516,24 @@ namespace ndCarpole_0 } } + if (rewardTrajectory > m_saveScore) + { + char fileName[256]; + m_saveScore = ndFloor(rewardTrajectory) + 2.0f; + + // save partial controller in case of crash + ndBrain* const actor = m_master->GetActor(); + char name[256]; + snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + actor->SaveToFile(fileName); + + ndBrain* const critic = m_master->GetCritic(); + snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + critic->SaveToFile(fileName); + } + if (episodeCount && !m_master->IsSampling()) { ndExpandTraceMessage("steps: %d\treward: %g\t trajectoryFrames: %g\n", m_master->GetFramesCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames()); @@ -524,8 +557,6 @@ namespace ndCarpole_0 ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer; ndExpandTraceMessage("training time: %g seconds\n", ndFloat32(ndFloat64(timer) * ndFloat32(1.0e-6f))); - //ndGetWorkingFileName(CRITIC_NAME, fileName); - //m_master->GetCritic()->SaveToFile(fileName); manager->Terminate(); } } @@ -535,6 +566,7 @@ 
namespace ndCarpole_0 FILE* m_outFile; ndUnsigned64 m_timer; ndFloat32 m_maxScore; + ndFloat32 m_saveScore; ndFloat32 m_discountFactor; ndFloat32 m_horizon; ndUnsigned32 m_lastEpisode; @@ -564,8 +596,11 @@ void ndCartpoleDiscrete(ndDemoEntityManager* const scene) model->AddToWorld(world); // add the deep learning controller + char name[256]; char fileName[256]; - ndGetWorkingFileName(CONTROLLER_NAME, fileName); + snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME); + ndGetWorkingFileName(name, fileName); + ndSharedPtr brain(ndBrainLoad::Load(fileName)); model->SetNotifyCallback(new RobotModelNotify(brain, model)); #endif