Skip to content

Commit

Permalink
still more big bug fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
JulioJerez committed Sep 27, 2024
1 parent a987344 commit 9efba70
Showing 1 changed file with 12 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -590,28 +590,18 @@ void ndBrainAgentContinuePolicyGradient_TrainerMaster::OptimizePolicy()
#else
for (ndInt32 i = numberOfActions - 1; i >= 0; --i)
{
ndBrainFloat meanLoss = ndBrainFloat(0.0f);
ndBrainFloat sigmaLoss = ndBrainFloat(0.0f);

if (output[i + numberOfActions] <= ND_CONTINUE_POLICY_GRADIENT_MIN_VARIANCE)
{
sigmaLoss = 0.0f;
}

if (output[i + numberOfActions] >= ND_CONTINUE_POLICY_GRADIENT_MIN_VARIANCE)
{
const ndBrainFloat mean = output[i];
const ndBrainFloat sigma1 = output[i + numberOfActions];
const ndBrainFloat sigma2 = sigma1 * sigma1;
const ndBrainFloat sigma3 = sigma2 * sigma1;
const ndBrainFloat num = (actions[i] - mean);

// this was a huge bug, it is gradient ascent
meanLoss = -num / sigma2;
sigmaLoss = num * num / sigma3 - ndBrainFloat(1.0f) / sigma1;
}
loss[i] = -meanLoss * advantage;
loss[i + numberOfActions] = -sigmaLoss * advantage;
const ndBrainFloat mean = output[i];
const ndBrainFloat sigma1 = output[i + numberOfActions];
const ndBrainFloat sigma2 = sigma1 * sigma1;
const ndBrainFloat sigma3 = sigma2 * sigma1;
const ndBrainFloat num = (actions[i] - mean);

// this was a huge bug, it is gradient ascent
ndBrainFloat meanGradient = -num / sigma2;
ndBrainFloat sigmaGradient = num * num / sigma3 - ndBrainFloat(1.0f) / sigma1;

loss[i] = -meanGradient * advantage;
loss[i + numberOfActions] = -sigmaGradient * advantage;
}
#endif
}
Expand Down

0 comments on commit 9efba70

Please sign in to comment.