Skip to content

Commit

Permalink
Now really fixed the wrong loss computation due to overflows; see lines 436-445 and 483-492.
Browse files Browse the repository at this point in the history
  • Loading branch information
mensch72 committed Mar 1, 2024
1 parent 400ec5e commit 3bb9e87
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
8 changes: 4 additions & 4 deletions src/agents/makeMDPAgentSatisfia.wppl
Original file line number Diff line number Diff line change
Expand Up @@ -439,9 +439,9 @@ var makeMDPAgentSatisfia = function(params_, world) {
loss = combinedLoss(state, action, aleph4state, estAlephs1[index], 1 / indices.length); // ! bottleneck !
return loss;
}, indices),
meanLoss = sum(losses) / indices.length,
minLoss = _SU.min(losses),
propensities = map(function(loss) {
return Math.min(1e100, Math.max(Math.exp(-(loss-meanLoss) / lossTemperature), 1e-100));
return Math.max(Math.exp(-(loss-minLoss) / lossTemperature), 1e-100);
}, losses);

if (debug) console.log(pad(state),"| localPolicyData", prettyState(state), aleph, actions, {propensities});
Expand Down Expand Up @@ -486,9 +486,9 @@ var makeMDPAgentSatisfia = function(params_, world) {
loss = combinedLoss(state, action, aleph4state, estAlephs2[index], 1 / indices2.length);
return loss;
}, indices2),
meanLoss2 = sum(losses2) / indices2.length,
meanLoss2 = _SU.min(losses2),
propensities2 = map(function(loss) {
return Math.min(1e100, Math.max(Math.exp(-(loss-meanLoss2) / lossTemperature), 1e-100));
return Math.max(Math.exp(-(loss-meanLoss2) / lossTemperature), 1e-100);
}, losses2);

if (debug) console.log(pad(state),"| | localPolicyData", prettyState(state), aleph4state, {a1, midTarget, estAleph1, mid1, indices2, aleph2target, estAlephs2, propensities2});
Expand Down
2 changes: 1 addition & 1 deletion src/utils/utilsLog.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ const locActionData2ASCII= function(

/**
 * Logs one console line per entry of `support`.
 *
 * @param padding - Passed as the first argument to every console.log call.
 * @param support - Indexable collection; each entry is read at [0] (printed
 *                  after the label "action") and [1] (printed after the label
 *                  "aspiration").
 * @param ps      - Indexed in parallel with `support`; ps[i] is printed after
 *                  the label "prob." (presumably a probability — confirm
 *                  against callers).
 */
const printPolicy= function(padding, support, ps) {
for (var i = 0; i < support.length; i++) {
// NOTE(review): the next two statements are textually identical. This excerpt
// comes from a diff view that reports "1 addition & 1 deletion" for this file,
// so they are most likely the removed and added versions of a single
// whitespace-only change rather than an intentional double log — confirm
// against the actual source file before relying on either reading.
console.log(padding, "| | action", support[i][0], "aspiration", support[i][1], "prob.", ps[i]);
console.log(padding, "| | action", support[i][0], "aspiration", support[i][1], "prob.", ps[i]);
}
};

Expand Down

0 comments on commit 3bb9e87

Please sign in to comment.