
Commit

moved simulate to simulateMDP.wppl
mensch72 committed Feb 29, 2024
1 parent 9bfbaf8 commit fb01e79
Showing 3 changed files with 45 additions and 32 deletions.
33 changes: 1 addition & 32 deletions examples/runVerySimpleGW.wppl
@@ -34,37 +34,6 @@ var env = getEnv(),
messingPotential = agent.messingPotential_state,
cupLoss = agent.cupLoss_state;

// Generate and draw a trajectory:
var simulate = function(state, aleph, _t) {
var t = _t ? _t : 0,
aleph4state = asInterval(aleph);
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,"...");
var localPolicy = localPolicy(state, aleph4state),
actionAndAleph = sample(localPolicy),
action = actionAndAleph[0],
aleph4action = actionAndAleph[1],
Edel = expectedDelta(state, action);
var stepData = {state, aleph4state, action, aleph4action, Edel};
if (state.terminateAfterAction) {
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,": localPolicy",JSON.stringify(localPolicy.params),"\n"+pad(state),"| action",action,"aleph4action",aleph4action,"Edel",Edel,"(terminal)");
return {
trajectory: [stepData], // sequence of [state, action] pairs
tr4viz: [state],
conditionalExpectedIndicator: Edel // expected indicator conditional on this trajectory
};
} else {
var nextState = transition(state, action),
nextAleph4state = propagateAspiration(state, action, aleph4action, Edel, nextState);
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,": localPolicy",JSON.stringify(localPolicy.params),"\n"+pad(state),"| action",action,"aleph4action",aleph4action,"Edel",Edel,"nextState",prettyState(nextState),"nextAleph4state",nextAleph4state);
var nextOut = simulate(nextState, nextAleph4state, t+1);
return {
trajectory: [stepData].concat(nextOut.trajectory),
tr4viz: [state].concat(nextOut.tr4viz),
conditionalExpectedIndicator: Edel + nextOut.conditionalExpectedIndicator
};
}
};

console.log("aleph0", asInterval(aleph0));

var t0 = _SU.time();
@@ -80,7 +49,7 @@ var gd = agent.getData, agentData = gd();

// estimate distribution of trajectories:

var sym = simulate(mdp.startState, aleph0);
var sym = simulateMDPAgentSatisfia(mdp, agent, mdp.startState, aleph0, options); // simulate(mdp.startState, aleph0);

var trajDist = Infer({ model() {
return sym.trajectory;
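For reference, a minimal usage sketch of the relocated simulator as it is now called from this example script — assuming the `mdp`, `agent`, `aleph0`, and `options` objects set up earlier in the file; the result fields are the ones returned by the new function in src/simulation/simulateMDP.wppl below:

var sym = simulateMDPAgentSatisfia(mdp, agent, mdp.startState, aleph0, options);
// sym.trajectory: array of step records {state, aleph4state, action, aleph4action, Edel}
// sym.tr4viz: the visited states only, for visualization
// sym.conditionalExpectedTotal: sum of the per-step expected Deltas along the sampled trajectory
console.log("expected Total along sampled trajectory:", sym.conditionalExpectedTotal);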
9 changes: 9 additions & 0 deletions src/agents/makeMDPAgentSatisfia.wppl
@@ -1215,6 +1215,15 @@ var makeMDPAgentSatisfia = function(params_, world) {
return res;
});

// alternative, nonlinear loss, as an example to be used in the webbook:
var combinedLoss_alt = dp.cache(function(s, a, al4s, al4a, p) {
// cheap criteria, including some myopic versions of the more expensive ones:
var lFeasibilityPower = lossCoeff4FeasibilityPower != 0 ? lossCoeff4FeasibilityPower * squared(maxAdmissibleQ(s, a) - minAdmissibleQ(s, a)) : 0,
lMP = lossCoeff4MP != 0 ? lossCoeff4MP * messingPotential_action(s, a) : 0;

return max(lFeasibilityPower, lMP);
});

var getData = function() {
var stateActionPairs = Array.from(stateActionPairsSet),
states = Array.from(_SU.setFrom(map(function(pair) { return pair[0]; }, stateActionPairs))),
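To make the effect of combining the two criteria with max rather than a weighted sum concrete, a hypothetical worked example (all numbers made up, not taken from the codebase):

// Hypothetical illustration only:
// lossCoeff4FeasibilityPower = 1, maxAdmissibleQ(s,a) - minAdmissibleQ(s,a) = 3  =>  lFeasibilityPower = 1 * squared(3) = 9
// lossCoeff4MP = 2, messingPotential_action(s,a) = 4                             =>  lMP = 2 * 4 = 8
// combinedLoss_alt returns max(9, 8) = 9:
// only the currently dominant criterion drives the loss, whereas a linear combination would add the terms to 17.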
35 changes: 35 additions & 0 deletions src/simulation/simulateMDP.wppl
@@ -49,6 +49,41 @@ var simulateMDPAgentHyperbolic = function(startState, world, agent, outputType)
return sampleSequence(startState);
};

var simulateMDPAgentSatisfia = function(mdp, agent, state, aleph, options, _t) {
var t = _t ? _t : 0,
aleph4state = asInterval(aleph);
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,"...");
var lp = agent.localPolicy,
localPolicy = lp(state, aleph4state),
actionAndAleph = sample(localPolicy),
action = actionAndAleph[0],
aleph4action = actionAndAleph[1],
ed = mdp.expectedDelta,
Edel = ed(state, action);
var stepData = {state, aleph4state, action, aleph4action, Edel};
if (state.terminateAfterAction) {
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,": localPolicy",JSON.stringify(localPolicy.params),"\n"+pad(state),"| action",action,"aleph4action",aleph4action,"Edel",Edel,"(terminal)");
return {
trajectory: [stepData], // sequence of step records {state, aleph4state, action, aleph4action, Edel}
tr4viz: [state],
conditionalExpectedTotal: Edel // expected Total conditional on this state trajectory
};
} else {
var w = mdp.world,
tr = w.transition,
nextState = tr(state, action),
pa = agent.propagateAspiration,
nextAleph4state = pa(state, action, aleph4action, Edel, nextState);
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,": localPolicy",JSON.stringify(localPolicy.params),"\n"+pad(state),"| action",action,"aleph4action",aleph4action,"Edel",Edel,"nextState",prettyState(nextState),"nextAleph4state",nextAleph4state);
var nextOut = simulateMDPAgentSatisfia(mdp, agent, nextState, nextAleph4state, options, t+1);
return {
trajectory: [stepData].concat(nextOut.trajectory),
tr4viz: [state].concat(nextOut.tr4viz),
conditionalExpectedTotal: Edel + nextOut.conditionalExpectedTotal
};
}
};
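
Compared with the version removed from examples/runVerySimpleGW.wppl above, the simulator no longer closes over script-local helpers; everything it needs is passed in through `mdp` and `agent`. A rough sketch of the interface it assumes, with field names read off the accesses in the function body (a summary for orientation, not a definition from the codebase):

// mdp:   { expectedDelta(state, action), world: { transition(state, action) }, startState, ... }
// agent: { localPolicy(state, aleph4state) -> distribution over [action, aleph4action] pairs,
//          propagateAspiration(state, action, aleph4action, Edel, nextState) -> next aspiration interval }
// also assumed in scope: asInterval, and the logging helpers pad and prettyState.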


var simulateMDP = function(startState, world, agent, outputType) {
var params = agent.params;
