
Commit

moved simulate to simulateMDP.wppl
mensch72 committed Feb 29, 2024
1 parent 9bfbaf8 commit fb01e79
Showing 3 changed files with 45 additions and 32 deletions.
33 changes: 1 addition & 32 deletions examples/runVerySimpleGW.wppl
@@ -34,37 +34,6 @@ var env = getEnv(),
messingPotential = agent.messingPotential_state,
cupLoss = agent.cupLoss_state;

// Generate and draw a trajectory:
var simulate = function(state, aleph, _t) {
var t = _t ? _t : 0,
aleph4state = asInterval(aleph);
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,"...");
var localPolicy = localPolicy(state, aleph4state),
actionAndAleph = sample(localPolicy),
action = actionAndAleph[0],
aleph4action = actionAndAleph[1],
Edel = expectedDelta(state, action);
var stepData = {state, aleph4state, action, aleph4action, Edel};
if (state.terminateAfterAction) {
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,": localPolicy",JSON.stringify(localPolicy.params),"\n"+pad(state),"| action",action,"aleph4action",aleph4action,"Edel",Edel,"(terminal)");
return {
trajectory: [stepData], // sequence of [state, action] pairs
tr4viz: [state],
conditionalExpectedIndicator: Edel // expected indicator conditional on this trajectory
};
} else {
var nextState = transition(state, action),
nextAleph4state = propagateAspiration(state, action, aleph4action, Edel, nextState);
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,": localPolicy",JSON.stringify(localPolicy.params),"\n"+pad(state),"| action",action,"aleph4action",aleph4action,"Edel",Edel,"nextState",prettyState(nextState),"nextAleph4state",nextAleph4state);
var nextOut = simulate(nextState, nextAleph4state, t+1);
return {
trajectory: [stepData].concat(nextOut.trajectory),
tr4viz: [state].concat(nextOut.tr4viz),
conditionalExpectedIndicator: Edel + nextOut.conditionalExpectedIndicator
};
}
};

console.log("aleph0", asInterval(aleph0));

var t0 = _SU.time();
@@ -80,7 +49,7 @@ var gd = agent.getData, agentData = gd();

// estimate distribution of trajectories:

var sym = simulate(mdp.startState, aleph0);
var sym = simulateMDPAgentSatisfia(mdp, agent, mdp.startState, aleph0, options); // simulate(mdp.startState, aleph0);

var trajDist = Infer({ model() {
return sym.trajectory;
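For reference, a minimal usage sketch of the relocated simulator as it is now called from this example script — assuming the `mdp`, `agent`, `aleph0`, and `options` objects set up earlier in the file; the result fields are the ones returned by the new function in src/simulation/simulateMDP.wppl below:

var sym = simulateMDPAgentSatisfia(mdp, agent, mdp.startState, aleph0, options);
// sym.trajectory: array of step records {state, aleph4state, action, aleph4action, Edel}
// sym.tr4viz: the visited states only, for visualization
// sym.conditionalExpectedTotal: sum of the per-step expected Deltas along the sampled trajectory
console.log("expected Total along sampled trajectory:", sym.conditionalExpectedTotal);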
9 changes: 9 additions & 0 deletions src/agents/makeMDPAgentSatisfia.wppl
@@ -1215,6 +1215,15 @@ var makeMDPAgentSatisfia = function(params_, world) {
return res;
});

// alternative, nonlinear loss, as an example to be used in the webbook:
var combinedLoss_alt = dp.cache(function(s, a, al4s, al4a, p) {
// cheap criteria, including some myopic versions of the more expensive ones:
var lFeasibilityPower = lossCoeff4FeasibilityPower != 0 ? lossCoeff4FeasibilityPower * squared(maxAdmissibleQ(s, a) - minAdmissibleQ(s, a)) : 0,
lMP = lossCoeff4MP != 0 ? lossCoeff4MP * messingPotential_action(s, a) : 0;

return max(lFeasibilityPower, lMP);
});

var getData = function() {
var stateActionPairs = Array.from(stateActionPairsSet),
states = Array.from(_SU.setFrom(map(function(pair) { return pair[0]; }, stateActionPairs))),
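To make the effect of combining the two criteria with max rather than a weighted sum concrete, a hypothetical worked example (all numbers made up, not taken from the codebase):

// Hypothetical illustration only:
// lossCoeff4FeasibilityPower = 1, maxAdmissibleQ(s,a) - minAdmissibleQ(s,a) = 3  =>  lFeasibilityPower = 1 * squared(3) = 9
// lossCoeff4MP = 2, messingPotential_action(s,a) = 4                             =>  lMP = 2 * 4 = 8
// combinedLoss_alt returns max(9, 8) = 9:
// only the currently dominant criterion drives the loss, whereas a linear combination would add the terms to 17.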
35 changes: 35 additions & 0 deletions src/simulation/simulateMDP.wppl
@@ -49,6 +49,41 @@ var simulateMDPAgentHyperbolic = function(startState, world, agent, outputType)
return sampleSequence(startState);
};

var simulateMDPAgentSatisfia = function(mdp, agent, state, aleph, options, _t) {
var t = _t ? _t : 0,
aleph4state = asInterval(aleph);
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,"...");
var lp = agent.localPolicy,
localPolicy = lp(state, aleph4state),
actionAndAleph = sample(localPolicy),
action = actionAndAleph[0],
aleph4action = actionAndAleph[1],
ed = mdp.expectedDelta,
Edel = ed(state, action);
var stepData = {state, aleph4state, action, aleph4action, Edel};
if (state.terminateAfterAction) {
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,": localPolicy",JSON.stringify(localPolicy.params),"\n"+pad(state),"| action",action,"aleph4action",aleph4action,"Edel",Edel,"(terminal)");
return {
trajectory: [stepData], // sequence of step records {state, aleph4state, action, aleph4action, Edel}
tr4viz: [state],
conditionalExpectedTotal: Edel // expected Total conditional on this state trajectory
};
} else {
var w = mdp.world,
tr = w.transition,
nextState = tr(state, action),
pa = agent.propagateAspiration,
nextAleph4state = pa(state, action, aleph4action, Edel, nextState);
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,": localPolicy",JSON.stringify(localPolicy.params),"\n"+pad(state),"| action",action,"aleph4action",aleph4action,"Edel",Edel,"nextState",prettyState(nextState),"nextAleph4state",nextAleph4state);
var nextOut = simulateMDPAgentSatisfia(mdp, agent, nextState, nextAleph4state, options, t+1);
return {
trajectory: [stepData].concat(nextOut.trajectory),
tr4viz: [state].concat(nextOut.tr4viz),
conditionalExpectedTotal: Edel + nextOut.conditionalExpectedTotal
};
}
};
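
Compared with the version removed from examples/runVerySimpleGW.wppl above, the simulator no longer closes over script-local helpers; everything it needs is passed in through `mdp` and `agent`. A rough sketch of the interface it assumes, with field names read off the accesses in the function body (a summary for orientation, not a definition from the codebase):

// mdp:   { expectedDelta(state, action), world: { transition(state, action) }, startState, ... }
// agent: { localPolicy(state, aleph4state) -> distribution over [action, aleph4action] pairs,
//          propagateAspiration(state, action, aleph4action, Edel, nextState) -> next aspiration interval }
// also assumed in scope: asInterval, and the logging helpers pad and prettyState.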


var simulateMDP = function(startState, world, agent, outputType) {
var params = agent.params;
