Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

example of generating the tree #2

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 82 additions & 79 deletions examples/runVerySimpleGW.wppl
Original file line number Diff line number Diff line change
Expand Up @@ -30,82 +30,85 @@ var env = getEnv(),
messingPotential = agent.messingPotential_state,
cupLoss = agent.cupLoss_state;

// Generate and draw a trajectory:
// Recursively samples one trajectory starting at `state` with aspiration `aleph`,
// returning the sequence of per-step records plus the total expected delta
// accumulated along this particular trajectory.
// _t is the (optional) current depth, used only for logging.
var simulate = function(state, aleph, _t) {
    var t = _t ? _t : 0,
        aleph4state = asInterval(aleph);
    if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,"...");
    // BUG FIX: this used to read `var localPolicy = localPolicy(state, aleph4state)`,
    // which shadows the agent-level policy function with its own binding; under
    // plain-JS hoisting semantics that calls `undefined`. Bind the policy
    // distribution under a fresh name instead, consistent with `locPol` in
    // getDynTree.wppl.
    var locPol = localPolicy(state, aleph4state),
        actionAndAleph = sample(locPol), // sample an [action, aleph4action] pair
        action = actionAndAleph[0],
        aleph4action = actionAndAleph[1],
        Edel = expectedDelta(state, action);
    var stepData = {state, aleph4state, action, aleph4action, Edel};
    if (state.terminateAfterAction) {
        if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,": localPolicy",JSON.stringify(locPol.params),"\n"+pad(state),"| action",action,"aleph4action",aleph4action,"Edel",Edel,"(terminal)");
        return {
            trajectory: [stepData], // sequence of per-step records (state, aspirations, action, Edel)
            conditionalExpectedIndicator: Edel // expected indicator conditional on this trajectory
        };
    } else {
        var nextState = transition(state, action),
            nextAleph4state = propagateAspiration(state, action, aleph4action, Edel, nextState);
        if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,": localPolicy",JSON.stringify(locPol.params),"\n"+pad(state),"| action",action,"aleph4action",aleph4action,"Edel",Edel,"nextState",prettyState(nextState),"nextAleph4state",nextAleph4state);
        var nextOut = simulate(nextState, nextAleph4state, t+1);
        return {
            trajectory: [stepData].concat(nextOut.trajectory),
            conditionalExpectedIndicator: Edel + nextOut.conditionalExpectedIndicator
        };
    }
};

// Reporting script: verify the agent's value expectations, estimate the
// distribution of trajectories, and print regression-test data plus
// diagnostic tables, finishing with the full dynamics tree.
console.log("aleph0", asInterval(aleph0));

var t0 = webpplAgents.time();
// verify meeting of expectations:
console.log("V", V(startState, aleph0));
console.log("TIME:", webpplAgents.time() - t0, "ms");
// NOTE(review): `V` above is called with `startState` while the calls below use
// `mdp.startState` — presumably the same object; confirm and unify.
console.log("cupLoss", cupLoss(mdp.startState, aleph0));
console.log("entropy", entropy(mdp.startState, aleph0));
console.log("KLdiv", KLdiv(mdp.startState, aleph0));
console.log("messPot", messingPotential(mdp.startState, aleph0));

var gd = agent.getData, agentData = gd();

// estimate distribution of trajectories:

var trajDist = Infer({ model() {
return simulate(mdp.startState, aleph0).trajectory;
}}).getDist();

console.log("\nDATA FOR REGRESSION TESTS: \ntrajDist");
var regressionTestData = webpplAgents.trajDist2simpleJSON(trajDist);
console.log(JSON.stringify(regressionTestData));
console.log("END OF DATA FOR REGRESSION TESTS\n");

var trajData = trajDist2TrajData(trajDist, agent);

//console.log("trajData", trajData);

var locActionData = webpplAgents.trajDist2LocActionData(trajDist, trajData);
console.log("locActionData", locActionData);

// ASCII tables of the admissible Q-bounds over all visited state/action pairs:
console.log("\nminAdmissibleQ:");
console.log(stateActionFct2ASCII(agent.minAdmissibleQ, agentData.stateActionPairs));
console.log("\nmaxAdmissibleQ:");
console.log(stateActionFct2ASCII(agent.maxAdmissibleQ, agentData.stateActionPairs));

// ASCII tables of per-location action statistics:
console.log("\nQ:");
console.log(webpplAgents.locActionData2ASCII(locActionData.Q));
console.log("\ncupLoss:");
console.log(webpplAgents.locActionData2ASCII(locActionData.cupLoss));
console.log("\nmessingPotential:");
console.log(webpplAgents.locActionData2ASCII(locActionData.messingPotential));
console.log("\ncombinedLoss:");
console.log(webpplAgents.locActionData2ASCII(locActionData.combinedLoss));

console.log("\naction frequencies:");
console.log(webpplAgents.locActionData2ASCII(locActionData.actionFrequency));



// Print the full dynamics tree rooted at the start state
// (see src/simulation/getDynTree.wppl for the format).
console.log(JSON.stringify(getDynTree(agent, mdp.startState, aleph0)));

// NOTE(review): dead code — this `if (false)` block is a verbatim copy of the
// simulation/reporting pipeline above, disabled in favor of getDynTree output.
// Version control already preserves the history; consider deleting this block
// outright instead of keeping it disabled.
if (false) {

// Generate and draw a trajectory:
var simulate = function(state, aleph, _t) {
var t = _t ? _t : 0,
aleph4state = asInterval(aleph);
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,"...");
// NOTE(review): `localPolicy` here shadows the agent-level policy function with
// its own result; under plain-JS hoisting semantics this would call `undefined`.
// Harmless while the block is disabled, but rename before ever re-enabling.
var localPolicy = localPolicy(state, aleph4state),
actionAndAleph = sample(localPolicy),
action = actionAndAleph[0],
aleph4action = actionAndAleph[1],
Edel = expectedDelta(state, action);
var stepData = {state, aleph4state, action, aleph4action, Edel};
if (state.terminateAfterAction) {
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,": localPolicy",JSON.stringify(localPolicy.params),"\n"+pad(state),"| action",action,"aleph4action",aleph4action,"Edel",Edel,"(terminal)");
return {
trajectory: [stepData], // sequence of [state, action] pairs
conditionalExpectedIndicator: Edel // expected indicator conditional on this trajectory
};
} else {
var nextState = transition(state, action),
nextAleph4state = propagateAspiration(state, action, aleph4action, Edel, nextState);
if (options.verbose || options.debug) console.log(pad(state),"SIMULATE, t",t,"state",prettyState(state),"aleph4state",aleph4state,": localPolicy",JSON.stringify(localPolicy.params),"\n"+pad(state),"| action",action,"aleph4action",aleph4action,"Edel",Edel,"nextState",prettyState(nextState),"nextAleph4state",nextAleph4state);
var nextOut = simulate(nextState, nextAleph4state, t+1);
return {
trajectory: [stepData].concat(nextOut.trajectory),
conditionalExpectedIndicator: Edel + nextOut.conditionalExpectedIndicator
};
}
};

console.log("aleph0", asInterval(aleph0));

var t0 = webpplAgents.time();
// verify meeting of expectations:
console.log("V", V(startState, aleph0));
console.log("TIME:", webpplAgents.time() - t0, "ms");
console.log("cupLoss", cupLoss(mdp.startState, aleph0));
console.log("entropy", entropy(mdp.startState, aleph0));
console.log("KLdiv", KLdiv(mdp.startState, aleph0));
console.log("messPot", messingPotential(mdp.startState, aleph0));

var gd = agent.getData, agentData = gd();

// estimate distribution of trajectories:

var trajDist = Infer({ model() {
return simulate(mdp.startState, aleph0).trajectory;
}}).getDist();

console.log("\nDATA FOR REGRESSION TESTS: \ntrajDist");
var regressionTestData = webpplAgents.trajDist2simpleJSON(trajDist);
console.log(JSON.stringify(regressionTestData));
console.log("END OF DATA FOR REGRESSION TESTS\n");

var trajData = trajDist2TrajData(trajDist, agent);

//console.log("trajData", trajData);

var locActionData = webpplAgents.trajDist2LocActionData(trajDist, trajData);
console.log("locActionData", locActionData);

console.log("\nminAdmissibleQ:");
console.log(stateActionFct2ASCII(agent.minAdmissibleQ, agentData.stateActionPairs));
console.log("\nmaxAdmissibleQ:");
console.log(stateActionFct2ASCII(agent.maxAdmissibleQ, agentData.stateActionPairs));

console.log("\nQ:");
console.log(webpplAgents.locActionData2ASCII(locActionData.Q));
console.log("\ncupLoss:");
console.log(webpplAgents.locActionData2ASCII(locActionData.cupLoss));
console.log("\nmessingPotential:");
console.log(webpplAgents.locActionData2ASCII(locActionData.messingPotential));
console.log("\ncombinedLoss:");
console.log(webpplAgents.locActionData2ASCII(locActionData.combinedLoss));

console.log("\naction frequencies:");
console.log(webpplAgents.locActionData2ASCII(locActionData.actionFrequency));

}
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"src/agents/makeMDPAgent.wppl",
"src/agents/makeMDPAgentSatisfia.wppl",
"src/agents/makePOMDPAgent.wppl",
"src/simulation/getDynTree.wppl",
"src/simulation/simulateMDP.wppl",
"src/simulation/simulatePOMDP.wppl",
"src/visualization/gridworld.wppl",
Expand Down
2 changes: 2 additions & 0 deletions src/agents/makeMDPAgentSatisfia.wppl
Original file line number Diff line number Diff line change
Expand Up @@ -1221,6 +1221,8 @@ var makeMDPAgentSatisfia = function(params_, world) {
};

return {
transitionDistribution,
expectedDelta, varianceOfDelta, skewnessOfDelta, excessKurtosisOfDelta,
minAdmissibleQ, maxAdmissibleQ, minAdmissibleV, maxAdmissibleV,
localPolicy, localPolicyData, propagateAspiration,
Q, V, Q2, V2, Q_DeltaSquare, V_DeltaSquare, Q_ones, V_ones,
Expand Down
9 changes: 9 additions & 0 deletions src/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ module.exports = {

setFrom: (arg) => new Set(arg),

objectFromPairs: (pairs) => {
var result = {};
for (var index in pairs) {
var [key, value] = pairs[index];
result[key] = value;
}
return result;
},

min: (arr) => Math.min.apply(null, arr),

max: (arr) => Math.max.apply(null, arr),
Expand Down
50 changes: 50 additions & 0 deletions src/simulation/getDynTree.wppl
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// test with
// $ webppl --require webppl-dp --require . examples/runVerySimpleGW.wppl -- --gw=GW4

var getDynTree = function(agent, state, aleph) {
/* Construct a tree that represents all possible histories starting at state with aspiration aleph.
Return value has format

{action1: [aleph4action, actionLogit, {nextState1: [aleph4state, nextStateLogit, Edel, nextStateBranch],
nextState2: [aleph4state, nextStateLogit, Edel, nextStateBranch],
...}],
action2: [aleph4action, actionLogit, {nextState1: [aleph4state, nextStateLogit, Edel, nextStateBranch],
nextState2: [aleph4state, nextStateLogit, Edel, nextStateBranch],
...}],
...}

where
- action1, action2, ... are the actions available at state
- aleph4action is the aspiration for the action
- actionLogit is the logit for the action according to the local policy
- nextState1, nextState2, ... are the possible next states
- aleph4state is the aspiration for the next state
- nextStateLogit is the logit for the next state according to the transition distribution
- Edel is the expected delta for the action
- nextStateBranch is the tree for the next state
*/
// Pull out the agent functions used below; locPol is the distribution over
// [action, aleph4action] pairs at this state for the given aspiration interval.
var localPolicy = agent.localPolicy,
expectedDelta = agent.expectedDelta,
transitionDistribution = agent.transitionDistribution,
propagateAspiration = agent.propagateAspiration,
locPol = localPolicy(state, asInterval(aleph));
// One tree entry per [action, aleph4action] pair in the policy's support.
var stateBranch = webpplAgents.objectFromPairs(map(function(actionAndAleph) {
var action = actionAndAleph[0],
aleph4action = actionAndAleph[1],
actionLogit = locPol.score(actionAndAleph),
// NOTE(review): Edel is computed even on the terminal branch below, where it is
// never used — hoistable into the else-branch if expectedDelta is expensive.
Edel = expectedDelta(state, action);
if (state.terminateAfterAction) {
// NOTE(review): terminal entries carry only [aleph4action, actionLogit] —
// no Edel and no branch, unlike the 3-element format documented above;
// confirm that consumers of the tree handle the 2-element form.
return [action, [aleph4action, actionLogit]];
} else {
// Recurse over every state in the transition distribution's support.
// Next-state keys are JSON.stringify(state) (states are objects), while
// action keys above are the raw action values.
var transDist = transitionDistribution(state, action),
actionBranch = webpplAgents.objectFromPairs(map(function(nextState) {
var nextStateLogit = transDist.score(nextState),
nextAleph4state = propagateAspiration(state, action, aleph4action, Edel, nextState);
// Recursion terminates only if every path eventually reaches a state
// with terminateAfterAction set — assumed for these gridworld MDPs.
var nextStateBranch = getDynTree(agent, nextState, nextAleph4state);
return [JSON.stringify(nextState), [nextAleph4state, nextStateLogit, Edel, nextStateBranch]];
}, transDist.support()));
return [action, [aleph4action, actionLogit, actionBranch]];
}
}, locPol.support()));
return stateBranch;
};