-
Notifications
You must be signed in to change notification settings - Fork 6
/
QLearner.java
127 lines (108 loc) · 5.05 KB
/
QLearner.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
package tools;
import java.util.*;
import java.util.logging.*;
import cartago.Artifact;
import cartago.OPERATION;
import cartago.OpFeedbackParam;
/**
 * A CArtAgO artifact that learns Q tables for a {@link Lab} environment and
 * exposes operations for agents to (a) compute a Q matrix against a goal
 * description and (b) query the next best action for a given state.
 */
public class QLearner extends Artifact {

  private Lab lab; // the lab environment that will be learnt
  private int stateCount; // the number of possible states in the lab environment
  private int actionCount; // the number of possible actions in the lab environment
  // Q tables computed for different goals, keyed by Arrays.hashCode(goalDescription)
  private HashMap<Integer, double[][]> qTables;

  private static final Logger LOGGER = Logger.getLogger(QLearner.class.getName());

  /**
   * Initializes the artifact against a lab environment.
   *
   * @param environmentURL the URL of the W3C Thing Description of the lab Thing
   */
  public void init(String environmentURL) {
    this.lab = new Lab(environmentURL);

    this.stateCount = this.lab.getStateCount();
    LOGGER.info("Initialized with a state space of n=" + stateCount);

    this.actionCount = this.lab.getActionCount();
    LOGGER.info("Initialized with an action space of m=" + actionCount);

    this.qTables = new HashMap<>();
  }

  /**
   * Computes a Q matrix for the state space and action space of the lab, and against
   * a goal description. For example, the goal description can be of the form [z1level, z2Level],
   * where z1Level is the desired value of the light level in Zone 1 of the lab,
   * and z2Level is the desired value of the light level in Zone 2 of the lab.
   * For exercise 11, the possible goal descriptions are:
   * [0,0], [0,1], [0,2], [0,3],
   * [1,0], [1,1], [1,2], [1,3],
   * [2,0], [2,1], [2,2], [2,3],
   * [3,0], [3,1], [3,2], [3,3].
   *
   * <p>
   * HINT: Use the methods of {@link LearningEnvironment} (implemented in {@link Lab})
   * to interact with the learning environment (here, the lab), e.g., to retrieve the
   * applicable actions, perform an action at the lab during learning etc.
   * </p>
   *
   * @param goalDescription the desired goal against the which the Q matrix is calculated (e.g., [2,3])
   * @param episodesObj the number of episodes used for calculating the Q matrix
   * @param alphaObj the learning rate with range [0,1].
   * @param gammaObj the discount factor [0,1]
   * @param epsilonObj the exploration probability [0,1]
   * @param rewardObj the reward assigned when reaching the goal state
   * @throws IllegalArgumentException if a hyper-parameter is outside its documented range
   */
  @OPERATION
  public void calculateQ(Object[] goalDescription, Object episodesObj, Object alphaObj,
      Object gammaObj, Object epsilonObj, Object rewardObj) {
    // Ensure that the right datatypes are used regardless of how the caller
    // passed the parameters (agents may hand over strings or boxed numbers).
    int episodes = Integer.parseInt(episodesObj.toString());
    double alpha = Double.parseDouble(alphaObj.toString());
    double gamma = Double.parseDouble(gammaObj.toString());
    double epsilon = Double.parseDouble(epsilonObj.toString());
    int reward = Integer.parseInt(rewardObj.toString());

    // Fail fast on hyper-parameters outside the ranges documented above,
    // instead of silently learning with nonsensical values.
    if (episodes <= 0) {
      throw new IllegalArgumentException("episodes must be positive, got " + episodes);
    }
    if (alpha < 0.0 || alpha > 1.0) {
      throw new IllegalArgumentException("alpha must be in [0,1], got " + alpha);
    }
    if (gamma < 0.0 || gamma > 1.0) {
      throw new IllegalArgumentException("gamma must be in [0,1], got " + gamma);
    }
    if (epsilon < 0.0 || epsilon > 1.0) {
      throw new IllegalArgumentException("epsilon must be in [0,1], got " + epsilon);
    }

    // Register a (zero-initialized) Q table for this goal so that it can later
    // be looked up per goal description; previously the qTables map was never
    // written to, so no computed table could ever be retrieved.
    qTables.put(Arrays.hashCode(goalDescription), initializeQTable());

    LOGGER.info("Initialized Q table for goal " + Arrays.toString(goalDescription)
        + " (episodes=" + episodes + ", alpha=" + alpha + ", gamma=" + gamma
        + ", epsilon=" + epsilon + ", reward=" + reward + ")");

    // TODO: implement the epsilon-greedy Q-learning loop over `episodes`
    // episodes against the lab environment, updating the table stored above.
  }

  /**
   * Returns information about the next best action based on a provided state and the QTable for
   * a goal description. The returned information can be used by agents to invoke an action
   * using a ThingArtifact.
   *
   * @param goalDescription the desired goal against the which the Q matrix is calculated (e.g., [2,3])
   * @param currentStateDescription the current state e.g. [2,2,true,false,true,true,2]
   * @param nextBestActionTag the (returned) semantic annotation of the next best action, e.g. "http://example.org/was#SetZ1Light"
   * @param nextBestActionPayloadTags the (returned) semantic annotations of the payload of the next best action, e.g. [Z1Light]
   * @param nextBestActionPayload the (returned) payload of the next best action, e.g. true
   */
  @OPERATION
  public void getActionFromState(Object[] goalDescription, Object[] currentStateDescription,
      OpFeedbackParam<String> nextBestActionTag, OpFeedbackParam<Object[]> nextBestActionPayloadTags,
      OpFeedbackParam<Object[]> nextBestActionPayload) {
    // Placeholder implementation — remove the following upon implementing Task 2.3!
    // Sets the semantic annotation of the next best action to be returned.
    nextBestActionTag.set("http://example.org/was#SetZ1Light");

    // Sets the semantic annotation of the payload of the next best action to be returned.
    Object[] payloadTags = { "Z1Light" };
    nextBestActionPayloadTags.set(payloadTags);

    // Sets the payload of the next best action to be returned.
    Object[] payload = { true };
    nextBestActionPayload.set(payload);
  }

  /**
   * Print the Q matrix
   *
   * @param qTable the Q matrix
   */
  void printQTable(double[][] qTable) {
    System.out.println("Q matrix");
    for (int i = 0; i < qTable.length; i++) {
      System.out.print("From state " + i + ": ");
      for (int j = 0; j < qTable[i].length; j++) {
        System.out.printf("%6.2f ", (qTable[i][j]));
      }
      System.out.println();
    }
  }

  /**
   * Initialize a Q matrix
   *
   * @return a stateCount x actionCount matrix of zeros
   */
  private double[][] initializeQTable() {
    // Java guarantees that a newly allocated double[][] is zero-filled
    // (JLS §4.12.5), so the previous explicit 0.0-assignment loop was redundant.
    return new double[this.stateCount][this.actionCount];
  }
}