-
Notifications
You must be signed in to change notification settings - Fork 1
/
ValueFuntion2.m
49 lines (38 loc) · 1.31 KB
/
ValueFuntion2.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
function value=ValueFuntion2(meanVector,covarianceMatrix,action,alpha)
% VALUEFUNTION2  Value function with a tuning parameter.
%
%   value = ValueFuntion2(meanVector, covarianceMatrix, action, alpha)
%   scores an action under the belief state (meanVector, covarianceMatrix)
%   as a blend of an immediate reward and a sampled estimate of the reward
%   obtained after the belief has been updated with feedback.
%
%   Inputs:
%     meanVector       -- relevance scores of the ranked documents
%     covarianceMatrix -- similarity matrix
%     action           -- re-ranking in the first page; used below as an
%                         index into meanVector and covarianceMatrix
%     alpha            -- tuning parameter: weight on the immediate reward
%                         (the future reward gets 1 - alpha)
%
%   Output:
%     value            -- expected total return; 0 when action is empty
%
%   Globals read:
%     num    -- number of samples requested from mvnrnd
%     top    -- how many leading entries are scored (indexed as 1:top)
%     weight -- multiplied against those top entries to form a reward
    global num top weight;

    % Nothing to evaluate without an action.
    if isempty(action)
        value = 0;
        return;
    end

    % Judgements whose probability does not exceed this are skipped; their
    % future reward stays at 0.
    minProbability = 0.001;
    % Second argument handed to CreatPro; its meaning is not visible in this
    % file -- presumably the number of judgement levels, TODO confirm.
    judgementSetting = 3;

    % Immediate reward: weighted scores of the first `top` ranked entries.
    ranking = ChangeAction2RankList(action, length(meanVector));
    headOfRanking = ranking(1:top);
    immediateReward = weight * meanVector(headOfRanking);

    % Monte Carlo sampling restricted to the entries named by the action.
    draws = mvnrnd(meanVector(action), covarianceMatrix(action, action), num);

    % Two-column table: column 1 is overwritten with each judgement below,
    % column 2 keeps the action entries.
    feedbackTable = [action; action].';

    % Possible judgements (one per row) and the probability of each.
    [outcomes, outcomeProb] = CreatPro(draws, judgementSetting);

    nOutcomes = size(outcomes, 1);
    rewardAfterFeedback = zeros(1, nOutcomes);
    for k = 1:nOutcomes
        % Written as a negated ">" so that NaN probabilities are skipped too.
        if ~(outcomeProb(k) > minProbability)
            continue;
        end
        feedbackTable(:, 1) = outcomes(k, :);
        posteriorMean = sort(BeliefUpdate(meanVector, covarianceMatrix, feedbackTable), 'descend');
        rewardAfterFeedback(k) = weight * posteriorMean(1:top);
    end

    % Expected total return.
    value = alpha * immediateReward + (1 - alpha) * rewardAfterFeedback * outcomeProb;
end