-
Notifications
You must be signed in to change notification settings - Fork 0
/
f_pavlov.m
73 lines (61 loc) · 1.75 KB
/
f_pavlov.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
function [ fx] = f_pavlov( x_t,theta,u_t,in )
% F_PAVLOV  One-step update of the value states from the previous feedback.
%
%   fx = f_pavlov( x_t,theta,u_t,in )
%
% IN:
% - x_t   : current hidden states
%             x_t(1) : value updated when cs and congruent agree
%                      (always updated when in.noCS is set)
%             x_t(2) : value updated when cs and congruent disagree
%             x_t(3) : used as the feedback salience when in.decay is set
% - theta : evolution parameters; theta(1) is mapped through a sigmoid to
%           give the learning rate alpha in (0,1)
% - u_t   : inputs
%             u_t(4) : feedback
%             u_t(3) : cs flag          (only read when in.noCS is off)
%             u_t(6) : congruent flag   (only read when in.noCS is off)
% - in    : options structure. Every flag is optional; a missing or empty
%           flag is treated as off.
%             fixed_learning_rate : if set, theta(1) is replaced by
%                                   in.fixed_alpha
%             fixed_alpha         : replacement for theta(1). NOTE: it is
%                                   substituted BEFORE the sigmoid, so the
%                                   effective learning rate is
%                                   1/(1+exp(-fixed_alpha))
%             decay               : scale feedback by x_t(3) and store the
%                                   salience in fx(3)
%             noCS                : ignore cs/congruent and always update
%                                   x_t(1)
%             track_pe            : store the prediction error in fx(3)
%                                   (overwrites the salience written by
%                                   in.decay when both are set)
% OUT:
% - fx : updated states, returned as a row vector. States this function
%        does not update are copied unchanged from x_t.

% Missing/empty option fields count as "off" instead of raising a
% "reference to non-existent field" error.
is_on = @(name) isfield(in, name) && ~isempty(in.(name)) && all(in.(name)(:));

if is_on('fixed_learning_rate')
    theta(1) = in.fixed_alpha;
end

alpha = 1./(1+exp(-theta(1))); % learning rate
% epsilon = 1./(1+exp(-theta(2))); % feedback decay
epsilon = 1;   % decay factor is currently pinned to 1 (no actual decay)
gamma = alpha; % with gamma == alpha the cross-update terms below are zero

feedback = u_t(4);

use_decay = is_on('decay');
if use_decay
    feedback_salience = epsilon*x_t(3);
else
    feedback_salience = 1;
end
outcome = feedback_salience*feedback;

% Start from the current states so that anything not explicitly updated
% below is carried over rather than dropped or zero-filled. The row
% orientation matches what index-wise assignment produced previously.
fx = reshape(x_t, 1, []);

% By default the prediction error is computed against x_t(1).
tracked_value = x_t(1);

if is_on('noCS')
    fx(1) = x_t(1) + alpha*(outcome - x_t(1));
else
    cs = u_t(3);
    congruent = u_t(6);
    if (cs && congruent) || (~cs && ~congruent) % actual infusion on previous trial
        fx(1) = x_t(1) + alpha*(outcome - x_t(1));
        fx(2) = x_t(2) + (alpha-gamma)*(outcome - x_t(1));
    else % no actual infusion on previous trial
        fx(2) = x_t(2) + alpha*(outcome - x_t(2));
        fx(1) = x_t(1) + (alpha-gamma)*(outcome - x_t(2));
        tracked_value = x_t(2);
    end
end

if use_decay
    fx(3) = feedback_salience; % this is the extent to which feedback has decayed
end
if is_on('track_pe')
    % Written last on purpose: takes precedence over the decay slot.
    fx(3) = outcome - tracked_value;
end
%
% if cs
% %dfdx = [df1dx1 , df1dx2;
% % df2dx1 , df2dx2]
% dfdx = [1-alpha];
% dfdP = [alpha*(1-alpha)*(us-x_t)];
%
%
% else
% dfdx = 0;
% dfdP = 0;
%
% end
% dfdx = dfdx';
% dfdP = dfdP';