"""
Utility functions for Langevin regression
Jared Callaham (2020)
"""
import numpy as np
from time import time
from scipy.optimize import minimize
import matplotlib.pyplot as plt

# Return a single expression from a list of expressions and coefficients
# Note this will give a SymPy expression and not a function
def sindy_model(Xi, expr_list):
    return sum([Xi[i]*expr_list[i] for i in range(len(expr_list))])
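
# Illustrative sketch (not part of the original module): how sindy_model might be
# called with a small SymPy candidate library. The names below (x, expr_list, Xi)
# are assumptions for the example only.
def _example_sindy_model():
    import sympy
    x = sympy.symbols('x')
    expr_list = [x, x**2, x**3]
    Xi = np.array([1.0, 0.0, -0.5])
    return sindy_model(Xi, expr_list)  # SymPy expression equivalent to 1.0*x - 0.5*x**3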

# Iterated trapezoidal rule: integrate an n-dimensional array over all of its axes,
# where dx is either a scalar spacing or a list of spacings (one per axis)
def ntrapz(I, dx):
    if isinstance(dx, int) or isinstance(dx, float) or len(dx) == 1:
        return np.trapz(I, dx=dx, axis=0)
    else:
        return np.trapz(ntrapz(I, dx[1:]), dx=dx[0])
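
# Illustrative sketch (not part of the original module): ntrapz integrating a 2D
# Gaussian density over a uniform grid; the result should be close to 1.
def _example_ntrapz():
    x = np.linspace(-5, 5, 201)
    dx = x[1] - x[0]
    xx, yy = np.meshgrid(x, x, indexing='ij')
    p = np.exp(-(xx**2 + yy**2)/2)/(2*np.pi)
    return ntrapz(p, [dx, dx])  # approximately 1.0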

def kl_divergence(p_in, q_in, dx=1, tol=None):
    """
    Approximate Kullback-Leibler divergence for arbitrary dimensionality
    """
    if tol is None:
        tol = max(min(p_in.flatten()), min(q_in.flatten()))
    q = q_in.copy()
    p = p_in.copy()
    q[q < tol] = tol
    p[p < tol] = tol
    return ntrapz(p*np.log(p/q), dx)
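
# Illustrative sketch (not part of the original module): KL divergence between two
# unit-variance Gaussians evaluated on a uniform grid. The analytic value for this
# pair is 0.5, so the returned estimate should be close to that.
def _example_kl_divergence():
    x = np.linspace(-5, 5, 501)
    dx = x[1] - x[0]
    p = np.exp(-x**2/2)/np.sqrt(2*np.pi)        # standard normal
    q = np.exp(-(x - 1)**2/2)/np.sqrt(2*np.pi)  # normal shifted by 1
    return kl_divergence(p, q, dx=dx)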

def KM_avg(X, r_edges, stride, dt):
    # Estimate drift and diffusion by conditional (Kramers-Moyal) averaging of the
    # subsampled time series on the bins defined by r_edges
    r_centers = (r_edges[:-1]+r_edges[1:])/2

    Y = X[::stride]
    tau = stride*dt
    dY = (Y[1:] - Y[:-1])/tau      # Step (like a finite-difference derivative estimate)
    dY2 = (Y[1:] - Y[:-1])**2/tau  # Squared step (conditional second moment)

    f_KM = np.zeros(len(r_edges)-1)
    a_KM = np.zeros(f_KM.shape)
    f_err = np.zeros(f_KM.shape)
    a_err = np.zeros(f_KM.shape)

    # At each histogram bin, find time series points where the state falls into this bin
    for i in range(len(r_edges)-1):
        mask = np.nonzero((Y[:-1] > r_edges[i]) * (Y[:-1] < r_edges[i+1]))[0]
        if len(mask) > 0:
            f_KM[i] = np.mean(dY[mask])       # Conditional average ~ drift
            f_err[i] = np.std(dY[mask])/np.sqrt(len(mask))    # Standard error of the drift estimate
            a_KM[i] = 0.5*np.mean(dY2[mask])  # Conditional average ~ diffusion
            a_err[i] = np.std(dY2[mask])/np.sqrt(len(mask))   # Standard error of the diffusion estimate
        else:
            f_KM[i] = np.nan
            f_err[i] = np.nan
            a_KM[i] = np.nan
            a_err[i] = np.nan

    plt.figure(figsize=(8, 4))

    plt.subplot(121)
    plt.plot(r_centers, f_KM, label='Finite-time KM')
    plt.gca().fill_between(r_centers, f_KM-3*f_err, f_KM+3*f_err, alpha=0.4)
    plt.xlabel('$X_t$', fontsize=14)
    plt.ylabel('Drift', fontsize=14)
    plt.grid()
    # plt.ylim([-50, 50])
    plt.legend()

    plt.subplot(122)
    plt.plot(r_centers, a_KM, label='Finite-time KM')
    plt.gca().fill_between(r_centers, a_KM-3*a_err, a_KM+3*a_err, alpha=0.4)
    plt.ylabel('Diffusion', fontsize=14)
    plt.grid()
    # plt.ylim([0, 20])
    plt.legend()

    return f_KM, a_KM, f_err, a_err
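
# Illustrative sketch (not part of the original module): Kramers-Moyal averaging of a
# simulated Ornstein-Uhlenbeck process dX = -X dt + dW. With this model the drift
# estimate should be roughly -x and the diffusion roughly 0.5 in well-sampled bins.
# The step count, timestep, and bin edges are assumptions for the example only.
def _example_KM_avg(n_steps=100000, dt=1e-3, seed=0):
    rng = np.random.default_rng(seed)
    X = np.zeros(n_steps)
    for n in range(n_steps - 1):
        X[n+1] = X[n] - X[n]*dt + np.sqrt(dt)*rng.standard_normal()
    r_edges = np.linspace(-2, 2, 33)
    return KM_avg(X, r_edges, stride=10, dt=dt)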

# Return optimal coefficients for finite-time correction
def AFP_opt(cost, params):
    ### RUN OPTIMIZATION PROBLEM
    start_time = time()
    Xi0 = params["Xi0"]

    is_complex = np.iscomplex(Xi0[0])
    if is_complex:
        Xi0 = np.concatenate((np.real(Xi0), np.imag(Xi0)))  # Split vector in two for complex
        opt_fun = lambda Xi: cost(Xi[:len(Xi)//2] + 1j*Xi[len(Xi)//2:], params)
    else:
        opt_fun = lambda Xi: cost(Xi, params)

    res = minimize(opt_fun, Xi0, method='nelder-mead',
                   options={'disp': False, 'maxfev': int(1e4)})
    print('%%%% Optimization time: {0} seconds, Cost: {1} %%%%'.format(time() - start_time, res.fun))

    # Return coefficients and cost function
    if is_complex:
        # Reassemble complex coefficients from the real and imaginary halves
        return res.x[:len(res.x)//2] + 1j*res.x[len(res.x)//2:], res.fun
    else:
        return res.x, res.fun
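
# Illustrative sketch (not part of the original module): AFP_opt applied to a toy
# quadratic cost. In actual use the cost function would evaluate the adjoint
# Fokker-Planck / finite-time correction residual; the names below are assumptions.
def _example_AFP_opt():
    def toy_cost(Xi, params):
        return np.sum((Xi - params["target"])**2)
    params = {"Xi0": np.array([0.0, 0.0]), "target": np.array([1.0, -2.0])}
    Xi_opt, final_cost = AFP_opt(toy_cost, params)
    return Xi_opt, final_cost  # Xi_opt should approach [1, -2]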

def SSR_loop(opt_fun, params):
    """
    Stepwise sparse regression: general function for a given optimization problem
    opt_fun should take the parameters and return coefficients and cost

    Requires a list of drift and diffusion expressions,
    (although these are just passed to the opt_fun)
    """
    # Lists of candidate expressions... coefficients are optimized
    f_expr, s_expr = params['f_expr'].copy(), params['s_expr'].copy()
    lib_f, lib_s = params['lib_f'].copy(), params['lib_s'].copy()
    Xi0 = params['Xi0'].copy()

    m = len(f_expr) + len(s_expr)
    Xi = np.zeros((m, m-1), dtype=Xi0.dtype)  # Output results
    V = np.zeros((m-1))                       # Cost at each step

    # Full regression problem as baseline
    Xi[:, 0], V[0] = opt_fun(params)

    # Start with all candidates
    active = np.array([i for i in range(m)])

    # Iterate and threshold
    for k in range(1, m-1):
        # Loop through remaining terms and find the one that increases the cost function the least
        min_idx = -1
        V[k] = 1e8
        for j in range(len(active)):
            tmp_active = active.copy()
            tmp_active = np.delete(tmp_active, j)  # Try deleting this term

            # Break off masks for drift/diffusion
            f_active = tmp_active[tmp_active < len(f_expr)]
            s_active = tmp_active[tmp_active >= len(f_expr)] - len(f_expr)
            print(f_active)
            print(s_active)
            print(f_expr[f_active], s_expr[s_active])

            params['f_expr'] = f_expr[f_active]
            params['s_expr'] = s_expr[s_active]
            params['lib_f'] = lib_f[:, f_active]
            params['lib_s'] = lib_s[:, s_active]
            params['Xi0'] = Xi0[tmp_active]

            # Ensure that there is at least one drift and diffusion term left
            if len(s_active) > 0 and len(f_active) > 0:
                tmp_Xi, tmp_V = opt_fun(params)

                # Keep minimum cost
                if tmp_V < V[k]:
                    #if (IS_DRIFT and len(f_active)>1) or (not IS_DRIFT and len(a_active)>1):
                    min_idx = j
                    V[k] = tmp_V
                    min_Xi = tmp_Xi

        print("Cost: {0}".format(V[k]))
        # Delete least important term
        active = np.delete(active, min_idx)  # Remove inactive index
        Xi0[active] = min_Xi  # Re-initialize with best results from previous step
        Xi[active, k] = min_Xi
        print(Xi[:, k])

    return Xi, V
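
# Illustrative sketch (not part of the original module): the shapes of the inputs
# SSR_loop expects, exercised with a toy least-squares opt_fun instead of the
# Langevin-regression optimization. All names and data below are assumptions.
def _example_SSR_loop():
    rng = np.random.default_rng(0)
    x_data = rng.uniform(-1, 1, 200)

    f_expr = np.array(['x', 'x^2', 'x^3'])                   # Drift candidate labels
    s_expr = np.array(['1', 'x^2'])                          # Diffusion candidate labels
    lib_f = np.vstack([x_data, x_data**2, x_data**3]).T      # Drift library (samples x terms)
    lib_s = np.vstack([np.ones_like(x_data), x_data**2]).T   # Diffusion library

    # Target built from a known sparse combination of the candidate columns
    y = lib_f @ np.array([1.0, 0.0, -0.5]) + lib_s @ np.array([0.5, 0.0])

    def toy_opt(params):
        A = np.hstack([params['lib_f'], params['lib_s']])
        Xi, _, _, _ = np.linalg.lstsq(A, y, rcond=None)
        return Xi, np.sum((A @ Xi - y)**2)

    params = {'f_expr': f_expr, 's_expr': s_expr,
              'lib_f': lib_f, 'lib_s': lib_s,
              'Xi0': np.zeros(len(f_expr) + len(s_expr))}
    return SSR_loop(toy_opt, params)  # Xi: coefficients at each step, V: cost at each step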

# 1D Markov test
def markov_test(X, lag, N=32, L=2):
    # Lagged time series
    X1 = X[:-2*lag:lag]
    X2 = X[lag:-lag:lag]
    X3 = X[2*lag::lag]

    # Two-time joint pdfs
    bins = np.linspace(-L, L, N+1)
    dx = bins[1]-bins[0]
    p12, _, _ = np.histogram2d(X1, X2, bins=[bins, bins], density=True)
    p23, _, _ = np.histogram2d(X2, X3, bins=[bins, bins], density=True)
    p2, _ = np.histogram(X2, bins=bins, density=True)
    p2[p2 < 1e-4] = 1e-4

    # Conditional PDF (Markov assumption)
    pcond_23 = p23.copy()
    for j in range(pcond_23.shape[1]):
        pcond_23[:, j] = pcond_23[:, j]/p2

    # Three-time PDFs
    p123, _ = np.histogramdd(np.array([X1, X2, X3]).T, bins=np.array([bins, bins, bins]), density=True)
    p123_markov = np.einsum('ij,jk->ijk', p12, pcond_23)

    # Chi^2 value
    #return utils.ntrapz( (p123 - p123_markov)**2, [dx, dx, dx] )/(np.var(p123.flatten()) + np.var(p123_markov.flatten()))
    return kl_divergence(p123, p123_markov, dx=[dx, dx, dx], tol=1e-6)
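
# Illustrative sketch (not part of the original module): the Markov test applied to an
# i.i.d. Gaussian sequence, which is trivially Markovian. Smaller returned values
# indicate better agreement with the Markov (Chapman-Kolmogorov) factorization; the
# sample size and lag here are assumptions for the example only.
def _example_markov_test(n_samples=100000, seed=0):
    rng = np.random.default_rng(seed)
    X = rng.standard_normal(n_samples)
    return markov_test(X, lag=5, N=32, L=2)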

### FAST AUTOCORRELATION FUNCTION
# From https://dfm.io/posts/autocorr/
def next_pow_two(n):
    i = 1
    while i < n:
        i = i << 1
    return i

def autocorr_func_1d(x, norm=True):
    x = np.atleast_1d(x)
    if len(x.shape) != 1:
        raise ValueError("invalid dimensions for 1D autocorrelation function")
    n = next_pow_two(len(x))

    # Compute the FFT and then (from that) the auto-correlation function
    f = np.fft.fft(x - np.mean(x), n=2*n)
    acf = np.fft.ifft(f * np.conjugate(f))[:len(x)].real
    acf /= 4*n

    # Optionally normalize
    if norm:
        acf /= acf[0]

    return acf
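
# Illustrative sketch (not part of the original module): autocorrelation of a noisy
# sinusoid; the normalized ACF starts at 1 and oscillates with the signal's period.
# The period and noise level below are assumptions for the example only.
def _example_autocorr():
    rng = np.random.default_rng(0)
    t = np.arange(2000)
    x = np.sin(2*np.pi*t/50) + 0.5*rng.standard_normal(len(t))
    return autocorr_func_1d(x)  # acf[0] == 1 when norm=True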