-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbootstrap.py
79 lines (67 loc) · 3.4 KB
/
bootstrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import numpy as np
import collections
try:
from scipy.stats import scoreatpercentile
except:
# in case no scipy
scoreatpercentile = False
def _confidence_interval_1d(A, alpha=.05, metric=np.mean, numResamples=10000, interpolate=True):
"""Calculates bootstrap confidence interval along one dimensional array"""
if not isinstance(alpha, collections.Iterable):
alpha = np.array([alpha])
N = len(A)
resampleInds = np.random.randint(0, N, (numResamples,N))
metricOfResampled = metric(A[resampleInds], axis=-1)
confidenceInterval = np.zeros(2*len(alpha),dtype='float')
if interpolate:
for thisAlphaInd, thisAlpha in enumerate(alpha):
confidenceInterval[2*thisAlphaInd] = scoreatpercentile(metricOfResampled, thisAlpha*100/2.0)
confidenceInterval[2*thisAlphaInd+1] = scoreatpercentile(metricOfResampled, 100-thisAlpha*100/2.0)
else:
sortedMetricOfResampled = np.sort(metricOfResampled)
for thisAlphaInd, thisAlpha in enumerate(alpha):
confidenceInterval[2*thisAlphaInd] = sortedMetricOfResampled[int(round(thisAlpha*numResamples/2.0))]
confidenceInterval[2*thisAlphaInd+1] = sortedMetricOfResampled[int(round(numResamples - thisAlpha*numResamples/2.0))]
return confidenceInterval
def _ma_confidence_interval_1d(A, alpha=.05, metric=np.mean, numResamples=10000, interpolate=True):
A = np.ma.masked_invalid(A, copy=True)
A = A.compressed()
confidenceInterval = _confidence_interval_1d(A, alpha, metric, numResamples, interpolate)
return confidenceInterval
def confidence_interval(A, axis=None, alpha=.05, metric=np.mean, numResamples=10000, interpolate=True):
"""Return the bootstrap confidence interval of an array or along an axis ignoring NaNs and masked elements.
Parameters
----------
A : array_like
Array containing numbers whose confidence interval is desired.
axis : int, optional
Axis along which the confidence interval is computed.
The default is to compute the confidence interval of the flattened array.
alpha: float or array, optional
confidence level of confidence interval. 100.0*(1-alpha) percent confidence interval will be returned.
If length-n array, n confidence intervals will be computed
The default is .05
metric : numpy function, optional
metric to calculate confidence interval for.
The default is numpy.mean
numResamples : int, optional
number of bootstrap samples. The default is 10000.
interpolate: bool, optional
uses scipy.stats.scoreatpercentile to interpolate between bootstrap samples if alpha*numResamples/2.0 is not integer.
The default is True
Returns
-------
confidenceInterval : ndarray
An array with the same shape as `A`, with the specified axis replaced by one twice the length of the alpha
If `A` is a 0-d array, or if axis is None, a length-2 ndarray is returned.
"""
if interpolate is True and scoreatpercentile is False:
print("need scipy to interpolate between values")
interpolate = False
A = A.copy()
if axis is None:
A = A.ravel()
outA = _ma_confidence_interval_1d(A, alpha, metric, numResamples, interpolate)
else:
outA = np.apply_along_axis(_ma_confidence_interval_1d, axis, A, alpha, metric, numResamples, interpolate)
return outA