import numpy as np import collections try: from scipy.stats import scoreatpercentile except: # in case no scipy scoreatpercentile = False def _confidence_interval_1d(A, alpha=.05, metric=np.mean, numResamples=10000, interpolate=True): """Calculates bootstrap confidence interval along one dimensional array""" if not isinstance(alpha, collections.Iterable): alpha = np.array([alpha]) N = len(A) resampleInds = np.random.randint(0, N, (numResamples,N)) metricOfResampled = metric(A[resampleInds], axis=-1) confidenceInterval = np.zeros(2*len(alpha),dtype='float') if interpolate: for thisAlphaInd, thisAlpha in enumerate(alpha): confidenceInterval[2*thisAlphaInd] = scoreatpercentile(metricOfResampled, thisAlpha*100/2.0) confidenceInterval[2*thisAlphaInd+1] = scoreatpercentile(metricOfResampled, 100-thisAlpha*100/2.0) else: sortedMetricOfResampled = np.sort(metricOfResampled) for thisAlphaInd, thisAlpha in enumerate(alpha): confidenceInterval[2*thisAlphaInd] = sortedMetricOfResampled[int(round(thisAlpha*numResamples/2.0))] confidenceInterval[2*thisAlphaInd+1] = sortedMetricOfResampled[int(round(numResamples - thisAlpha*numResamples/2.0))] return confidenceInterval def _ma_confidence_interval_1d(A, alpha=.05, metric=np.mean, numResamples=10000, interpolate=True): A = np.ma.masked_invalid(A, copy=True) A = A.compressed() confidenceInterval = _confidence_interval_1d(A, alpha, metric, numResamples, interpolate) return confidenceInterval def confidence_interval(A, axis=None, alpha=.05, metric=np.mean, numResamples=10000, interpolate=True): """Return the bootstrap confidence interval of an array or along an axis ignoring NaNs and masked elements. Parameters ---------- A : array_like Array containing numbers whose confidence interval is desired. axis : int, optional Axis along which the confidence interval is computed. The default is to compute the confidence interval of the flattened array. alpha: float or array, optional confidence level of confidence interval. 100.0*(1-alpha) percent confidence interval will be returned. If length-n array, n confidence intervals will be computed The default is .05 metric : numpy function, optional metric to calculate confidence interval for. The default is numpy.mean numResamples : int, optional number of bootstrap samples. The default is 10000. interpolate: bool, optional uses scipy.stats.scoreatpercentile to interpolate between bootstrap samples if alpha*numResamples/2.0 is not integer. The default is True Returns ------- confidenceInterval : ndarray An array with the same shape as `A`, with the specified axis replaced by one twice the length of the alpha If `A` is a 0-d array, or if axis is None, a length-2 ndarray is returned. """ if interpolate is True and scoreatpercentile is False: print "need scipy to interpolate between values" interpolate = False A = A.copy() if axis is None: A = A.ravel() outA = _ma_confidence_interval_1d(A, alpha, metric, numResamples, interpolate) else: outA = np.apply_along_axis(_ma_confidence_interval_1d, axis, A, alpha, metric, numResamples, interpolate) return outA