diff --git a/neuroglia/event.py b/neuroglia/event.py index 68dd113..18ccb00 100644 --- a/neuroglia/event.py +++ b/neuroglia/event.py @@ -5,22 +5,68 @@ from sklearn.base import BaseEstimator,TransformerMixin from .utils import create_interpolator, events_to_xr_dim -from .spike import Smoother, DEFAULT_TAU +from .spike import Binner, DEFAULT_TAU class PeriEventTraceSampler(BaseEstimator,TransformerMixin): - """docstring for EventTensorizer.""" + """Take event-aligned samples of traces from a population of neurons. + + Traces are sampled relative to the event time. There is no enforced + constraint that the times of events or sample_times relative to the events + need to align to trace sample times. Rather, samples are interpolated from + the values in the traces DataFrame. + + Parameters + ---------- + traces : pandas DataFrame with 'time' as the index and neuron IDs in columns + The traces that will be sampled from when the transform method is called + sample_times : array + Time relative to events that will be used to sample or bin spikes. + + Notes + ----- + + This estimator is stateless (besides constructor parameters), the + fit method does nothing but is useful when used in a pipeline. + """ def __init__(self, traces, sample_times): self.sample_times = sample_times self.traces = traces - def fit(self, X, y=None): + def _make_splined_traces(self): self.splined_traces_ = self.traces.apply( lambda y: create_interpolator(self.traces.index,y), axis=0, ) + + def fit(self, X, y=None): + """Do nothing and return the estimator unchanged + + This method is here to implement the scikit-learn API and work in + scikit-learn pipelines. 
+ + Parameters + ---------- + X : array-like + + Returns + ------- + self + + """ return self def transform(self, X): + """Sample traces around each event + + Parameters + ---------- + X : pandas.DataFrame with a column named 'time' + + Returns + ------- + Xt : xarray.DataArray with columns ['event','sample_time','neuron'] + """ + self._make_splined_traces() # define a local function that will extract traces around each event def extractor(ev): @@ -39,23 +85,71 @@ def extractor(ev): class PeriEventSpikeSampler(BaseEstimator,TransformerMixin): - """docstring for PeriEventSpikeSampler.""" - def __init__(self, spikes, sample_times, fillna=True, tracizer=None,tracizer_kwargs=None): + """Take event-aligned samples of spikes from a population of neurons. + + Parameters + ---------- + spikes : pandas DataFrame with columns ['time','neuron'] + The spikes that will be sampled from when the transform method is called + sample_times : array + Time relative to events that will be used to sample or bin spikes. + fillna : boolean, optional (default: True) + Whether to fill unobserved values. This is likely to occur if a given + event has no spikes associated with it. + sampler : transformer, optional (default: neuroglia.spike.Binner) + Binner or Smoother from neuroglia.spike + sampler_kwargs : dict-like + Dictionary of keyword arguments to pass along to the Sampler + + Notes + ----- + + This estimator is stateless (besides constructor parameters), the + fit method does nothing but is useful when used in a pipeline.
+ """ + def __init__(self, spikes, sample_times, fillna=True, sampler=None,sampler_kwargs=None): self.spikes = spikes self.sample_times = sample_times self.fillna = fillna - self.Tracizer = tracizer - self.tracizer_kwargs = tracizer_kwargs + self.Sampler = sampler + self.sampler_kwargs = sampler_kwargs + + def _assign_sampler(self): + if self.Sampler is None: + self.Sampler = Binner + if self.sampler_kwargs is None: + self.sampler_kwargs = dict() def fit(self, X, y=None): - if self.Tracizer is None: - self.Tracizer = Smoother - if self.tracizer_kwargs is None: - self.tracizer_kwargs = dict() + """Do nothing and return the estimator unchanged + + This method is here to implement the scikit-learn API and work in + scikit-learn pipelines. + + Parameters + ---------- + X : array-like + Returns + ------- + self + + """ return self def transform(self, X): + """Sample spikes around each event + + Parameters + ---------- + X : pandas.DataFrame with a column named 'time' + + Returns + ------- + Xt : xarray.DataArray with columns ['event','sample_time','neuron'] + """ + + self._assign_sampler() # define a local function that will extract traces around each event def extractor(ev): @@ -69,8 +163,8 @@ def extractor(ev): ) local_spikes = self.spikes[local_mask] - tracizer = self.Tracizer(t,**self.tracizer_kwargs) - traces = tracizer.fit_transform(local_spikes) + sampler = self.Sampler(t,**self.sampler_kwargs) + traces = sampler.fit_transform(local_spikes) traces.index = self.sample_times[:len(traces)] diff --git a/neuroglia/nwb.py b/neuroglia/nwb.py index c8b1277..fb09d19 100644 --- a/neuroglia/nwb.py +++ b/neuroglia/nwb.py @@ -1,23 +1,76 @@ import pandas as pd -from sklearn.base import TransformerMixin +from sklearn.base import BaseEstimator,TransformerMixin -class SpikeTablizer(TransformerMixin): - """converts a dictionary of spike times in the form of neuron:[times] to a - dataframe with "neuron" and "time" columns, sorted by "time" +class 
SpikeTablizer(BaseEstimator,TransformerMixin): + """Convert a dictionary of spike times to a dataframe of spike times. + + It is common to store spike times as a dictionary where the keys are neuron + IDs and the values are arrays of spike times for a given neuron. + + This transformer converts a dictionary of spike times into a table of spike + times. + + Examples + -------- + + >>> import numpy as np + >>> import pandas as pd + >>> from neuroglia.nwb import SpikeTablizer + >>> binner = SpikeTablizer() + >>> spike_dict = {0:[0.1,0.2,0.3],2:[0.11]} + >>> spikes = binner.fit_transform(spike_dict) + + See also + -------- + + neuroglia.spike.Smoother + neuroglia.spike.Binner + + Notes + ----- + + This estimator is stateless (besides constructor parameters), the + fit method does nothing but is useful when used in a pipeline. """ def __init__(self): - super(SpikeTablizer, self).__init__() + pass def fit(self, X, y=None): # pragma: no cover + """ Do nothing an return the estimator unchanged. + + This method is here to implement the scikit-learn API and work in + scikit-learn pipelines. + + Parameters + ---------- + + X : dictionary of spike times in the format {::>> import numpy as np >>> import pandas as pd - >>> from neuroglia.spike import Binner - >>> binner = Binner(np.arange(0,1.0,0.001)) + >>> from neuroglia.spike import Smoother + >>> smoother = Smoother(np.arange(0,1.0,0.001)) >>> spikes = pd.DataFrame({'times':np.random.rand}) >>> X = binner.fit_transform(spikes) See also -------- - neuroglia.spike.Smoother + neuroglia.spike.Binner neuroglia.nwb.SpikeTablizer + Notes + ----- + + This estimator is stateless (besides constructor parameters), the + fit method does nothing but is useful when used in a pipeline. + """ def __init__(self,sample_times,kernel='gaussian',tau=DEFAULT_TAU): @@ -142,7 +155,8 @@ def __init__(self,sample_times,kernel='gaussian',tau=DEFAULT_TAU): def fit(self, X, y=None): """ Do nothing an return the estimator unchanged. 
- This method is just there to implement the usual API and hence work in pipelines. + This method is here to implement the scikit-learn API and work in + scikit-learn pipelines. Parameters ---------- @@ -157,7 +171,7 @@ def fit(self, X, y=None): """ return self - def __make_trace(self,neuron_spikes): + def _make_trace(self,neuron_spikes): neuron = get_neuron(neuron_spikes) kernel_func = lambda spike: KERNELS[self.kernel](loc=spike,scale=self.tau) @@ -175,7 +189,20 @@ def __make_trace(self,neuron_spikes): ) def transform(self, X): - traces = X.groupby('neuron').apply(self.__make_trace).T + """ Smooth each neuron's spikes into a trace of smoothed spikes. + + Parameters + ---------- + X : pandas DataFrame with columns ['time','neuron'] + spike times that will be smoothed + + Returns + ------- + Xt : pandas DataFrame of smoothed spikes + Columns are neuron labels and the index is the left edge of the + sample time. + """ + traces = X.groupby('neuron').apply(self._make_trace).T if len(traces)==0: traces = pd.DataFrame(index=self.sample_times) return traces diff --git a/neuroglia/tensor.py b/neuroglia/tensor.py index 936be5f..97c0e68 100644 --- a/neuroglia/tensor.py +++ b/neuroglia/tensor.py @@ -3,20 +3,45 @@ import numpy as np class ResponseReducer(BaseEstimator,TransformerMixin): - """docstring for Annotator.""" - def __init__(self, method='mean', dim='sample_times'): - super(ResponseReducer, self).__init__() - - if method == 'mean': - self.method = np.mean - elif method == 'max': - self.method = np.max - else: - self.method = method + """Reduces a response tensor by performing a function along one dimension + + + Notes + ----- + + This estimator is stateless (besides constructor parameters), the + fit method does nothing but is useful when used in a pipeline.
+ """ + def __init__(self, func, dim='sample_times'): + self.func = func self.dim = dim def fit(self, X, y=None): + """Do nothing and return the estimator unchanged + + This method is here to implement the scikit-learn API and work in + scikit-learn pipelines. + + Parameters + ---------- + X : xarray.DataArray in `response tensor` structure ['events','sample_times','neurons'] + + Returns + ------- + self + + """ return self def transform(self, X): - return X.reduce(self.method,dim=self.dim) + """Reduces a response tensor by performing a function along one dimension + + Parameters + ---------- + X : xarray.DataArray in `response tensor` structure ['events','sample_times','neurons'] + + Returns + ------- + Xt : xarray.DataArray with remaining dimensions + """ + return X.reduce(self.func,dim=self.dim) diff --git a/neuroglia/trace.py b/neuroglia/trace.py index c59705e..636c530 100644 --- a/neuroglia/trace.py +++ b/neuroglia/trace.py @@ -4,7 +4,29 @@ from sklearn.preprocessing import binarize class Binarizer(BaseEstimator, TransformerMixin): - """docstring for scikit learn Binarizer + """Binarize data (set feature values to 0 or 1) according to a threshold + + This transformer is a DataFrame-friendly alternative to + sklearn.preprocessing.Binarizer + + Values greater than the threshold map to 1, while values less than + or equal to the threshold map to 0. With the default threshold of 0, + only positive values map to 1. + + Parameters + ---------- + threshold : float, optional (0.0 by default) + Feature values below or equal to this are replaced by 0, above it by 1. + Threshold may not be less than 0 for operations on sparse matrices. + copy : boolean, optional, default True + set to False to perform inplace binarization and avoid a copy (if + the input is already a numpy array or a scipy.sparse CSR matrix). + + Notes + ----- + + This estimator is stateless (besides constructor parameters), the + fit method does nothing but is useful when used in a pipeline.
""" def __init__(self, threshold=0.0, copy=True): @@ -12,9 +34,30 @@ def __init__(self, threshold=0.0, copy=True): self.copy = copy def fit(self, X, y=None): + """Do nothing and return the estimator unchanged + + This method is here to implement the scikit-learn API and work in + scikit-learn pipelines. + + Parameters + ---------- + X : array-like + + Returns + ------- + self + + """ return self def transform(self, X): + """Binarize each element of X + + Parameters + ---------- + X : {array-like, sparse matrix}, shape [n_samples, n_features] + The data to binarize, element by element. + """ df = True try: index = X.index @@ -61,26 +104,97 @@ def edge_detector(X,falling=False): return X class EdgeDetector(BaseEstimator,TransformerMixin): - """docstring for EdgeDetector.""" + """Detect rising or falling edges in a trace + + This transformer detects edges in a trace, where the value of an observation + is higher (by default) or lower (if falling=True) than the prior + observation. + + Parameters + ---------- + falling : boolean, optional (False by default) + Setting this parameter to True will detect falling edges + + Notes + ----- + + This estimator is stateless (besides constructor parameters), the + fit method does nothing but is useful when used in a pipeline. + """ def __init__(self, falling=False): self.falling = falling def fit(self,X,y=None): + """Do nothing and return the estimator unchanged + + This method is here to implement the scikit-learn API and work in + scikit-learn pipelines. 
+ + Parameters + ---------- + X : array-like + + Returns + ------- + self + + """ return self def transform(self,X): + """Detect Edges in each trace + Parameters + ---------- + X : DataFrame in `traces` structure [n_samples, n_traces] + """ return edge_detector(X,self.falling) class WhenTrueFinder(BaseEstimator,TransformerMixin): - """docstring for WhenTrueFinder.""" + """Finds times when a trace is positive + + This transformer returns a list of events, shaped like a spike table. + + This is useful, for example, for constructing a spike table from inferred + spike events. + + Notes + ----- + + This estimator is stateless (besides constructor parameters), the + fit method does nothing but is useful when used in a pipeline. + """ def __init__(self): pass def fit(self,X,y=None): + """Do nothing and return the estimator unchanged + + This method is here to implement the scikit-learn API and work in + scikit-learn pipelines. + + Parameters + ---------- + X : array-like + + Returns + ------- + self + + """ return self def transform(self,X): + """Find times when trace is greater than zero + + Parameters + ---------- + X : DataFrame in `traces` structure [n_samples, n_traces] + + Returns + ------- + Xt : DataFrame with columns ['time','neuron'] + """ return (X[X > 0] .stack() .reset_index()[['level_0','level_1']] diff --git a/tests/test_event.py b/tests/test_event.py index 6c34c4d..07e2e58 100644 --- a/tests/test_event.py +++ b/tests/test_event.py @@ -5,6 +5,7 @@ import numpy.testing as npt import xarray.testing as xrt +from neuroglia.spike import Smoother from neuroglia.event import PeriEventTraceSampler, PeriEventSpikeSampler from sklearn.base import clone @@ -48,7 +49,7 @@ def test_PeriEventTraceSampler_dims(): clone(tensorizer) def test_PeriEventSpikeSampler(): - tensorizer = PeriEventSpikeSampler(SPIKES,sample_times=TS) + tensorizer = PeriEventSpikeSampler(SPIKES,sample_times=TS,sampler=Smoother) tensor = tensorizer.fit_transform(EVENTS)
npt.assert_equal(tensor['neuron'].data,SPIKES['neuron'].unique()) diff --git a/tests/test_nwb.py b/tests/test_nwb.py index 436164f..301665d 100644 --- a/tests/test_nwb.py +++ b/tests/test_nwb.py @@ -4,11 +4,21 @@ def test_spike_tablizer(): - compare_list = [ng.nwb.SpikeTablizer().transform({'a':[1.1,2,3], 'b':[1,2.5,6]}), - ng.nwb.SpikeTablizer().fit_transform({'a': [1.1, 2, 3], 'b': [1, 2.5, 6]})] + spike_dict = { + 'a': [1.1,2,3], + 'b': [1,2.5,6], + } + + compare_list = [ + ng.nwb.SpikeTablizer().transform(spike_dict).reset_index(drop=True), + ng.nwb.SpikeTablizer().fit_transform(spike_dict).reset_index(drop=True), + ] + base = pd.DataFrame({'neuron': ['b', 'a', 'a', 'b', 'a', 'b'], - 'time': [1.0, 1.1, 2.0, 2.5, 3.0, 6.0]}).set_index('time') + 'time': [1.0, 1.1, 2.0, 2.5, 3.0, 6.0]}) for compare in compare_list: + print(base) + print(compare) pdt.assert_frame_equal(base, compare) if __name__ == "__main__": diff --git a/tests/test_tensor.py b/tests/test_tensor.py index dcd81f8..900a2bb 100644 --- a/tests/test_tensor.py +++ b/tests/test_tensor.py @@ -27,7 +27,7 @@ ) def test_ResponseReducer_smoke(): - extractor = ResponseReducer() + extractor = ResponseReducer(func=np.mean) responses = extractor.fit_transform(TENSOR) npt.assert_array_equal(responses['event'],LBL)