[WIP] adding docstrings #52

Merged: 10 commits, Nov 20, 2017
120 changes: 107 additions & 13 deletions neuroglia/event.py
@@ -5,22 +5,68 @@
from sklearn.base import BaseEstimator,TransformerMixin

from .utils import create_interpolator, events_to_xr_dim
from .spike import Smoother, DEFAULT_TAU
from .spike import Binner, DEFAULT_TAU

class PeriEventTraceSampler(BaseEstimator,TransformerMixin):
"""docstring for EventTensorizer."""
"""Take event-aligned samples of traces from a population of neurons.

Traces are sampled relative to each event time. Neither the event times
nor the sample_times offsets need to align with the sample times of the
traces themselves; instead, samples are interpolated from the values in
the traces DataFrame.

Parameters
----------
traces : pandas DataFrame with 'time' as the index and neuron IDs in columns
The traces that will be sampled from when the transform method is called
sample_times : array
Times, relative to each event, at which the traces will be sampled.

Notes
-----

This estimator is stateless (besides constructor parameters); the
fit method does nothing but is useful when used in a pipeline.
"""
def __init__(self, traces, sample_times):
self.sample_times = sample_times
self.traces = traces

def fit(self, X, y=None):
def _make_splined_traces(self):
self.splined_traces_ = self.traces.apply(
lambda y: create_interpolator(self.traces.index,y),
axis=0,
)

def fit(self, X, y=None):
"""Do nothing and return the estimator unchanged

This method is here to implement the scikit-learn API and work in
scikit-learn pipelines.

Parameters
----------
X : array-like

Returns
-------
self

"""
return self

def transform(self, X):
"""Sample traces around each event

Parameters
----------
X : pandas.DataFrame with a column named 'time'

Returns
-------
Xt : xarray.DataArray with dimensions ['event','sample_time','neuron']
"""
self._make_splined_traces()

# define a local function that will extract traces around each event
def extractor(ev):
@@ -39,23 +85,71 @@ def extractor(ev):


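# Illustrative usage sketch for PeriEventTraceSampler (not part of this PR's
# diff). The toy traces/events below are assumptions; only the constructor and
# transform signatures documented above are relied on.
import numpy as np
import pandas as pd
from neuroglia.event import PeriEventTraceSampler

traces = pd.DataFrame(                      # 'time' index, neuron IDs as columns
    np.random.rand(1000, 3),
    index=np.arange(0, 10, 0.01),
    columns=['n0', 'n1', 'n2'],
)
events = pd.DataFrame({'time': [1.0, 4.5, 7.2]})
sample_times = np.arange(-0.5, 1.0, 0.05)   # need not align with the trace grid

tensor = PeriEventTraceSampler(traces, sample_times).fit_transform(events)
# tensor should be an xarray.DataArray with dims ('event', 'sample_time', 'neuron')
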
class PeriEventSpikeSampler(BaseEstimator,TransformerMixin):
"""docstring for PeriEventSpikeSampler."""
def __init__(self, spikes, sample_times, fillna=True, tracizer=None,tracizer_kwargs=None):
"""Take event-aligned samples of spikes from a population of neurons.

Parameters
----------
spikes : pandas DataFrame with columns ['time','neuron']
The spikes that will be sampled from when the transform method is called
sample_times : array
Time relative to events that will be used to sample or bin spikes.
fillna : boolean, optional (default: True)
Whether to fill unobserved values; these are likely to occur when a
given event has no spikes associated with it.
sampler : transformer, optional (default: neuroglia.spike.Binner)
Binner or Smoother from neuroglia.spike
sampler_kwargs : dict-like
Dictionary of keyword arguments to pass along to the Sampler

Notes
-----

This estimator is stateless (besides constructor parameters); the
fit method does nothing but is useful when used in a pipeline.
"""
def __init__(self, spikes, sample_times, fillna=True, sampler=None,sampler_kwargs=None):
self.spikes = spikes
self.sample_times = sample_times
self.fillna = fillna
self.Tracizer = tracizer
self.tracizer_kwargs = tracizer_kwargs
self.Sampler = sampler
self.sampler_kwargs = sampler_kwargs

def _assign_sampler(self):
if self.Sampler is None:
self.Sampler = Binner
if self.sampler_kwargs is None:
self.sampler_kwargs = dict()

def fit(self, X, y=None):
if self.Tracizer is None:
self.Tracizer = Smoother
if self.tracizer_kwargs is None:
self.tracizer_kwargs = dict()
"""Do nothing and return the estimator unchanged

This method is here to implement the scikit-learn API and work in
scikit-learn pipelines.

Parameters
----------
X : array-like

Returns
-------
self

"""
return self

def transform(self, X):
"""Sample spikes around each event

Parameters
----------
X : pandas.DataFrame with a column named 'time'

Returns
-------
Xt : xarray.DataArray with dimensions ['event','sample_time','neuron']
"""

self._assign_sampler()

# define a local function that will extract traces around each event
def extractor(ev):
@@ -69,8 +163,8 @@ def extractor(ev):
)
local_spikes = self.spikes[local_mask]

tracizer = self.Tracizer(t,**self.tracizer_kwargs)
traces = tracizer.fit_transform(local_spikes)
sampler = self.Sampler(t,**self.sampler_kwargs)
traces = sampler.fit_transform(local_spikes)

traces.index = self.sample_times[:len(traces)]

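As a quick check on the renamed sampler/sampler_kwargs arguments above, here is a usage sketch for PeriEventSpikeSampler; the toy spikes, events, and the tau value handed to Smoother are illustrative assumptions, not taken from the diff.

import numpy as np
import pandas as pd
from neuroglia.event import PeriEventSpikeSampler
from neuroglia.spike import Smoother

spikes = pd.DataFrame({                      # one row per spike
    'time': np.sort(np.random.rand(500) * 10),
    'neuron': np.random.randint(0, 5, 500),
})
events = pd.DataFrame({'time': [1.0, 4.5, 7.2]})
sample_times = np.arange(-0.5, 1.0, 0.01)

# leaving sampler=None falls back to Binner; pass Smoother to get smoothed traces instead
sampler = PeriEventSpikeSampler(
    spikes,
    sample_times,
    sampler=Smoother,
    sampler_kwargs={'tau': 0.05},
)
tensor = sampler.fit_transform(events)       # DataArray: event x sample_time x neuron
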
65 changes: 59 additions & 6 deletions neuroglia/nwb.py
@@ -1,23 +1,76 @@
import pandas as pd
from sklearn.base import TransformerMixin
from sklearn.base import BaseEstimator,TransformerMixin

class SpikeTablizer(TransformerMixin):
"""converts a dictionary of spike times in the form of neuron:[times] to a
dataframe with "neuron" and "time" columns, sorted by "time"
class SpikeTablizer(BaseEstimator,TransformerMixin):
"""Convert a dictionary of spike times to a dataframe of spike times.

It is common to store spike times as a dictionary where the keys are neuron
IDs and the values are arrays of spike times for a given neuron.

This transformer converts a dictionary of spike times into a table of spike
times.

Examples
--------

>>> import numpy as np
>>> import pandas as pd
>>> from neuroglia.nwb import SpikeTablizer
>>> tablizer = SpikeTablizer()
>>> spike_dict = {0:[0.1,0.2,0.3],2:[0.11]}
>>> spikes = tablizer.fit_transform(spike_dict)

See also
--------

neuroglia.spike.Smoother
neuroglia.spike.Binner

Notes
-----

This estimator is stateless (besides constructor parameters); the
fit method does nothing but is useful when used in a pipeline.

"""
def __init__(self):
super(SpikeTablizer, self).__init__()
pass

def fit(self, X, y=None): # pragma: no cover
""" Do nothing and return the estimator unchanged.

This method is here to implement the scikit-learn API and work in
scikit-learn pipelines.

Parameters
----------

X : dictionary of spike times in the format {<neuron>: <spike_times>}
y : (ignored)

Returns
-------

self
"""
return self

def transform(self, X):
""" Convert a dictionary of spike times to a dataframe of spike times.

Parameters
----------
X : dictionary of spike times in the format {<neuron>: <spike_times>}

Returns
-------
Xt : pandas DataFrame with columns ['time','neuron']
"""
population = {'neuron':[],'time':[]}
for n,times in X.items():
for t in times:
population['neuron'].append(n)
population['time'].append(t)
df = pd.DataFrame(population).sort_values('time')
df.set_index(['time'], inplace=True)
# df.set_index(['time'], inplace=True)
return df
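Since the docstring advertises SpikeTablizer as pipeline-friendly, a minimal sketch of it feeding a downstream Binner follows; the bin edges are arbitrary and the Pipeline wiring is an assumption, not something exercised in this PR.

import numpy as np
from sklearn.pipeline import Pipeline
from neuroglia.nwb import SpikeTablizer
from neuroglia.spike import Binner

spike_dict = {0: [0.1, 0.2, 0.3], 2: [0.11]}

# both steps are stateless, so fit() is a no-op and fit_transform simply chains the transforms
pipeline = Pipeline([
    ('tablize', SpikeTablizer()),
    ('bin', Binner(np.arange(0, 1.0, 0.25))),
])
binned = pipeline.fit_transform(spike_dict)  # DataFrame of spike counts: bins x neurons
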
43 changes: 35 additions & 8 deletions neuroglia/spike.py
@@ -42,14 +42,22 @@ class Binner(BaseEstimator,TransformerMixin):
neuroglia.spike.Smoother
neuroglia.nwb.SpikeTablizer


Notes
-----

This estimator is stateless (besides constructor parameters); the
fit method does nothing but is useful when used in a pipeline.

"""
def __init__(self,sample_times):
self.sample_times = sample_times

def fit(self, X, y=None):
""" Do nothing and return the estimator unchanged.

This method is just there to implement the usual API and hence work in pipelines.
This method is here to implement the scikit-learn API and work in
scikit-learn pipelines.

Parameters
----------
@@ -80,7 +88,6 @@ def transform(self, X):
----------
X : pandas DataFrame with columns ['time','neuron']
spike times that will be binned
y : (ignored)

Returns
-------
@@ -120,17 +127,23 @@ class Smoother(BaseEstimator,TransformerMixin):

>>> import numpy as np
>>> import pandas as pd
>>> from neuroglia.spike import Binner
>>> binner = Binner(np.arange(0,1.0,0.001))
>>> from neuroglia.spike import Smoother
>>> smoother = Smoother(np.arange(0,1.0,0.001))
>>> spikes = pd.DataFrame({'time': np.random.rand(10), 'neuron': np.zeros(10)})
>>> X = smoother.fit_transform(spikes)

See also
--------

neuroglia.spike.Smoother
neuroglia.spike.Binner
neuroglia.nwb.SpikeTablizer

Notes
-----

This estimator is stateless (besides constructor parameters); the
fit method does nothing but is useful when used in a pipeline.

"""
def __init__(self,sample_times,kernel='gaussian',tau=DEFAULT_TAU):

@@ -142,7 +155,8 @@ def __init__(self,sample_times,kernel='gaussian',tau=DEFAULT_TAU):
def fit(self, X, y=None):
""" Do nothing and return the estimator unchanged.

This method is just there to implement the usual API and hence work in pipelines.
This method is here to implement the scikit-learn API and work in
scikit-learn pipelines.

Parameters
----------
@@ -157,7 +171,7 @@ def fit(self, X, y=None):
"""
return self

def __make_trace(self,neuron_spikes):
def _make_trace(self,neuron_spikes):
neuron = get_neuron(neuron_spikes)

kernel_func = lambda spike: KERNELS[self.kernel](loc=spike,scale=self.tau)
@@ -175,7 +189,20 @@ def __make_trace(self,neuron_spikes):
)

def transform(self, X):
traces = X.groupby('neuron').apply(self.__make_trace).T
""" Smooth each neuron's spikes into a trace of smoothed spikes.

Parameters
----------
X : pandas DataFrame with columns ['time','neuron']
spike times that will be smoothed

Returns
-------
Xt : pandas DataFrame of smoothed spikes
Columns are neuron labels and the index is the sample time.
"""
traces = X.groupby('neuron').apply(self._make_trace).T
if len(traces)==0:
traces = pd.DataFrame(index=self.sample_times)
return traces
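
To close out the spike.py changes, a short sketch contrasting Binner and Smoother on the same spike table; the toy data and the tau value are assumptions, and only the constructor signatures shown above are relied on.

import numpy as np
import pandas as pd
from neuroglia.spike import Binner, Smoother

spikes = pd.DataFrame({
    'time': np.sort(np.random.rand(200)),
    'neuron': np.random.randint(0, 5, 200),
})
sample_times = np.arange(0, 1.0, 0.01)

binned = Binner(sample_times).fit_transform(spikes)      # spike counts per time bin
smoothed = Smoother(sample_times, kernel='gaussian', tau=0.005).fit_transform(spikes)  # kernel-smoothed traces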