Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tidy up some docstrings. #350

Merged
merged 2 commits into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ instance/

# Sphinx documentation
docs/_build/
docs/autoapi/
_readthedocs/

# PyBuilder
Expand Down
2 changes: 1 addition & 1 deletion docs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?= -T -E -d _build/doctrees -D language=en
EXCLUDENB ?= -D exclude_patterns="notebooks/*","_build","**.ipynb_checkpoints"
EXCLUDENB ?= -D exclude_patterns="notebooks/*","_build","**.ipynb_checkpoints","**.ipynb"
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = ../_readthedocs/
Expand Down
5 changes: 2 additions & 3 deletions src/tape/analysis/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,14 @@ def meta(self, ens: "Ensemble"):

@abstractmethod
def on(self, ens: "Ensemble") -> List[str]:
"""
Return the columns to group source table by.
"""Return the columns to group source table by.

Parameters
----------
ens : Ensemble
The ensemble object.

Returns:
Returns
--------
List[str]
The column names to group by. Typically, `[ens._id_col]`.
Expand Down
6 changes: 3 additions & 3 deletions src/tape/analysis/stetsonj.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def __call__(
stetsonJ : `dict`
StetsonJ statistic for each of input bands.

Notes
Note
----------
In case that no value for `band_to_calc` is passed, the function is
executed on all available bands in `band`.
Expand Down Expand Up @@ -113,7 +113,7 @@ def _stetson_J_single(fluxes, errors):
.. [1] Stetson, P. B., "On the Automatic Determination of Light-Curve
Parameters for Cepheid Variables", PASP, 108, 851S, 1996

Notes
Note
----------
Taken from
https://github.com/lsst/meas_base/blob/main/python/lsst/meas/base/diaCalculationPlugins.py
Expand Down Expand Up @@ -168,7 +168,7 @@ def _stetson_J_mean(values, errors, mean=None, alpha=2.0, beta=2.0, n_iter=20, t
.. [1] Stetson, P. B., "On the Automatic Determination of Light-Curve
Parameters for Cepheid Variables", PASP, 108, 851S, 1996

Notes
Note
----------
Taken from
https://github.com/lsst/meas_base/blob/main/python/lsst/meas/base/diaCalculationPlugins.py
Expand Down
146 changes: 78 additions & 68 deletions src/tape/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,18 +104,19 @@ def add_frame(self, frame, label):

Parameters
----------
frame: `tape.ensemble.EnsembleFrame`
frame: `tape.ensemble_frame.EnsembleFrame`
The frame object for the Ensemble to track.
label: `str`
| The label for the Ensemble to use to track the frame.
The label for the Ensemble to use to track the frame.

Returns
-------
self: `Ensemble`
Ensemble

Raises
------
ValueError if the label is "source", "object", or already tracked by the Ensemble.
ValueError
if the label is "source", "object", or already tracked by the Ensemble.
"""
if label == SOURCE_FRAME_LABEL or label == OBJECT_FRAME_LABEL:
raise ValueError(f"Unable to add frame with reserved label " f"'{label}'")
Expand All @@ -138,12 +139,13 @@ def update_frame(self, frame):

Returns
-------
self: `Ensemble`
Ensemble

Raises
------
ValueError if the `frame.label` is unpopulated, or if the frame is not a SourceFrame or ObjectFrame
but uses the reserved labels.
ValueError
if the `frame.label` is unpopulated, or if the frame is not a SourceFrame or ObjectFrame
but uses the reserved labels.
"""
if frame.label is None:
raise ValueError(f"Unable to update frame with no populated `EnsembleFrame.label`.")
Expand All @@ -167,16 +169,18 @@ def drop_frame(self, label):
Parameters
----------
label: `str`
| The label of the frame to be dropped by the Ensemble.
The label of the frame to be dropped by the Ensemble.

Returns
-------
self: `Ensemble`
Ensemble

Raises
------
ValueError if the label is "source", or "object".
KeyError if the label is not tracked by the Ensemble.
ValueError
if the label is "source", or "object".
KeyError
if the label is not tracked by the Ensemble.
"""
if label == SOURCE_FRAME_LABEL or label == OBJECT_FRAME_LABEL:
raise ValueError(f"Unable to drop frame with reserved label " f"'{label}'")
Expand All @@ -191,15 +195,16 @@ def select_frame(self, label):
Parameters
----------
label: `str`
| The label of a frame tracked by the Ensemble to be selected.
The label of a frame tracked by the Ensemble to be selected.

Returns
-------
result: `tape.ensemble.EnsembleFrame`
tape.ensemble_frame.EnsembleFrame

Raises
------
KeyError if the label is not tracked by the Ensemble.
KeyError
if the label is not tracked by the Ensemble.
"""
if label not in self.frames:
raise KeyError(
Expand Down Expand Up @@ -229,7 +234,8 @@ def frame_info(self, labels=None, verbose=True, memory_usage=True, **kwargs):

Raises
------
KeyError if a label in labels is not tracked by the Ensemble.
KeyError
if a label in labels is not tracked by the Ensemble.
"""
if labels is None:
labels = self.frames.keys()
Expand Down Expand Up @@ -265,7 +271,7 @@ def insert_sources(
):
"""Manually insert sources into the ensemble.

Requires, at a minimum, the objects ID and the band, timestamp,
Requires, at a minimum, the object's ID and the band, timestamp,
and flux of the observation.

Note
Expand Down Expand Up @@ -364,6 +370,7 @@ def info(self, verbose=True, memory_usage=True, **kwargs):
memory_usage: `bool`, optional
Specifies whether total memory usage of the DataFrame elements
(including the index) should be displayed.

Returns
----------
None
Expand All @@ -377,8 +384,7 @@ def info(self, verbose=True, memory_usage=True, **kwargs):
self.source.info(verbose=verbose, memory_usage=memory_usage, **kwargs)

def check_sorted(self, table="object"):
"""Checks to see if an Ensemble Dataframe is sorted (increasing) on
the index.
"""Checks to see if an Ensemble Dataframe is sorted (increasing) on the index.

Parameters
----------
Expand All @@ -387,8 +393,8 @@ def check_sorted(self, table="object"):

Returns
-------
A boolean value indicating whether the index is sorted (True)
or not (False)
boolean
indicating whether the index is sorted (True) or not (False)
"""
if table == "object":
idx = self.object.index
Expand All @@ -412,10 +418,10 @@ def check_lightcurve_cohesion(self):

Returns
-------
A boolean value indicating whether the sources tied to a given object
are only found in a single partition (True), or if they are split
across multiple partitions (False)

boolean
indicates whether the sources tied to a given object are only found
in a single partition (True), or if they are split across multiple
partitions (False)
"""
idx = self.source.index
counts = idx.map_partitions(lambda a: Counter(a.unique())).compute()
Expand All @@ -440,8 +446,9 @@ def compute(self, table=None, **kwargs):

Returns
-------
A single pandas data frame for the specified table or a tuple of (object, source)
data frames.
`pd.DataFrame`
A single pandas data frame for the specified table or a tuple of
(object, source) data frames.
"""
if table:
self._lazy_sync_tables(table)
Expand Down Expand Up @@ -559,14 +566,17 @@ def query(self, expr, table="object"):

Examples
--------
# Keep sources with flux above 100.0:
ens.query("flux > 100", table="source")
Keep sources with flux above 100.0::

# Keep sources in the green band:
ens.query("band_col_name == 'g'", table="source")
ens.query("flux > 100", table="source")

# Filtering on the flux column without knowing its name:
ens.query(f"{ens._flux_col} > 100", table="source")
Keep sources in the green band::

ens.query("band_col_name == 'g'", table="source")

Filtering on the flux column without knowing its name::

ens.query(f"{ens._flux_col} > 100", table="source")
"""
self._lazy_sync_tables(table)
if table == "object":
Expand Down Expand Up @@ -622,11 +632,13 @@ def assign(self, table="object", temporary=False, **kwargs):

Examples
--------
# Direct assignment of my_series to a column named "new_column".
ens.assign(table="object", new_column=my_series)
Direct assignment of my_series to a column named "new_column"::

ens.assign(table="object", new_column=my_series)

Subtract twice the value in "err" from the value in "flux"::

# Subtract the value in "err" from the value in "flux".
ens.assign(table="source", lower_bnd=lambda x: x["flux"] - 2.0 * x["err"])
ens.assign(table="source", lower_bnd=lambda x: x["flux"] - 2.0 * x["err"])
"""
self._lazy_sync_tables(table)

Expand Down Expand Up @@ -869,12 +881,12 @@ def bin_sources(
Notes
-----
* This should only be used for slowly varying sources where we can
treat the source as constant within `time_window`.
treat the source as constant within `time_window`.

* As a default the function only aggregates and keeps the id, band,
time, flux, and flux error columns. Additional columns can be preserved
by providing the mapping of column name to aggregation function with the
`additional_cols` parameter.
time, flux, and flux error columns. Additional columns can be preserved
by providing the mapping of column name to aggregation function with the
`additional_cols` parameter.
"""
self._lazy_sync_tables(table="source")

Expand Down Expand Up @@ -991,31 +1003,28 @@ def batch(self, func, *args, meta=None, by_band=False, use_map=True, on=None, la

Examples
--------
Run a TAPE function on the ensemble:
```
from tape.analysis.stetsonj import calc_stetson_J
ens = Ensemble().from_dataset('rrlyr82')
ensemble.batch(calc_stetson_J, band_to_calc='i')
```

Run a light-curve function on the ensemble:
```
from light_curve import EtaE
ens.batch(EtaE(), band_to_calc='g')
```

Run a custom function on the ensemble:
```
def s2n_inter_quartile_range(flux, err):
first, third = np.quantile(flux / err, [0.25, 0.75])
return third - first

ens.batch(s2n_inter_quartile_range, ens._flux_col, ens._err_col)
```
Or even a numpy built-in function:
```
amplitudes = ens.batch(np.ptp, ens._flux_col)
```
Run a TAPE function on the ensemble::

from tape.analysis.stetsonj import calc_stetson_J
ens = Ensemble().from_dataset('rrlyr82')
ens.batch(calc_stetson_J, band_to_calc='i')

Run a light-curve function on the ensemble::

from light_curve import EtaE
ens.batch(EtaE(), band_to_calc='g')

Run a custom function on the ensemble::

def s2n_inter_quartile_range(flux, err):
first, third = np.quantile(flux / err, [0.25, 0.75])
return third - first

ens.batch(s2n_inter_quartile_range, ens._flux_col, ens._err_col)

Or even a numpy built-in function::

amplitudes = ens.batch(np.ptp, ens._flux_col)
"""

self._lazy_sync_tables(table="all")
Expand Down Expand Up @@ -1507,6 +1516,7 @@ def from_dask_dataframe(

def from_hipscat(self, dir, source_subdir="source", object_subdir="object", column_mapper=None, **kwargs):
"""Read in parquet files from a hipscat-formatted directory structure

Parameters
----------
dir: `str`
Expand Down Expand Up @@ -1900,7 +1910,7 @@ def _lazy_sync_tables_from_frame(self, frame):

Parameters
----------
frame: `tape.EnsembleFrame`
frame: `tape.ensemble_frame.EnsembleFrame`
The frame being modified. Only an `ObjectFrame` or
`SourceFrame` tracked by this `Ensemble` may trigger
a sync.
Expand Down Expand Up @@ -2144,7 +2154,7 @@ def sf2(self, sf_method="basic", argument_container=None, use_map=True):
result : `pandas.DataFrame`
Structure function squared for each of input bands.

Notes
Note
----------
In case that no value for `band_to_calc` is passed, the function is
executed on all available bands in `band`.
Expand Down Expand Up @@ -2186,7 +2196,7 @@ def _translate_meta(self, meta):
Returns
----------
result : `ensemble.TapeFrame` or `ensemble.TapeSeries`
The appropriate meta for Dask producing an `Ensemble.EnsembleFrame` or
The appropriate meta for Dask producing a `tape.ensemble_frame.EnsembleFrame` or
`Ensemble.EnsembleSeries` respectively
"""
if isinstance(meta, TapeFrame) or isinstance(meta, TapeSeries):
Expand Down
Loading