-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Base classes for creation stages (#96)
* draft the scaffolding of noisifier * draft the scaffolding of selector * add run() method to noisifier and selector * Finish up base classes for Noisifier and Selector, add unit tests * added addRandom.py * fix up error message --------- Co-authored-by: Eric Charles <[email protected]>
- Loading branch information
Showing
6 changed files
with
225 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
"""Add a column of random numbers to a dataframe.""" | ||
|
||
import numpy as np | ||
|
||
from ceci.config import StageParameter as Param | ||
from rail.creation.noisifier import Noisifier | ||
|
||
class AddColumnOfRandom(Noisifier):
    """Noisifier that adds a column of uniform random numbers to a table.

    The stage works on a copy of the table attached as ``input`` and
    inserts the new column at position 0 of that copy, under the name
    given by the ``col_name`` configuration option.
    """

    name = "AddColumnOfRandom"
    config_options = Noisifier.config_options.copy()
    config_options.update(
        col_name=Param(str, "chaos_bunny", msg="Name of the column with random numbers"),
    )

    def __init__(self, args, comm=None):
        """Set up the stage; all work is delegated to ``Noisifier.__init__``."""
        Noisifier.__init__(self, args, comm=comm)

    def _initNoiseModel(self):  # pragma: no cover
        """Seed numpy's global random state from the ``seed`` config option."""
        np.random.seed(self.config.seed)

    def _addNoise(self):  # pragma: no cover
        """Attach a copy of the input, with the random column added, as ``output``."""
        catalog = self.get_data('input').copy()
        # One uniform draw per row of the table.
        randoms = np.random.uniform(size=len(catalog))
        catalog.insert(0, self.config.col_name, randoms)
        self.add_data("output", catalog)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
"""Abstract base class defining a noisifier. | ||
The key feature here is that run() adds noise to the catalog. | ||
Intended subclasses are noisifiers that add LSST noise or other telescope noise. | ||
""" | ||
|
||
from rail.core.stage import RailStage | ||
from rail.core.data import PqHandle | ||
|
||
|
||
class Noisifier(RailStage):
    """Base class for stages that add noise to an input catalog.

    A Noisifier reads its ``input`` through a ``PqHandle`` and exposes its
    result as ``output``, also through a ``PqHandle``.  Sub-classes supply
    the actual noise model by implementing ``_initNoiseModel()`` and
    ``_addNoise()``.
    """

    name = 'Noisifier'
    config_options = RailStage.config_options.copy()
    config_options.update(seed=1337)
    inputs = [('input', PqHandle)]
    outputs = [('output', PqHandle)]

    def __init__(self, args, comm=None):
        """Set up the stage; all work is delegated to ``RailStage.__init__``."""
        RailStage.__init__(self, args, comm=comm)

    def __call__(self, sample, seed: int = None):
        """The main interface method for ``Noisifier``.

        Optionally overrides the configured seed, attaches ``sample`` as
        the ``input`` of this stage, then calls ``run()`` followed by
        ``finalize()`` and returns the handle of the ``output`` data.

        Parameters
        ----------
        sample : table-like
            The sample to which noise is added
        seed : int, default=None
            If not None, replaces the ``seed`` configuration value before
            the stage is run

        Returns
        -------
        output_data : PqHandle
            The handle registered under ``output``
        """
        if seed is not None:
            self.config.seed = seed
        self.set_data('input', sample)

        self.run()
        self.finalize()
        return self.get_handle('output')

    def run(self):
        """Initialize the noise model, then apply it.

        Calls the two sub-class hooks in order: ``_initNoiseModel()``
        first, ``_addNoise()`` second.
        """
        self._initNoiseModel()
        self._addNoise()

    def _initNoiseModel(self):  # pragma: no cover
        """Hook for sub-classes: prepare the noise model before it is applied."""
        raise NotImplementedError("Noisifier._initNoiseModel()")

    def _addNoise(self):  # pragma: no cover
        """Hook for sub-classes: apply the noise and provide the ``output`` data."""
        raise NotImplementedError("Noisifier._addNoise()")
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
"""Abstract base class defining a selector. | ||
The key feature here is that run() makes a selection on either the photometric or spectroscopic catalog. | ||
Intended subclasses include spectroscopic selection, probability selection on a grid for the photometry, | ||
or pure photometric selection. | ||
""" | ||
|
||
from ceci.config import StageParameter as Param | ||
from rail.core.stage import RailStage | ||
from rail.core.data import PqHandle | ||
|
||
|
||
class Selector(RailStage):
    """Base class for stages that apply a selection to a catalog.

    A Selector reads its ``input`` through a ``PqHandle`` and exposes its
    result as ``output``, also through a ``PqHandle``.  Sub-classes supply
    the selection itself by implementing ``_select()``.
    """

    name = 'Selector'
    config_options = RailStage.config_options.copy()
    config_options.update(
        drop_rows=Param(bool, True, msg="Drop selected rows from output table"),
    )
    inputs = [('input', PqHandle)]
    outputs = [('output', PqHandle)]

    def __init__(self, args, comm=None):
        """Set up the stage; all work is delegated to ``RailStage.__init__``."""
        RailStage.__init__(self, args, comm=comm)

    def __call__(self, sample):
        """The main interface method for ``Selector``.

        Attaches ``sample`` as the ``input`` of this stage, then calls
        ``run()`` followed by ``finalize()`` and returns the handle of the
        ``output`` data.

        Parameters
        ----------
        sample : table-like
            The sample to be selected

        Returns
        -------
        output_data : PqHandle
            The handle registered under ``output``
        """
        self.set_data('input', sample)
        self.run()
        self.finalize()
        return self.get_handle('output')

    def run(self):
        """Apply the mask returned by ``_select()`` to the input table.

        With the ``drop_rows`` option set, the output is the input indexed
        by the mask cast to bool.  Otherwise the output is a copy of the
        input with the mask inserted as a ``flag`` column at position 0.
        """
        catalog = self.get_data('input')
        mask = self._select()
        if not self.config['drop_rows']:
            # Keep every row and record the selection in a flag column.
            flagged = catalog.copy()
            flagged.insert(0, 'flag', mask)
            self.add_data("output", flagged)
            return
        self.add_data("output", catalog[mask.astype(bool)])

    def _select(self):  # pragma: no cover
        """Hook for sub-classes: return the selection mask used by ``run()``."""
        raise NotImplementedError("Selector._select()")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters