Skip to content

Commit

Permalink
Merge pull request #51 from lsst-uk/dataclass_refactor
Browse files Browse the repository at this point in the history
Small data-class refactor.
  • Loading branch information
astronomerritt authored Feb 29, 2024
2 parents f1122a1 + 273ae9a commit c605b7b
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 79 deletions.
40 changes: 29 additions & 11 deletions src/adler/dataclasses/AdlerPlanetoid.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,36 @@
from lsst.rsp import get_tap_service
from adler.dataclasses.DataSchema import Observations, MPCORB, SSObject
from adler.science.DummyScience import DummyScience


class AdlerPlanetoid:
def __init__(self, ssObjectId, sql_filename=None):
"""AdlerPlanetoid class. Contains the Observations, MPCORB and SSObject objects."""

def __init__(self, ssObjectId, population_location="RSP", sql_filename=None):
"""Initialises the AdlerPlanetoid object.
Parameters
-----------
ssObjectId : str
ssObjectId of the object of interest.
population_location : str
String delineating source of data. Should be "RSP" for Rubin Science Platform or "SQL" for a SQL table.
sql_filename: str, optional
Location of local SQL database, if using.
"""
self.ssObjectId = ssObjectId
self.population_location = population_location
self.sql_filename = sql_filename
# can also include date ranges at some point

# can draw from a local SQL database
if not sql_filename:
self.service = get_tap_service("ssotap")
else:
self.service = None

# this creates the AdlerPlanetoid.Observations, AdlerPlanetoid.MPCORB and
# AdlerPlanetoid.SSObject objects.
self.populate_observations()
self.populate_MPCORB()
self.populate_SSObject()

def populate_observations(self):
"""Populates the Observations object class attribute."""
observations_sql_query = f"""
SELECT
ssObject.ssObjectId, mag, magErr, band, midpointMjdTai as mjd, ra, dec, phaseAngle,
Expand All @@ -35,10 +44,11 @@ def populate_observations(self):
"""

self.Observations = Observations(
self.ssObjectId, observations_sql_query, self.service, self.sql_filename
self.ssObjectId, self.population_location, observations_sql_query, sql_filename=self.sql_filename
)

def populate_MPCORB(self):
"""Populates the MPCORB object class attribute."""
MPCORB_sql_query = f"""
SELECT
ssObjectId, mpcDesignation, mpcNumber, mpcH, mpcG, epoch, peri, node, incl, e, n, q,
Expand All @@ -49,9 +59,12 @@ def populate_MPCORB(self):
ssObjectId = {self.ssObjectId}
"""

self.MPCORB = MPCORB(self.ssObjectId, MPCORB_sql_query, self.service, self.sql_filename)
self.MPCORB = MPCORB(
self.ssObjectId, self.population_location, MPCORB_sql_query, sql_filename=self.sql_filename
)

def populate_SSObject(self):
"""Populates the SSObject class attribute."""
SSObject_sql_query = f"""
SELECT
discoverySubmissionDate, firstObservationDate, arc, numObs,
Expand All @@ -63,7 +76,12 @@ def populate_SSObject(self):
ssObjectId = {self.ssObjectId}
"""

self.SSObject = SSObject(self.ssObjectId, SSObject_sql_query, self.service, self.sql_filename)
self.SSObject = SSObject(
self.ssObjectId,
self.population_location,
sql_query=SSObject_sql_query,
sql_filename=self.sql_filename,
)

def do_pretend_science(self):
self.DummyScienceResult = DummyScience().science_result
Expand Down
240 changes: 172 additions & 68 deletions src/adler/dataclasses/DataSchema.py
Original file line number Diff line number Diff line change
@@ -1,102 +1,206 @@
import numpy as np
import sys
from lsst.rsp import get_tap_service


class DataSchema:
"""Parent class for Observations (a join of DiaSource and SSSource), MPCORB
and SSObject data classes.
and SSObject data classes. Largely a collection of common methods. Should never be instantiated
by itself.
"""

def __init__(self, ssObjectId, sql_query, service, sql_filename=None):
self.ssObjectId = ssObjectId
self.sql_query = sql_query
self.service = service
def populate(self, population_location, sql_query, sql_filename):
"""Populates the DataSchema object, either from the RSP or a SQL table. Note that this calls the methods
get_RSP_table() and get_SQL_table(), which must exist in the child classes.
Parameters
-----------
population_location : str
String delineating source of data. Should be "RSP" for Rubin Science Platform or "SQL" for a SQL table.
sql_query: str
SQL query to retrieve data from database.
"""

if not sql_filename:
self.data_table = self.get_RSP_table(self.sql_query)
if population_location == "RSP": # pragma: no cover
self.get_RSP_table(sql_query)
elif population_location == "SQL":
self.get_SQL_table(sql_query, sql_filename)
else:
self.data_table = self.get_SQL_table(self.sql_query)
sys.exit(
"Population source not recognised. Please supply either 'RSP' or 'SQL' for population_location argument."
)

def get_RSP_table(self, sql_query):
rsp_table = self.service.search(sql_query).to_table()
return rsp_table
self.populate_from_table()

def get_SQL_table(self, sql_query, testing_filename):
pass
def get_RSP_table(self, sql_query): # pragma: no cover
"""Retrieves the table of data from the RSP. Populates the data_table class variable.
Parameters
-----------
sql_query : str
SQL query to be sent to the RSP tap service.
# should be one function to get whatever from the table and type accordingly
def get_array_from_table(self, column_name):
return np.array(self.data_table[column_name])
"""
self.sql_query = sql_query
self.service = get_tap_service("ssotap")

def get_string_from_table(self, column_name):
return str(self.data_table[column_name][0])
self.data_table = self.service.search(sql_query).to_table()

def get_float_from_table(self, column_name):
return float(self.data_table[column_name][0])
def get_SQL_table(self, sql_query, sql_filename):
pass

def get_int_from_table(self, column_name):
return int(self.data_table[column_name][0])
def get_from_table(self, column_name, type):
"""Retrieves information from the data_table class variable and forces it to be a specified type.
Parameters
-----------
column_name : str
Column name under which the data of interest is stored.
type : str
String delineating data type. Should be "str", "float", "int" or "array".
Returns
-----------
data : any type
The data requested from the table cast to the type required.
"""
try:
if type == "str":
return str(self.data_table[column_name][0])
elif type == "float":
return float(self.data_table[column_name][0])
elif type == "int":
return int(self.data_table[column_name][0])
elif type == "array":
return np.array(self.data_table[column_name])
else:
print("Type not recognised.")
except ValueError:
sys.exit("Could not cast column name to type.")


class Observations(DataSchema):
"""This is a SQL join of DiaSource and SSSource which contains all of the
observations of the object.
observations of the object. Inherits from DataSchema.
"""

def __init__(self, ssObjectId, observations_query, service, sql_filename=None):
super().__init__(ssObjectId, observations_query, service, sql_filename)
def __init__(self, ssObjectId, population_location, sql_query, sql_filename=None):
"""Initiates the Observations object.
Parameters
-----------
ssObjectId : str
ssObjectId of the object of interest.
population_location : str
String delineating source of data. Should be "RSP" for Rubin Science Platform, "SQL" for a SQL table,
or "arguments" for arguments.
sql_query : str
SQL query to retrieve data from database.
sql_filename: str, optional
Location of local SQL database, if using.
# This populates each of the variables with a numpy array of the specific column.
# This should probably be moved to a constructor class method.
self.mag = self.get_array_from_table("mag")
self.magErr = self.get_array_from_table("magErr")
self.mjd = self.get_array_from_table("mjd")
self.ra = self.get_array_from_table("ra")
self.dec = self.get_array_from_table("dec")
self.phaseAngle = self.get_array_from_table("phaseAngle")
self.topocentricDist = self.get_array_from_table("topocentricDist")
self.heliocentricDist = self.get_array_from_table("heliocentricDist")
"""

self.ssObjectId = ssObjectId
self.population_location = population_location
self.populate(self.population_location, sql_query, sql_filename)
self.calculate_reduced_mag()

def populate_from_table(self):
"""Populates the Observations object from the data_table class variable created on initialisation."""

self.mag = self.get_from_table("mag", "array")
self.magErr = self.get_from_table("magErr", "array")
self.mjd = self.get_from_table("mjd", "array")
self.ra = self.get_from_table("ra", "array")
self.dec = self.get_from_table("dec", "array")
self.phaseAngle = self.get_from_table("phaseAngle", "array")
self.topocentricDist = self.get_from_table("topocentricDist", "array")
self.heliocentricDist = self.get_from_table("heliocentricDist", "array")

def calculate_reduced_mag(self):
"""
Calculates the reduced magnitude column.
"""
self.reduced_mag = self.mag - 5 * np.log10(self.topocentricDist * self.heliocentricDist)


class MPCORB(DataSchema):
"""Grabs information from MPCORB."""

def __init__(self, ssObjectId, observations_query, service, sql_filename=None):
super().__init__(ssObjectId, observations_query, service, sql_filename)

self.mpcDesignation = self.get_string_from_table("mpcDesignation")
self.mpcNumber = self.get_string_from_table("mpcNumber")
self.mpcH = self.get_float_from_table("mpcH")
self.mpcG = self.get_float_from_table("mpcH")
self.epoch = self.get_float_from_table("epoch")
self.peri = self.get_float_from_table("peri")
self.node = self.get_float_from_table("node")
self.incl = self.get_float_from_table("incl")
self.e = self.get_float_from_table("e")
self.n = self.get_float_from_table("n")
self.q = self.get_float_from_table("q")
self.uncertaintyParameter = self.get_string_from_table("uncertaintyParameter")
self.flags = self.get_string_from_table("flags")
def __init__(self, ssObjectId, population_location, sql_query, sql_filename):
"""Initiates the MPCORB object.
Parameters
-----------
ssObjectId : str
ssObjectId of the object of interest.
population_location : str
String delineating source of data. Should be "RSP" for Rubin Science Platform or "SQL" for a SQL table.
sql_query : str
SQL query to retrieve data from database.
sql_filename: str, optional
Location of local SQL database, if using.
"""

# no mean anomaly, no a in MPCORB table
self.ssObjectId = ssObjectId
self.population_location = population_location
self.populate(self.population_location, sql_query, sql_filename)

def populate_from_table(self):
"""Populates the MPCORB object from the data_table class variable created on initialisation."""

self.mpcDesignation = self.get_from_table("mpcDesignation", "str")
self.mpcNumber = self.get_from_table("mpcNumber", "str")
self.mpcH = self.get_from_table("mpcH", "float")
self.mpcG = self.get_from_table("mpcH", "float")
self.epoch = self.get_from_table("epoch", "float")
self.peri = self.get_from_table("peri", "float")
self.node = self.get_from_table("node", "float")
self.incl = self.get_from_table("incl", "float")
self.e = self.get_from_table("e", "float")
self.n = self.get_from_table("n", "float")
self.q = self.get_from_table("q", "float")
self.uncertaintyParameter = self.get_from_table("uncertaintyParameter", "str")
self.flags = self.get_from_table("flags", "str")


class SSObject(DataSchema):
"""Grabs information from MPCORB."""
"""Grabs information from SSObject."""

def __init__(self, ssObjectId, population_location, sql_query, sql_filename):
"""Initiates the SSObject object.
Parameters
-----------
ssObjectId : str
ssObjectId of the object of interest.
population_location : str
String delineating source of data. Should be "RSP" for Rubin Science Platform or "SQL" for a SQL table.
sql_query : str
SQL query to retrieve data from database.
sql_filename: str, optional
Location of local SQL database, if using.
"""

def __init__(self, ssObjectId, observations_query, service, sql_filename=None):
super().__init__(ssObjectId, observations_query, service, sql_filename)

self.discoverySubmissionDate = self.get_float_from_table("discoverySubmissionDate")
self.firstObservationDate = self.get_float_from_table("firstObservationDate")
self.arc = self.get_float_from_table("arc")
self.numObs = self.get_int_from_table("numObs")
self.r_H = self.get_float_from_table("r_H")
self.r_G12 = self.get_float_from_table("r_G12")
self.r_Herr = self.get_float_from_table("r_Herr")
self.r_G12Err = self.get_float_from_table("r_G12err")
self.r_nData = self.get_int_from_table("r_nData")
self.maxExtendedness = self.get_float_from_table("maxExtendedness")
self.minExtendedness = self.get_float_from_table("minExtendedness")
self.medianExtendedness = self.get_float_from_table("medianExtendedness")
self.ssObjectId = ssObjectId
self.population_location = population_location
self.populate(self.population_location, sql_query, sql_filename)

def populate_from_table(self):
"""Populates the SSObject object from the data_table class variable created on initialisation."""

self.discoverySubmissionDate = self.get_from_table("discoverySubmissionDate", "float")
self.firstObservationDate = self.get_from_table("firstObservationDate", "float")
self.arc = self.get_from_table("arc", "float")
self.numObs = self.get_from_table("numObs", "int")
self.r_H = self.get_from_table("r_H", "float")
self.r_G12 = self.get_from_table("r_G12", "float")
self.r_Herr = self.get_from_table("r_Herr", "float")
self.r_G12Err = self.get_from_table("r_G12err", "float")
self.r_nData = self.get_from_table("r_nData", "int")
self.maxExtendedness = self.get_from_table("maxExtendedness", "float")
self.minExtendedness = self.get_from_table("minExtendedness", "float")
self.medianExtendedness = self.get_from_table("medianExtendedness", "float")

0 comments on commit c605b7b

Please sign in to comment.