diff --git a/README.md b/README.md index 29b8bee..1006e25 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,13 @@ Notes: the Python Project Template documentation on [Sphinx and Python Notebooks](https://lincc-ppt.readthedocs.io/en/latest/practices/sphinx.html#python-notebooks) +## Dev Guide - Adding notebooks to Read The Docs + +- Copy notebook into `docs/notebooks` (N.B. the notebook must have at least one section header and be using the "Python 3 (ipykernel)" kernel, not some conda env kernel that may only be installed locally) +- Update the toctree in the file `docs/notebooks.rst` +- Ensure necessary requirements are declared in `pyproject.toml` and `docs/requirements.txt`. Also, make sure that the notebook being added to the docs is using the python3 (ipykernel) kernel, not some conda env kernel that may only be installed locally +- To update the docs locally, from the `docs` dir run: `python -m sphinx -T -E -b html -d _build/doctrees -D language=en . ../_readthedocs/html` + ## Dev Guide - Updating pyproject.toml If you are adding code that requires a new dependency, this needs to be included in pyproject.toml under the `[project]' section: diff --git a/docs/notebooks.rst b/docs/notebooks.rst index 7f7e544..61e3308 100644 --- a/docs/notebooks.rst +++ b/docs/notebooks.rst @@ -4,3 +4,4 @@ Notebooks .. toctree:: Introducing Jupyter Notebooks + Adler phasecurve models diff --git a/docs/notebooks/adler_phasecurve_example.ipynb b/docs/notebooks/adler_phasecurve_example.ipynb new file mode 100644 index 0000000..6e64763 --- /dev/null +++ b/docs/notebooks/adler_phasecurve_example.ipynb @@ -0,0 +1,359 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8738000d", + "metadata": {}, + "source": [ + "# Adler phasecurve models\n", + "This notebook demonstrates how Adler implements phasecurve models. An example object with photometric observations is loaded. We can create a phasecurve model object from the SSObject parameters associated with this object. 
We can also fit a phasecurve model of our choice to the observations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d591f5d8-9148-46ff-a62b-0f2a29eb806c", + "metadata": {}, + "outputs": [], + "source": [ + "from adler.dataclasses.AdlerPlanetoid import AdlerPlanetoid\n", + "from adler.science.PhaseCurve import PhaseCurve\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.gridspec as gridspec\n", + "import astropy.units as u" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "645efb98-567d-481e-a79c-b1cfdc828726", + "metadata": {}, + "outputs": [], + "source": [ + "# ssObjectId of object to analyse\n", + "ssoid = \"8268570668335894776\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10b36aab-b322-49b8-8ff3-49bef68d7416", + "metadata": {}, + "outputs": [], + "source": [ + "# retrieve the object data via adler\n", + "\n", + "# # here we use an offline SQL database which contains the observations of the sso\n", + "fname = \"../../tests/data/testing_database.db\"\n", + "planetoid = AdlerPlanetoid.construct_from_SQL(ssoid, sql_filename=fname)\n", + "\n", + "# alternatively we can retrieve the object data directly from the RSP\n", + "# planetoid = AdlerPlanetoid.construct_from_RSP(ssoid)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9a0623d-0dc7-49c1-99dd-a76ef970a3ff", + "metadata": {}, + "outputs": [], + "source": [ + "# inspect the whole object\n", + "planetoid.__dict__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d360360-025b-4a77-acf5-325b2f2d1873", + "metadata": {}, + "outputs": [], + "source": [ + "# inspect just the ssObject table\n", + "planetoid.SSObject.__dict__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be8f8d63", + "metadata": {}, + "outputs": [], + "source": [ + "# retrieve all observations in the r filter\n", + "obs_r = 
planetoid.observations_in_filter(\"r\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da1e483d", + "metadata": {}, + "outputs": [], + "source": [ + "# inspect the fields available in the observations table\n", + "obs_r.__dict__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d7dc125-06c1-49ad-8854-17d8c8b6954f", + "metadata": {}, + "outputs": [], + "source": [ + "# plot the observations as a phasecurve\n", + "x_plot = \"phaseAngle\"\n", + "y_plot = \"reduced_mag\"\n", + "\n", + "x = getattr(obs_r, x_plot)\n", + "y = getattr(obs_r, y_plot)\n", + "xerr = obs_r.magErr\n", + "\n", + "fig = plt.figure()\n", + "gs = gridspec.GridSpec(1, 1)\n", + "ax1 = plt.subplot(gs[0, 0])\n", + "\n", + "ax1.errorbar(x, y, xerr, fmt=\"o\")\n", + "\n", + "ax1.invert_yaxis()\n", + "ax1.set_xlabel(x_plot)\n", + "ax1.set_ylabel(y_plot)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6efe3b5a-09dd-4d5e-9f41-20ea6e1b43df", + "metadata": {}, + "outputs": [], + "source": [ + "# retrieve the phase curve model parameters provided in the ssObject table\n", + "\n", + "sso_r = planetoid.SSObject_in_filter(\"r\")\n", + "\n", + "r_H = sso_r.H\n", + "r_G12 = sso_r.G12\n", + "\n", + "pc = PhaseCurve(H=r_H * u.mag, phase_parameter_1=r_G12, model_name=\"HG12_Pen16\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80f552f1-8907-4cc9-b57c-2e667eab459c", + "metadata": {}, + "outputs": [], + "source": [ + "# what sbpy model is being used?\n", + "pc.model_function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24c1955e-95cd-4d77-ad05-aa5b8d18620a", + "metadata": {}, + "outputs": [], + "source": [ + "# set up an array of phase angles to plot the model\n", + "alpha = np.linspace(0, np.amax(obs_r.phaseAngle)) * u.deg\n", + "alpha" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3f30fe0-0d89-4ffa-8237-9c71181d44ee", + "metadata": {}, + 
"outputs": [], + "source": [ + "# calculate the model reduced magnitude over these phase angles\n", + "red_mag = pc.ReducedMag(alpha)\n", + "red_mag" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04be98a1-e4dc-4216-bcd9-ef777f6053fb", + "metadata": {}, + "outputs": [], + "source": [ + "# plot the observations with the model phase curve\n", + "x_plot = \"phaseAngle\"\n", + "y_plot = \"reduced_mag\"\n", + "\n", + "x = getattr(obs_r, x_plot)\n", + "y = getattr(obs_r, y_plot)\n", + "xerr = obs_r.magErr\n", + "\n", + "fig = plt.figure()\n", + "gs = gridspec.GridSpec(1, 1)\n", + "ax1 = plt.subplot(gs[0, 0])\n", + "\n", + "ax1.errorbar(x, y, xerr, fmt=\"o\")\n", + "\n", + "ax1.plot(alpha.value, red_mag.value)\n", + "\n", + "ax1.invert_yaxis()\n", + "ax1.set_xlabel(x_plot)\n", + "ax1.set_ylabel(y_plot)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9815543d-6140-4bdb-8bad-8296994723f4", + "metadata": {}, + "outputs": [], + "source": [ + "# plot the observations as a lightcurve\n", + "x_plot = \"midPointMjdTai\"\n", + "y_plot = \"reduced_mag\"\n", + "\n", + "x = getattr(obs_r, x_plot)\n", + "y = getattr(obs_r, y_plot)\n", + "xerr = obs_r.magErr\n", + "\n", + "fig = plt.figure()\n", + "gs = gridspec.GridSpec(1, 1)\n", + "ax1 = plt.subplot(gs[0, 0])\n", + "\n", + "ax1.errorbar(x, y, xerr, fmt=\"o\")\n", + "\n", + "ax1.invert_yaxis()\n", + "ax1.set_xlabel(x_plot)\n", + "ax1.set_ylabel(y_plot)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de462b92-3914-4091-b0af-bddd9e9c1ef1", + "metadata": {}, + "outputs": [], + "source": [ + "# do a different phase curve fit to the data\n", + "# adler is able to fit different models, and perform more sophisticated fits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f92891c9-6ccf-4dac-8887-9545f633ba90", + "metadata": {}, + "outputs": [], + "source": [ + "# create a new PhaseCurve object 
with a different sbpy model\n", + "pc_fit = PhaseCurve(H=pc.H, model_name=\"HG\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db24432b-6d05-4ff2-9d98-e52d8c2e4342", + "metadata": {}, + "outputs": [], + "source": [ + "pc_fit.model_function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9039e2e2-27d9-4d21-b2f6-9504a5b85ce4", + "metadata": {}, + "outputs": [], + "source": [ + "# use adler to fit this new phase curve model to the data\n", + "pc_fit.FitModel(\n", + " phase_angle=obs_r.phaseAngle * u.deg,\n", + " reduced_mag=obs_r.reduced_mag * u.mag,\n", + " mag_err=obs_r.magErr * u.mag,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d4b7144-ee72-45e0-9606-c40f83c443c6", + "metadata": {}, + "outputs": [], + "source": [ + "# plot the observations with both\n", + "x_plot = \"phaseAngle\"\n", + "y_plot = \"reduced_mag\"\n", + "\n", + "x = getattr(obs_r, x_plot)\n", + "y = getattr(obs_r, y_plot)\n", + "xerr = obs_r.magErr\n", + "\n", + "fig = plt.figure()\n", + "gs = gridspec.GridSpec(1, 1)\n", + "ax1 = plt.subplot(gs[0, 0])\n", + "\n", + "ax1.errorbar(x, y, xerr, fmt=\"o\")\n", + "\n", + "ax1.plot(alpha.value, pc.ReducedMag(alpha).value, label=pc.model_name)\n", + "ax1.plot(alpha.value, pc_fit.ReducedMag(alpha).value, label=pc_fit.model_name)\n", + "\n", + "ax1.invert_yaxis()\n", + "ax1.set_xlabel(x_plot)\n", + "ax1.set_ylabel(y_plot)\n", + "ax1.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7f39ed4-8334-4e10-a97c-a9471105225b", + "metadata": {}, + "outputs": [], + "source": [ + "# # now we would add our calculated values back into planetoid\n", + "# planetoid.AdlerSchema.r_H = pc_fit.abs_mag\n", + "# planetoid.AdlerSchema.r_G = pc_fit.phase_param" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ca4bbfd-1954-469f-8608-40c52838d300", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + 
"metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/requirements.txt b/docs/requirements.txt index 3979f83..a02ca08 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -8,3 +8,6 @@ jupyter astropy sbpy matplotlib +numpy +ipykernel +scipy \ No newline at end of file diff --git a/notebooks/adlerdata_in_and_out.ipynb b/notebooks/adlerdata_in_and_out.ipynb new file mode 100644 index 0000000..22b68c5 --- /dev/null +++ b/notebooks/adlerdata_in_and_out.ipynb @@ -0,0 +1,318 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "55a1b889-7fb4-4d73-a9d0-23ab1bdb4dcb", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import sqlite3\n", + "\n", + "from adler.dataclasses.AdlerData import AdlerData\n", + "from adler.dataclasses.AdlerPlanetoid import AdlerPlanetoid\n", + "from adler.utilities.tests_utilities import get_test_data_filepath" + ] + }, + { + "cell_type": "markdown", + "id": "5264c132-e86d-4415-bfd4-cb1856d2fc33", + "metadata": {}, + "source": [ + "This is a quick notebook demonstrating how Adler's calculated values can be stored and then retrieved for later." + ] + }, + { + "cell_type": "markdown", + "id": "8baa263b-5f45-4f5a-b13e-565a5e2d181b", + "metadata": {}, + "source": [ + "First, let's make our AdlerPlanetoid object. In this case, we're populating it from a testing SQL database." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b04683c9-36db-4320-b6b7-7ef487aaf02e", + "metadata": {}, + "outputs": [], + "source": [ + "ssoid = \"8268570668335894776\"\n", + "test_db_path = get_test_data_filepath(\"testing_database.db\")\n", + "test_planetoid = AdlerPlanetoid.construct_from_SQL(ssoid, test_db_path, filter_list=[\"g\", \"r\"])" + ] + }, + { + "cell_type": "markdown", + "id": "e1de656a-1a10-418a-8646-f0e57d811dc6", + "metadata": {}, + "source": [ + "Now let's make up some pretend Adler calculated values, and populate the AdlerData object stored in AdlerPlanetoid." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6e1a766-56fb-4682-8466-b42d5aa80ad2", + "metadata": {}, + "outputs": [], + "source": [ + "g_model_1 = {\n", + " \"model_name\": \"model_1\",\n", + " \"phaseAngle_min\": 31.0,\n", + " \"phaseAngle_range\": 32.0,\n", + " \"nobs\": 33,\n", + " \"arc\": 34.0,\n", + " \"H\": 35.0,\n", + " \"H_err\": 36.0,\n", + " \"phase_parameter_1\": 37.0,\n", + " \"phase_parameter_1_err\": 38.0,\n", + "}\n", + "\n", + "r_model_2 = {\n", + " \"model_name\": \"model_2\",\n", + " \"phaseAngle_min\": 41.0,\n", + " \"phaseAngle_range\": 42.0,\n", + " \"nobs\": 43,\n", + " \"arc\": 44.0,\n", + " \"H\": 45.0,\n", + " \"H_err\": 46.0,\n", + " \"phase_parameter_1\": 47.0,\n", + " \"phase_parameter_1_err\": 48.0,\n", + " \"phase_parameter_2\": 49.0,\n", + " \"phase_parameter_2_err\": 50.0,\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63635480-5b9f-49a4-97f8-a70cce410829", + "metadata": {}, + "outputs": [], + "source": [ + "test_planetoid.AdlerData.populate_phase_parameters(\"g\", **g_model_1)\n", + "test_planetoid.AdlerData.populate_phase_parameters(\"r\", **r_model_2)" + ] + }, + { + "cell_type": "markdown", + "id": "1c5b2ebe-094f-4dbc-b5ef-594d60ec1b28", + "metadata": {}, + "source": [ + "Now we can write these out." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e795fe1-afa7-4921-91f7-3dfc38240f80", + "metadata": {}, + "outputs": [], + "source": [ + "database_filepath = \"./gen_test_data/example_AdlerData_database.db\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7637fbc-2b03-46fa-95f7-0e760b54d9e2", + "metadata": {}, + "outputs": [], + "source": [ + "test_planetoid.AdlerData.write_row_to_database(database_filepath)" + ] + }, + { + "cell_type": "markdown", + "id": "4e9eebdd-d844-4261-8521-c04688d3813a", + "metadata": {}, + "source": [ + "We'll use Pandas to look at what we just wrote out." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5bc42334-e6ad-451a-a3f3-17794206c82b", + "metadata": {}, + "outputs": [], + "source": [ + "con = sqlite3.connect(database_filepath)\n", + "adler_data_out = pd.read_sql(\"SELECT * from AdlerData\", con)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4cb5d43-33b3-4fca-9a9b-52b781de7fb1", + "metadata": {}, + "outputs": [], + "source": [ + "adler_data_out" + ] + }, + { + "cell_type": "markdown", + "id": "3e77e0ab-1310-40f6-9380-499705849960", + "metadata": {}, + "source": [ + "Note that write_row_to_database() method always appends. 
So:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "810379ff-75bd-4c81-8ad9-66334e6ff9c5", + "metadata": {}, + "outputs": [], + "source": [ + "test_planetoid.AdlerData.write_row_to_database(database_filepath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e89559b-9500-4416-9d11-92bec442406a", + "metadata": {}, + "outputs": [], + "source": [ + "con = sqlite3.connect(database_filepath)\n", + "adler_data_out = pd.read_sql(\"SELECT * from AdlerData\", con)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "894623fa-7128-4518-9ca6-1b2a94d175d1", + "metadata": {}, + "outputs": [], + "source": [ + "adler_data_out" + ] + }, + { + "cell_type": "markdown", + "id": "49b89142-765f-4115-a011-d93c7c12737d", + "metadata": {}, + "source": [ + "Now we have added two rows." + ] + }, + { + "cell_type": "markdown", + "id": "65cefda8-ac98-4b04-ad4e-2175fb22b37c", + "metadata": {}, + "source": [ + "So perhaps we have an AdlerPlanetoid object and this time, we want to load in some previously calculated values for comparison. This is extremely easy. We'll do it on the AdlerPlanetoid object we already made." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5604ea13-4a37-44e2-bb2e-59ec2461e805", + "metadata": {}, + "outputs": [], + "source": [ + "test_planetoid.attach_previous_adler_data(database_filepath)" + ] + }, + { + "cell_type": "markdown", + "id": "8139a2c1-aa8c-4a57-86f6-5d3ac5923063", + "metadata": {}, + "source": [ + "This can be more easily accessed and read:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a926d90a-5d7d-4185-9f00-f06a51e06739", + "metadata": {}, + "outputs": [], + "source": [ + "test_planetoid.PreviousAdlerData.print_data()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45f810b5-4d9f-46d5-8fcc-d6dddc8e6b0e", + "metadata": {}, + "outputs": [], + "source": [ + "test_planetoid.PreviousAdlerData.get_phase_parameters_in_filter(\"g\", \"model_1\").__dict__" + ] + }, + { + "cell_type": "markdown", + "id": "f0455add-ca30-417b-92c6-6cceb6f67363", + "metadata": {}, + "source": [ + "Or, if you don't want to work with an existing AdlerPlanetoid object, you can directly populate an AdlerData object from a database." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ded7e1d-1016-4eb7-8442-3ff474c0d715", + "metadata": {}, + "outputs": [], + "source": [ + "adler_data_object = AdlerData(ssoid, [\"g\", \"r\"])\n", + "adler_data_object.populate_from_database(database_filepath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66a87ed1-3d99-4698-9c1f-5e6888989f0c", + "metadata": {}, + "outputs": [], + "source": [ + "adler_data_object.print_data()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41dc5ab2-961e-497a-8dad-c8fb35043923", + "metadata": {}, + "outputs": [], + "source": [ + "adler_data_object.get_phase_parameters_in_filter(\"g\", \"model_1\").__dict__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a7ada08-dc87-4f5e-b72f-32caa164c210", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/gen_test_data/adler_demo_testing_database.db b/notebooks/gen_test_data/adler_demo_testing_database.db index 1c3cdb5..455b28b 100644 Binary files a/notebooks/gen_test_data/adler_demo_testing_database.db and b/notebooks/gen_test_data/adler_demo_testing_database.db differ diff --git a/notebooks/gen_test_data/example_AdlerData_database.db b/notebooks/gen_test_data/example_AdlerData_database.db new file mode 100644 index 0000000..817257c Binary files /dev/null and b/notebooks/gen_test_data/example_AdlerData_database.db differ diff --git a/pyproject.toml b/pyproject.toml index 7482ed1..7874b80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 
+21,7 @@ dependencies = [ "sbpy", "matplotlib", # for plotting "pandas", + "scipy", ] [project.urls] diff --git a/src/adler/__init__.py b/src/adler/__init__.py index 443fa66..6d46c05 100644 --- a/src/adler/__init__.py +++ b/src/adler/__init__.py @@ -1,3 +1,4 @@ from . import dataclasses from . import science from . import utilities +from . import lasair diff --git a/src/adler/dataclasses/AdlerData.py b/src/adler/dataclasses/AdlerData.py index e3231f6..72863d8 100644 --- a/src/adler/dataclasses/AdlerData.py +++ b/src/adler/dataclasses/AdlerData.py @@ -1,6 +1,7 @@ import os import sqlite3 import logging +import re import numpy as np from dataclasses import dataclass, field from datetime import datetime, timezone @@ -15,6 +16,7 @@ "phase_parameter_2", "phase_parameter_2_err", ] +ALL_FILTER_LIST = ["u", "g", "r", "i", "z", "y"] logger = logging.getLogger(__name__) @@ -108,6 +110,73 @@ def populate_phase_parameters(self, filter_name, **kwargs): kwargs.get(model_key), ) + def populate_from_database(self, filepath): + """Populates the AdlerData object with information from the most recent timestamped entry for the ssObjectId in a given database. + + Parameters + ----------- + filepath : path-like object + Filepath with the location of the output SQL database. Note that for now, we assume only one table with all the data. 
+ """ + + con = self._get_database_connection(filepath) + cursor = con.cursor() + sql_query = f"""SELECT * from AdlerData where ssObjectId='{self.ssObjectId}' ORDER BY timestamp DESC LIMIT 1""" + query_result = cursor.execute(sql_query) + + try: + fetched_data_raw = query_result.fetchall()[0] + except IndexError: + logger.error("ValueError: No data found in this database for the supplied ssObjectId.") + raise ValueError("No data found in this database for the supplied ssObjectId.") + + fetched_data = [np.nan if v is None else v for v in fetched_data_raw] # replaces Nones with nans + column_list = self._get_database_columns(con, "AdlerData") + con.close() + + filter_bools = [ + any((column_heading.startswith(filter + "_") for column_heading in column_list)) + for filter in ALL_FILTER_LIST + ] + database_filter_list = [b for a, b in zip(filter_bools, ALL_FILTER_LIST) if a] + + if not all([requested_filter in database_filter_list for requested_filter in self.filter_list]): + logger.error( + "ValueError: Data does not exist for some of the requested filters in this database. Filters in database for this object: {}".format( + database_filter_list + ) + ) + raise ValueError( + "Data does not exist for some of the requested filters in this database. 
Filters in database for this object: {}".format( + database_filter_list + ) + ) + + for filter_name in self.filter_list: + expected_filter_columns = [filter_name + "_" + filter_key for filter_key in FILTER_DEPENDENT_KEYS] + filter_indices_list = [column_list.index(column_name) for column_name in expected_filter_columns] + filter_values = [fetched_data[a] for a in filter_indices_list] + filter_dependent_info = dict(zip(FILTER_DEPENDENT_KEYS, filter_values)) + + self.populate_phase_parameters(filter_name, **filter_dependent_info) + + r = re.compile("^(" + filter_name + "_).*_H$") + model_column_list = list(filter(r.match, column_list)) + models_in_filter = [model[2:-2] for model in model_column_list] + + for model_name in models_in_filter: + expected_model_columns = [ + filter_name + "_" + model_name + "_" + model_key for model_key in MODEL_DEPENDENT_KEYS + ] + model_indices_list = [ + column_list.index(column_name) for column_name in expected_model_columns + ] + model_values = [fetched_data[a] for a in model_indices_list] + model_dependent_info = dict(zip(MODEL_DEPENDENT_KEYS, model_values)) + model_dependent_info["model_name"] = model_name + + self.populate_phase_parameters(filter_name, **model_dependent_info) + def print_data(self): """Convenience method to clearly print the stored values.""" @@ -224,7 +293,7 @@ def get_phase_parameters_in_filter(self, filter_name, model_name=None): return output_obj - def _get_database_connection(self, filepath): + def _get_database_connection(self, filepath, create_new=False): """Returns the connection to the output SQL database, creating it if it does not exist. Parameters @@ -232,6 +301,9 @@ def _get_database_connection(self, filepath): filepath : path-like object Filepath with the location of the output SQL database. + create_new : Boolean + Whether to create the database if it doesn't already exist. Default is False. 
+ Returns ---------- con : sqlite3 Connection object @@ -242,15 +314,20 @@ def _get_database_connection(self, filepath): database_exists = os.path.isfile( filepath ) # check this FIRST as the next statement creates the db if it doesn't exist - con = sqlite3.connect(filepath) - if not database_exists: # we need to make the table and a couple of starter columns + if not database_exists and create_new: # we need to make the table and a couple of starter columns + con = sqlite3.connect(filepath) cur = con.cursor() cur.execute("CREATE TABLE AdlerData(ssObjectId, timestamp)") + elif not database_exists and not create_new: + logger.error("ValueError: Database cannot be found at given filepath.") + raise ValueError("Database cannot be found at given filepath.") + else: + con = sqlite3.connect(filepath) return con - def _get_database_columns(self, con, table_name): + def _get_database_columns(self, con, tablename="AdlerData"): """Gets a list of the current columns in a given table in a SQL database. Parameters @@ -258,8 +335,8 @@ def _get_database_columns(self, con, table_name): con : sqlite3 Connection object The connection to the output SQL database. - table_name : str - The name of the relevant table in the database. + tablename : str + The name of the relevant table in the database. Default is "AdlerData". 
Returns @@ -270,7 +347,7 @@ def _get_database_columns(self, con, table_name): """ cur = con.cursor() - cur.execute(f"""SELECT * from {table_name} where 1=0""") + cur.execute(f"""SELECT * from {tablename} where 1=0""") return [d[0] for d in cur.description] def _get_row_data_and_columns(self): @@ -351,7 +428,7 @@ def write_row_to_database(self, filepath, table_name="AdlerData"): """ - con = self._get_database_connection(filepath) + con = self._get_database_connection(filepath, create_new=True) row_data, required_columns = self._get_row_data_and_columns() current_columns = self._get_database_columns(con, table_name) diff --git a/src/adler/dataclasses/AdlerPlanetoid.py b/src/adler/dataclasses/AdlerPlanetoid.py index d1f0084..162af1b 100644 --- a/src/adler/dataclasses/AdlerPlanetoid.py +++ b/src/adler/dataclasses/AdlerPlanetoid.py @@ -29,7 +29,7 @@ def __init__( filter_list : list of str A comma-separated list of the filters of interest. - date_range : list of int + date_range : list of float The minimum and maximum dates of the desired observations. observations_by_filter : list of Observations objects @@ -76,7 +76,7 @@ def construct_from_SQL( filter_list : list of str A comma-separated list of the filters of interest. - date_range : list of int + date_range : list of float The minimum and maximum dates of the desired observations. schema : str or None @@ -117,30 +117,87 @@ def construct_from_SQL( return cls(ssObjectId, filter_list, date_range, observations_by_filter, mpcorb, ssobject, adler_data) @classmethod - def construct_from_JSON(cls, json_filename): - with open(json_filename) as f: - json_dict = json.load(f) + def construct_from_cassandra( + cls, + ssObjectId, + filter_list=["u", "g", "r", "i", "z", "y"], + date_range=[60000.0, 67300.0], + cassandra_hosts=["10.21.3.123"], + ): # pragma: no cover + """Custom constructor which builds the AdlerPlanetoid object and the associated Observations, MPCORB and SSObject objects from + a Cassandra database. 
Used only for Lasair integration. + + TODO: move method to its own class which inherits from AdlerPlanetoid and move to adler-lasair repo? - observations_dict = {**json_dict["SSSource"], **json_dict["DiaSource"]} + Parameters + ----------- + ssObjectId : str + ssObjectId of the object of interest. - filter_list = [observations_dict["band"]] + filter_list : list of str + A comma-separated list of the filters of interest. - MPCORB_dict = json_dict["MPCORB"] - SSObject_dict = json_dict["SSObject"] + date_range : list of float + The minimum and maximum dates of the desired observations. - ssObjectId = observations_dict["ssObjectId"] + cassandra_hosts : list of str + Location of the Cassandra database - usually an IP address. Default is ["10.21.3.123"]. + + """ + # do not move this import! CassandraFetcher requires the non-mandatory + # cassandra-driver library - if not installed, and this import is at the top, + # test collection will break. + from adler.lasair.cassandra_fetcher import CassandraFetcher + + fetcher = CassandraFetcher(cassandra_hosts=cassandra_hosts) + + MPCORB_dict = fetcher.fetch_MPCORB(ssObjectId) + SSObject_dict = fetcher.fetch_SSObject(ssObjectId, filter_list) + observations_dict = fetcher.fetch_observations(ssObjectId) + + # note that Cassandra doesn't allow filters/joins + # instead we pull all observations for this ID, then filter with Pandas later + observations_table = pd.DataFrame(observations_dict) + observations_table.rename(columns={"decl": "dec"}, inplace=True) + + observations_by_filter = [] + for filter_name in filter_list: + obs_slice = observations_table[ + (observations_table["band"] == filter_name) + & (observations_table["midpointmjdtai"].between(date_range[0], date_range[1])) + ] + + if len(obs_slice) == 0: + logger.warning( + "No observations found in {} filter for this object. 
Skipping this filter.".format( + filter_name + ) + ) + else: + observations = Observations.construct_from_data_table(ssObjectId, filter_name, obs_slice) + observations_by_filter.append(observations) + + if len(observations_by_filter) == 0: + logger.error( + "No observations found for this object in the given filter(s). Check SSOID and try again." + ) + raise Exception( + "No observations found for this object in the given filter(s). Check SSOID and try again." + ) + + if len(filter_list) > len(observations_by_filter): + logger.info( + "Not all specified filters have observations. Recalculating filter list based on past observations." + ) + filter_list = [obs_object.filter_name for obs_object in observations_by_filter] + logger.info("New filter list is: {}".format(filter_list)) - observations_by_filter = [ - Observations.construct_from_dictionary(ssObjectId, filter_list[0], observations_dict) - ] mpcorb = MPCORB.construct_from_dictionary(ssObjectId, MPCORB_dict) ssobject = SSObject.construct_from_dictionary(ssObjectId, filter_list, SSObject_dict) adler_data = AdlerData(ssObjectId, filter_list) - return cls( - ssObjectId, filter_list, [np.nan, np.nan], observations_by_filter, mpcorb, ssobject, adler_data - ) + return cls(ssObjectId, filter_list, date_range, observations_by_filter, mpcorb, ssobject, adler_data) @classmethod def construct_from_RSP( @@ -157,7 +214,7 @@ def construct_from_RSP( filter_list : list of str A comma-separated list of the filters of interest. - date_range : list of int + date_range : list of float The minimum and maximum dates of the desired observations. """ @@ -215,7 +272,7 @@ def populate_observations( filter_list : list of str A comma-separated list of the filters of interest. - date_range : list of int + date_range : list of float The minimum and maximum dates of the desired observations. 
service : pyvo.dal.tap.TAPService object or None @@ -412,3 +469,18 @@ def SSObject_in_filter(self, filter_name): raise ValueError("Filter {} is not in AdlerPlanetoid.filter_list.".format(filter_name)) return self.SSObject.filter_dependent_values[filter_index] + + def attach_previous_adler_data(self, filepath): + """Attaches and returns an AdlerData object containing the most recent AdlerData + for this ssObjectId. + + Parameters + ----------- + filepath : path-like object + Filepath with the location of the output SQL database. + """ + + self.PreviousAdlerData = AdlerData(self.ssObjectId, self.filter_list) + self.PreviousAdlerData.populate_from_database(filepath) + + return self.PreviousAdlerData diff --git a/src/adler/dataclasses/MPCORB.py b/src/adler/dataclasses/MPCORB.py index 844cef9..237df1d 100644 --- a/src/adler/dataclasses/MPCORB.py +++ b/src/adler/dataclasses/MPCORB.py @@ -122,9 +122,27 @@ def construct_from_data_table(cls, ssObjectId, data_table): @classmethod def construct_from_dictionary(cls, ssObjectId, data_dict): + """Initialises the MPCORB object from a dictionary of data. + + Parameters + ----------- + ssObjectId : str + ssObjectId of the object of interest. + + data_dict : dict or dict-like object + Dictionary of data from which attributes should be populated. + + Returns + ----------- + MPCORB object + MPCORB object with class attributes populated from data_dict. 
+ + """ mpcorb_dict = {"ssObjectId": ssObjectId} for mpcorb_key, mpcorb_type in MPCORB_KEYS.items(): - mpcorb_dict[mpcorb_key] = get_from_dictionary(data_dict, mpcorb_key, mpcorb_type, "MPCORB") + mpcorb_dict[mpcorb_key] = get_from_dictionary( + data_dict, mpcorb_key.casefold(), mpcorb_type, "MPCORB" + ) return cls(**mpcorb_dict) diff --git a/src/adler/dataclasses/Observations.py b/src/adler/dataclasses/Observations.py index 3ea1217..48a4dfc 100644 --- a/src/adler/dataclasses/Observations.py +++ b/src/adler/dataclasses/Observations.py @@ -120,7 +120,7 @@ class Observations: num_obs: int = 0 @classmethod - def construct_from_data_table(cls, ssObjectId, filter_name, data_table): + def construct_from_data_table(cls, ssObjectId, filter_name, data_table, cassandra=False): """Initialises the Observations object from a table of data. Parameters @@ -144,7 +144,12 @@ def construct_from_data_table(cls, ssObjectId, filter_name, data_table): obs_dict = {"ssObjectId": ssObjectId, "filter_name": filter_name, "num_obs": len(data_table)} for obs_key, obs_type in OBSERVATIONS_KEYS.items(): - obs_dict[obs_key] = get_from_table(data_table, obs_key, obs_type, "SSSource/DIASource") + try: + obs_dict[obs_key] = get_from_table(data_table, obs_key, obs_type, "SSSource/DIASource") + except KeyError: # sometimes we have case issues... + obs_dict[obs_key] = get_from_table( + data_table, obs_key.casefold(), obs_type, "SSSource/DIASource" + ) obs_dict["reduced_mag"] = cls.calculate_reduced_mag( cls, obs_dict["mag"], obs_dict["topocentricDist"], obs_dict["heliocentricDist"] @@ -154,6 +159,25 @@ def construct_from_data_table(cls, ssObjectId, filter_name, data_table): @classmethod def construct_from_dictionary(cls, ssObjectId, filter_name, data_dict): + """Initialises the Observations object from a dictionary of data. + + Parameters + ----------- + ssObjectId : str + ssObjectId of the object of interest. 
+
+        filter_name : str
+            String of the filter the observations are taken in.
+
+        data_dict : dict or dict-like object
+            Dictionary of data from which attributes should be populated.
+
+        Returns
+        -----------
+        Observations object
+            Observations object with class attributes populated from data_dict.
+
+        """
         obs_dict = {"ssObjectId": ssObjectId, "filter_name": filter_name, "num_obs": 1}
 
         for obs_key, obs_type in OBSERVATIONS_KEYS.items():
diff --git a/src/adler/dataclasses/SSObject.py b/src/adler/dataclasses/SSObject.py
index eedc724..c0ba468 100644
--- a/src/adler/dataclasses/SSObject.py
+++ b/src/adler/dataclasses/SSObject.py
@@ -67,6 +67,25 @@ class SSObject:
 
     @classmethod
     def construct_from_data_table(cls, ssObjectId, filter_list, data_table):
+        """Initialises the SSObject object from a table of data.
+
+        Parameters
+        -----------
+        ssObjectId : str
+            ssObjectId of the object of interest.
+
+        filter_list : list of str
+            A comma-separated list of the filters of interest.
+
+        data_table : table-like object
+            Table of data from which attributes should be populated.
+
+        Returns
+        -----------
+        SSObject object
+            SSObject object with class attributes populated from data_table.
+
+        """
         sso_dict = {"ssObjectId": ssObjectId, "filter_list": filter_list, "filter_dependent_values": []}
 
         for sso_key, sso_type in SSO_KEYS.items():
@@ -88,19 +107,40 @@ def construct_from_data_table(cls, ssObjectId, filter_list, data_table):
 
     @classmethod
     def construct_from_dictionary(cls, ssObjectId, filter_list, data_dict):
+        """Initialises the SSObject object from a dictionary of data.
+
+        Parameters
+        -----------
+        ssObjectId : str
+            ssObjectId of the object of interest.
+
+        filter_list : list of str
+            A comma-separated list of the filters of interest.
+
+        data_dict : dict or dict-like object
+            Dictionary of data from which attributes should be populated.
+
+        Returns
+        -----------
+        SSObject object
+            SSObject object with class attributes populated from data_dict.
+ + """ sso_dict = {"ssObjectId": ssObjectId, "filter_list": filter_list, "filter_dependent_values": []} for sso_key, sso_type in SSO_KEYS.items(): - sso_dict[sso_key] = get_from_dictionary(data_dict, sso_key, sso_type, "SSObject") + sso_dict[sso_key] = get_from_dictionary(data_dict, sso_key.casefold(), sso_type, "SSObject") for i, filter_name in enumerate(filter_list): filter_dept_object = FilterDependentSSO( filter_name=filter_name, - H=get_from_dictionary(data_dict, filter_name + "_H", float, "SSObject"), - G12=get_from_dictionary(data_dict, filter_name + "_G12", float, "SSObject"), - Herr=get_from_dictionary(data_dict, filter_name + "_HErr", float, "SSObject"), - G12err=get_from_dictionary(data_dict, filter_name + "_G12Err", float, "SSObject"), - nData=get_from_dictionary(data_dict, filter_name + "_Ndata", float, "SSObject"), + H=get_from_dictionary(data_dict, (filter_name + "_H").casefold(), float, "SSObject"), + G12=get_from_dictionary(data_dict, (filter_name + "_G12").casefold(), float, "SSObject"), + Herr=get_from_dictionary(data_dict, (filter_name + "_HErr").casefold(), float, "SSObject"), + G12err=get_from_dictionary( + data_dict, (filter_name + "_G12Err").casefold(), float, "SSObject" + ), + nData=get_from_dictionary(data_dict, (filter_name + "_Ndata").casefold(), float, "SSObject"), ) sso_dict["filter_dependent_values"].append(filter_dept_object) diff --git a/src/adler/dataclasses/dataclass_utilities.py b/src/adler/dataclasses/dataclass_utilities.py index d82c031..d6dd61e 100644 --- a/src/adler/dataclasses/dataclass_utilities.py +++ b/src/adler/dataclasses/dataclass_utilities.py @@ -109,6 +109,29 @@ def get_from_table(data_table, column_name, data_type, table_name="default"): def get_from_dictionary(data_dict, key_name, data_type, table_name="default"): + """Retrieves information from a dictionary and forces it to be a specified type. + + Parameters + ----------- + data_dict : dict or dict-like object + Dictionary containing columns of interest. 
+ + key_name : str + Key name under which the data of interest is stored. + + data_type : type + Data type. Should be int, float, str or np.ndarray. + + table_name : str + Name of the table or dictionary. This is mostly for more informative error messages. Default="default". + + Returns + ----------- + data_val : str, float, int or nd.array + The data requested from the dictionary cast to the type required. + + """ + try: if data_type == str: data_val = str(data_dict[key_name]) @@ -124,14 +147,15 @@ def get_from_dictionary(data_dict, key_name, data_type, table_name="default"): except ValueError: print("error message") - data_val = check_value_populated(data_val, data_type, key_name, "JSON") + data_val = check_value_populated(data_val, data_type, key_name, "dictionary") return data_val def check_value_populated(data_val, data_type, column_name, table_name): """Checks to see if data_val populated properly and prints a helpful warning if it didn't. - Usually this will trigger because the RSP hasn't populated that field for this particular object. + Usually this will trigger because the RSP or Cassandra database hasn't populated that + field for this particular object. Parameters ----------- diff --git a/src/adler/lasair/cassandra_fetcher.py b/src/adler/lasair/cassandra_fetcher.py new file mode 100644 index 0000000..ba6e157 --- /dev/null +++ b/src/adler/lasair/cassandra_fetcher.py @@ -0,0 +1,160 @@ +import json +import sys +from cassandra.cluster import Cluster, ConsistencyLevel +from cassandra.query import dict_factory, SimpleStatement + + +class CassandraFetcher: # pragma: no cover + """Class to fetch data from a Cassandra database, used for Lasair integration. + + TODO: move to the lasair-adler repo. + + Attributes + ----------- + cassandra_hosts : list of str + Location of the Cassandra database - usually an IP address. Default is ["10.21.3.123"]. 
+ + """ + + def __init__(self, cassandra_hosts): + self.cluster = Cluster(cassandra_hosts) + self.session = self.cluster.connect() + # Set the row_factory to dict_factory, otherwise + # the data returned will be in the form of object properties. + self.session.row_factory = dict_factory + self.session.set_keyspace("adler") + + def fetch_SSObject(self, ssObjectId, filter_list): + """Fetches the metadata from the SSObject table of a Cassandra database as a dictionary. + + Parameters + ----------- + ssObjectId : str + ssObjectId of the object of interest. + + filter_list : list of str + A comma-separated list of the filters of interest. + + Returns + ----------- + dict + A dictionary of metadata for the object of interest in the filters + of interest. + + """ + + filter_dependent_columns = "" + for filter_name in filter_list: + filter_string = "{}_H, {}_G12, {}_HErr, {}_G12Err, {}_Ndata, ".format( + filter_name, filter_name, filter_name, filter_name, filter_name + ) + + filter_dependent_columns += filter_string + + obj = {} + + SSObject_sql_query = f""" + SELECT + discoverySubmissionDate, firstObservationDate, arc, numObs, + {filter_dependent_columns} + maxExtendedness, minExtendedness, medianExtendedness + FROM + ssobjects + WHERE + ssObjectId = {ssObjectId} + """ + + ret = self.session.execute(SSObject_sql_query) + + for ssObject in ret: + obj = ssObject + + return obj + + def fetch_MPCORB(self, ssObjectId): + """Fetches the metadata from the MPCORB table of a Cassandra database as a dictionary. + + Parameters + ----------- + ssObjectId : str + ssObjectId of the object of interest. + + Returns + ----------- + dict + A dictionary of metadata for the object of interest. 
+ + """ + + obj = {} + + MPCORB_sql_query = f""" + SELECT + ssObjectId, mpcDesignation, fullDesignation, mpcNumber, mpcH, mpcG, epoch, tperi, peri, node, incl, e, n, q, + uncertaintyParameter, flags + FROM + mpcorbs + WHERE + ssObjectId = {ssObjectId} + """ + + ret = self.session.execute(MPCORB_sql_query) + + for MPCORB in ret: + obj = MPCORB + + return obj + + def fetch_observations(self, ssObjectId): + """Fetches the source observations from the DIASource and SSSource tables as a dictionary. + Note that it will retrieve ALL observations for the object regardless of filter and data range, + so any filtering must be performed later. This is due to restrictions on queries to Cassandra. + + Parameters + ----------- + ssObjectId : str + ssObjectId of the object of interest. + + Returns + ----------- + dict + A dictionary of metadata for the object of interest in the filters + of interest. + + """ + + sourceDict = {} + + dia_query = f""" + SELECT + diasourceid, band, mag, magErr, midPointMjdTai, ra, decl + FROM + diasources + WHERE + ssObjectId = {ssObjectId} + """ + ret = self.session.execute(dia_query) + + n = 0 + for diaSource in ret: + sourceDict[diaSource["diasourceid"]] = diaSource + n += 1 + + ss_query = f"""SELECT diasourceid, phaseAngle, topocentricDist, heliocentricDist, heliocentricX, heliocentricY, heliocentricZ, + topocentricX, topocentricY, topocentricZ, eclipticLambda, eclipticBeta + FROM sssources + WHERE + ssObjectId = {ssObjectId} + """ + ret = self.session.execute(ss_query) + + n = 0 + for ssSource in ret: + n += 1 + sourceDict[ssSource["diasourceid"]].update(ssSource) + + sources = [] + for k, v in sourceDict.items(): + sources.append(v) + + return sources diff --git a/src/adler/utilities/AdlerCLIArguments.py b/src/adler/utilities/AdlerCLIArguments.py index 3def88d..4b6e7bd 100644 --- a/src/adler/utilities/AdlerCLIArguments.py +++ b/src/adler/utilities/AdlerCLIArguments.py @@ -29,6 +29,8 @@ def __init__(self, args): self.validate_arguments() def 
validate_arguments(self):
+        """Checks and validates the command-line arguments."""
+
         self._validate_filter_list()
         self._validate_date_range()
         self._validate_outpath()
@@ -46,6 +48,7 @@ def validate_arguments(self):
         self._validate_colour_list()
 
     def _validate_filter_list(self):
+        """Validation checks for the filter_list command-line argument."""
         expected_filters = ["u", "g", "r", "i", "z", "y"]
 
         if not set(self.filter_list).issubset(expected_filters):
@@ -77,6 +80,9 @@ def _validate_colour_list(self):
             raise ValueError(err_msg)
 
     def _validate_ssObjectId(self):
+        """
+        Validation checks for the ssObjectId command-line argument.
+        """
         try:
             int(self.ssObjectId)
         except ValueError:
@@ -84,6 +90,9 @@ def _validate_ssObjectId(self):
             raise ValueError("--ssObjectId command-line argument does not appear to be a valid ssObjectId.")
 
     def _validate_date_range(self):
+        """
+        Validation checks for the date_range command-line argument.
+        """
         for d in self.date_range:
             try:
                 float(d)
@@ -104,6 +113,9 @@ def _validate_date_range(self):
             )
 
     def _validate_outpath(self):
+        """
+        Validation checks for the outpath command-line argument.
+        """
         # make it an absolute path if it's relative!
         self.outpath = os.path.abspath(self.outpath)
 
@@ -112,6 +124,9 @@ def _validate_outpath(self):
             raise ValueError("The output path for the command-line argument --outpath cannot be found.")
 
     def _validate_ssObjectId_list(self):
+        """
+        Validation checks for the ssObjectId_list command-line argument.
+        """
         self.ssObjectId_list = os.path.abspath(self.ssObjectId_list)
 
         if not os.path.exists(self.ssObjectId_list):
@@ -123,6 +138,9 @@ def _validate_ssObjectId_list(self):
             )
 
     def _validate_sql_filename(self):
+        """
+        Validation checks for the sql_filename command-line argument.
+ """ self.sql_filename = os.path.abspath(self.sql_filename) if not os.path.exists(self.sql_filename): diff --git a/tests/adler/dataclasses/test_AdlerData.py b/tests/adler/dataclasses/test_AdlerData.py index c00c656..6c1e1e2 100644 --- a/tests/adler/dataclasses/test_AdlerData.py +++ b/tests/adler/dataclasses/test_AdlerData.py @@ -11,7 +11,7 @@ # setting up the AdlerData object to be used for testing -test_object = AdlerData(666, ["u", "g", "r"]) +test_object = AdlerData("8268570668335894776", ["u", "g", "r"]) u_model_1 = { "model_name": "model_1", @@ -249,4 +249,38 @@ def test_write_row_to_database(tmp_path): # note that because I'm using Pandas there's some small dtype and np.nan/None stuff to clear up # but this makes for a quick streamlined test anyway expected_data = expected_data.replace({np.nan: None}) + expected_data = expected_data.astype({"ssObjectId": str}) pd.testing.assert_frame_equal(expected_data, written_data, check_dtype=False) + + +def test_read_row_from_database(): + # NOTE: the test database here has two rows, one with an earlier timestamp and different data + # So this test also ensures that only the most recent data for the object is pulled. + + db_location = get_test_data_filepath("test_AdlerData_database.db") + + new_object = AdlerData("8268570668335894776", ["u", "g", "r"]) + new_object.populate_from_database(db_location) + + assert new_object.__dict__ == test_object.__dict__ + + with pytest.raises(ValueError) as error_info_1: + empty_data = AdlerData("pretend_object", ["u", "g", "r"]) + empty_data.populate_from_database(db_location) + + assert error_info_1.value.args[0] == "No data found in this database for the supplied ssObjectId." + + with pytest.raises(ValueError) as error_info_2: + bad_filter = AdlerData("8268570668335894776", ["u", "g", "h"]) + bad_filter.populate_from_database(db_location) + + assert ( + error_info_2.value.args[0] + == "Data does not exist for some of the requested filters in this database. 
Filters in database for this object: ['u', 'g', 'r']" + ) + + with pytest.raises(ValueError) as error_info_3: + bad_filter = AdlerData("8268570668335894776", ["u", "g", "h"]) + bad_filter.populate_from_database("./dummy_location.db") + + assert error_info_3.value.args[0] == "Database cannot be found at given filepath." diff --git a/tests/adler/dataclasses/test_AdlerPlanetoid.py b/tests/adler/dataclasses/test_AdlerPlanetoid.py index 5e956d8..992925a 100644 --- a/tests/adler/dataclasses/test_AdlerPlanetoid.py +++ b/tests/adler/dataclasses/test_AdlerPlanetoid.py @@ -155,3 +155,30 @@ def test_failed_SQL_queries(): assert ( error_info_2.value.args[0] == "No SSObject data for this object could be found for this SSObjectId." ) + + +def test_attach_previous_adlerdata(): + test_planetoid = AdlerPlanetoid.construct_from_SQL(ssoid, test_db_path, filter_list=["g", "r"]) + + db_location = get_test_data_filepath("test_AdlerData_database.db") + + test_planetoid.attach_previous_adler_data(db_location) + + test_output = test_planetoid.PreviousAdlerData.get_phase_parameters_in_filter("g", "model_1") + + expected_output = { + "filter_name": "g", + "phaseAngle_min": 31.0, + "phaseAngle_range": 32.0, + "nobs": 33, + "arc": 34.0, + "model_name": "model_1", + "H": 35.0, + "H_err": 36.0, + "phase_parameter_1": 37.0, + "phase_parameter_1_err": 38.0, + "phase_parameter_2": np.nan, + "phase_parameter_2_err": np.nan, + } + + assert test_output.__dict__ == expected_output diff --git a/tests/data/df_6098332225018_g_r_app_3_N_ref_1.csv b/tests/data/df_6098332225018_g_r_app_3_N_ref_1.csv index 2e1a24f..519760c 100644 --- a/tests/data/df_6098332225018_g_r_app_3_N_ref_1.csv +++ b/tests/data/df_6098332225018_g_r_app_3_N_ref_1.csv @@ -1,3 +1,4 @@ +<<<<<<< HEAD ,diaSourceId,midPointMjdTai,g-r,delta_t_g-r,g-rErr,reduced_mag_r,midPointMjdTai1_r,midPointMjdTai2_r,reduced_mag,magErr 
0,5.806484744413722e+18,61485.3588,0.6294446491054693,7.961480000005395,0.13199999928474426,17.19602428734781,61477.39732,61477.39732,17.82546893645328,0.13199999928474426 1,-1.3708475149938232e+18,61500.36601,0.4263135941635241,3.041850000001432,0.052000001072883606,17.12261631655302,61497.32416,61497.32416,17.548929910716545,0.052000001072883606 @@ -14,3 +15,21 @@ 12,8.768036112299468e+18,61590.24357,0.5062322871868119,0.26526999999623513,0.02199999988079071,16.585115951758564,61589.9783,61589.9783,17.091348238945375,0.02199999988079071 13,4.1433705206815264e+17,61616.20243,0.5004836109665014,0.19327999999950407,0.032999999821186066,16.94770017875798,61616.00915,61616.00915,17.44818378972448,0.032999999821186066 14,-8.355039433456489e+18,61648.07405,0.4917527634000258,0.024980000001960434,0.054999999701976776,17.135632936586298,61648.04907,61648.04907,17.627385699986323,0.054999999701976776 +======= +,midPointMjdTai,reduced_mag,magErr,g-r,delta_t_g-r,g-rErr,reduced_mag_r,midPointMjdTai1_r,midPointMjdTai2_r +0,61485.3588,17.82546893645328,0.13199999928474426,0.6294446491054693,7.961480000005395,0.13199999928474426,17.19602428734781,61477.39732,61477.39732 +1,61500.36601,17.548929910716545,0.052000001072883606,0.4263135941635241,3.041850000001432,0.052000001072883606,17.12261631655302,61497.32416,61497.32416 +2,61504.33885,17.545916408017177,0.05700000002980232,0.42122628677132923,0.024510000002919696,0.05700000002980232,17.124690121245848,61504.31434,61504.31434 +3,61524.2819,17.60109262744087,0.06199999898672104,0.4764025061950221,19.967560000004596,0.06199999898672104,17.124690121245848,61504.31434,61504.31434 +4,61525.31814,17.530157869823057,0.04699999839067459,0.4042173365957282,0.024610000000393484,0.04699999839067459,17.12594053322733,61525.29353,61525.29353 +5,61536.27685,17.474481419144603,0.039000000804662704,0.3952621411742143,4.973070000000007,0.039000000804662704,17.079219277970388,61531.30378,61531.30378 
+6,61556.37692,17.262436542715754,0.03200000151991844,0.39311468820022455,0.024060000003373716,0.03200000151991844,16.86932185451553,61556.35286,61556.35286 +7,61558.20527,17.330018817677058,0.03200000151991844,0.46069696316152786,1.8524099999995087,0.03200000151991844,16.86932185451553,61556.35286,61556.35286 +8,61558.20674,17.302025752576462,0.029999999329447746,0.43270389806093235,1.8538800000023912,0.029999999329447746,16.86932185451553,61556.35286,61556.35286 +9,61589.25295,17.086183244116974,0.028999999165534973,0.5580614970225177,1.968010000004142,0.028999999165534973,16.528121747094456,61587.28494,61587.28494 +10,61589.25519,17.035176049202192,0.032999999821186066,0.5070543021077363,1.9702500000057626,0.032999999821186066,16.528121747094456,61587.28494,61587.28494 +11,61590.002,17.013050114521022,0.04399999976158142,0.4279341627624582,0.023699999997916166,0.04399999976158142,16.585115951758564,61589.9783,61589.9783 +12,61590.24357,17.091348238945375,0.02199999988079071,0.5062322871868119,0.26526999999623513,0.02199999988079071,16.585115951758564,61589.9783,61589.9783 +13,61616.20243,17.44818378972448,0.032999999821186066,0.5004836109665014,0.19327999999950407,0.032999999821186066,16.94770017875798,61616.00915,61616.00915 +14,61648.07405,17.627385699986323,0.054999999701976776,0.4917527634000258,0.024980000001960434,0.054999999701976776,17.135632936586298,61648.04907,61648.04907 +>>>>>>> main diff --git a/tests/data/df_6098332225018_g_r_app_3_N_ref_3.csv b/tests/data/df_6098332225018_g_r_app_3_N_ref_3.csv index cd3f82d..10797ff 100644 --- a/tests/data/df_6098332225018_g_r_app_3_N_ref_3.csv +++ b/tests/data/df_6098332225018_g_r_app_3_N_ref_3.csv @@ -1,3 +1,4 @@ +<<<<<<< HEAD ,diaSourceId,midPointMjdTai,g-r,delta_t_g-r,g-rErr,reduced_mag_r,midPointMjdTai1_r,midPointMjdTai2_r,reduced_mag,magErr 
0,5.806484744413722e+18,61485.3588,0.6646031133749091,7.961480000005395,0.1387787184136571,17.16086582307837,61469.38552,61477.39732,17.82546893645328,0.13199999928474426 1,-1.3708475149938232e+18,61500.36601,0.3998204721333032,3.041850000001432,0.061730424176583504,17.149109438583242,61477.39732,61497.32416,17.548929910716545,0.052000001072883606 @@ -14,3 +15,21 @@ 12,8.768036112299468e+18,61590.24357,0.4915693990668686,0.26526999999623513,0.03569772593630112,16.599778839878507,61589.277,61589.9783,17.091348238945375,0.02199999988079071 13,4.1433705206815264e+17,61616.20243,0.5299196193560611,0.19327999999950407,0.041117958024899405,16.91826417036842,61610.20336,61616.00915,17.44818378972448,0.032999999821186066 14,-8.355039433456489e+18,61648.07405,0.5006338385771478,0.024980000001960434,0.08026750494410353,17.126751861409176,61620.16575,61648.04907,17.627385699986323,0.054999999701976776 +======= +,midPointMjdTai,reduced_mag,magErr,g-r,delta_t_g-r,g-rErr,reduced_mag_r,midPointMjdTai1_r,midPointMjdTai2_r +0,61485.3588,17.82546893645328,0.13199999928474426,0.6646031133749091,7.961480000005395,0.1387787184136571,17.16086582307837,61469.38552,61477.39732 +1,61500.36601,17.548929910716545,0.052000001072883606,0.3998204721333032,3.041850000001432,0.061730424176583504,17.149109438583242,61477.39732,61497.32416 +2,61504.33885,17.545916408017177,0.05700000002980232,0.41306100220004893,0.024510000002919696,0.06719475757019573,17.13285540581713,61503.35033,61504.31434 +3,61524.2819,17.60109262744087,0.06199999898672104,0.4682372216237418,19.967560000004596,0.07148521046953224,17.13285540581713,61503.35033,61504.31434 +4,61525.31814,17.530157869823057,0.04699999839067459,0.41017516413599964,0.024610000000393484,0.047603929903510755,17.119982705687057,61504.31434,61525.29353 +5,61536.27685,17.474481419144603,0.039000000804662704,0.3696556612160329,4.973070000000007,0.0435304535193636,17.10482575792857,61524.3074,61531.30378 
+6,61556.37692,17.262436542715754,0.03200000151991844,0.23760932081133745,0.024060000003373716,0.11609811521344783,17.024827221904417,61525.29353,61556.35286 +7,61558.20527,17.330018817677058,0.03200000151991844,0.30519159577264077,1.8524099999995087,0.11609811521344783,17.024827221904417,61525.29353,61556.35286 +8,61558.20674,17.302025752576462,0.029999999329447746,0.27719853067204525,1.8538800000023912,0.11556284964731135,17.024827221904417,61525.29353,61556.35286 +9,61589.25295,17.086183244116974,0.028999999165534973,0.483080272053364,1.968010000004142,0.09278157251016264,16.60310297206361,61562.31723,61587.28494 +10,61589.25519,17.035176049202192,0.032999999821186066,0.43207307713858256,1.9702500000057626,0.09410855558372905,16.60310297206361,61562.31723,61587.28494 +11,61590.002,17.013050114521022,0.04399999976158142,0.4132712746425149,0.023699999997916166,0.05221424730174361,16.599778839878507,61589.277,61589.9783 +12,61590.24357,17.091348238945375,0.02199999988079071,0.4915693990668686,0.26526999999623513,0.03569772593630112,16.599778839878507,61589.277,61589.9783 +13,61616.20243,17.44818378972448,0.032999999821186066,0.5299196193560611,0.19327999999950407,0.041117958024899405,16.91826417036842,61610.20336,61616.00915 +14,61648.07405,17.627385699986323,0.054999999701976776,0.5006338385771478,0.024980000001960434,0.08026750494410353,17.126751861409176,61620.16575,61648.04907 +>>>>>>> main diff --git a/tests/data/df_6098332225018_g_r_app_3_N_ref_5.csv b/tests/data/df_6098332225018_g_r_app_3_N_ref_5.csv index 6e4384d..fbef51a 100644 --- a/tests/data/df_6098332225018_g_r_app_3_N_ref_5.csv +++ b/tests/data/df_6098332225018_g_r_app_3_N_ref_5.csv @@ -1,3 +1,4 @@ +<<<<<<< HEAD ,diaSourceId,midPointMjdTai,g-r,delta_t_g-r,g-rErr,reduced_mag_r,midPointMjdTai1_r,midPointMjdTai2_r,reduced_mag,magErr 0,5.806484744413722e+18,61485.3588,0.6646031133749091,7.961480000005395,0.1387787184136571,17.16086582307837,61469.38552,61477.39732,17.82546893645328,0.13199999928474426 
1,-1.3708475149938232e+18,61500.36601,0.40214961118913806,3.041850000001432,0.06408282905927162,17.146780299527407,61469.38552,61497.32416,17.548929910716545,0.052000001072883606 @@ -14,3 +15,21 @@ 12,8.768036112299468e+18,61590.24357,0.5149825690876959,0.26526999999623513,0.04300417290467258,16.57636566985768,61586.24199,61589.9783,17.091348238945375,0.02199999988079071 13,4.1433705206815264e+17,61616.20243,0.6651807942177115,0.19327999999950407,0.17001002587811542,16.78300299550677,61589.27924,61616.00915,17.44818378972448,0.032999999821186066 14,-8.355039433456489e+18,61648.07405,0.5616368620560408,0.024980000001960434,0.10406682109366068,17.065748837930283,61616.22684,61648.04907,17.627385699986323,0.054999999701976776 +======= +,midPointMjdTai,reduced_mag,magErr,g-r,delta_t_g-r,g-rErr,reduced_mag_r,midPointMjdTai1_r,midPointMjdTai2_r +0,61485.3588,17.82546893645328,0.13199999928474426,0.6646031133749091,7.961480000005395,0.1387787184136571,17.16086582307837,61469.38552,61477.39732 +1,61500.36601,17.548929910716545,0.052000001072883606,0.40214961118913806,3.041850000001432,0.06408282905927162,17.146780299527407,61469.38552,61497.32416 +2,61504.33885,17.545916408017177,0.05700000002980232,0.411249516593152,0.024510000002919696,0.06403779113904624,17.134666891424025,61497.32416,61504.31434 +3,61524.2819,17.60109262744087,0.06199999898672104,0.4664257360168449,19.967560000004596,0.0685261888982895,17.134666891424025,61497.32416,61504.31434 +4,61525.31814,17.530157869823057,0.04699999839067459,0.4033930271697166,0.024610000000393484,0.05524451285749118,17.12676484265334,61503.35033,61525.29353 +5,61536.27685,17.474481419144603,0.039000000804662704,0.3506598671390968,4.973070000000007,0.05091030826493725,17.123821552005506,61503.35078,61531.30378 +6,61556.37692,17.262436542715754,0.03200000151991844,0.20073869280633616,0.024060000003373716,0.10276020693938347,17.061697849909418,61504.31434,61556.35286 
+7,61558.20527,17.330018817677058,0.03200000151991844,0.2683209677676395,1.8524099999995087,0.10276020693938347,17.061697849909418,61504.31434,61556.35286 +8,61558.20674,17.302025752576462,0.029999999329447746,0.24032790266704396,1.8538800000023912,0.10215507815432866,17.061697849909418,61504.31434,61556.35286 +9,61589.25295,17.086183244116974,0.028999999165534973,0.40181661190363016,1.968010000004142,0.12432445391335133,16.684366632213344,61558.23271,61587.28494 +10,61589.25519,17.035176049202192,0.032999999821186066,0.3508094169888487,1.9702500000057626,0.12531787533089708,16.684366632213344,61558.23271,61587.28494 +11,61590.002,17.013050114521022,0.04399999976158142,0.4366844446633422,0.023699999997916166,0.057457452706149006,16.57636566985768,61586.24199,61589.9783 +12,61590.24357,17.091348238945375,0.02199999988079071,0.5149825690876959,0.26526999999623513,0.04300417290467258,16.57636566985768,61586.24199,61589.9783 +13,61616.20243,17.44818378972448,0.032999999821186066,0.6651807942177115,0.19327999999950407,0.17001002587811542,16.78300299550677,61589.27924,61616.00915 +14,61648.07405,17.627385699986323,0.054999999701976776,0.5616368620560408,0.024980000001960434,0.10406682109366068,17.065748837930283,61616.22684,61648.04907 +>>>>>>> main diff --git a/tests/data/test_AdlerData_database.db b/tests/data/test_AdlerData_database.db new file mode 100644 index 0000000..31bd8ef Binary files /dev/null and b/tests/data/test_AdlerData_database.db differ diff --git a/tests/data/test_SQL_database_table.csv b/tests/data/test_SQL_database_table.csv index 790d23c..6c34fe1 100644 --- a/tests/data/test_SQL_database_table.csv +++ b/tests/data/test_SQL_database_table.csv @@ -1,2 +1,2 @@ 
ssObjectId,timestamp,u_phaseAngle_min,u_phaseAngle_range,u_nobs,u_arc,u_model_1_H,u_model_1_H_err,u_model_1_phase_parameter_1,u_model_1_phase_parameter_1_err,u_model_1_phase_parameter_2,u_model_1_phase_parameter_2_err,u_model_2_H,u_model_2_H_err,u_model_2_phase_parameter_1,u_model_2_phase_parameter_1_err,u_model_2_phase_parameter_2,u_model_2_phase_parameter_2_err,g_phaseAngle_min,g_phaseAngle_range,g_nobs,g_arc,g_model_1_H,g_model_1_H_err,g_model_1_phase_parameter_1,g_model_1_phase_parameter_1_err,g_model_1_phase_parameter_2,g_model_1_phase_parameter_2_err,r_phaseAngle_min,r_phaseAngle_range,r_nobs,r_arc,r_model_2_H,r_model_2_H_err,r_model_2_phase_parameter_1,r_model_2_phase_parameter_1_err,r_model_2_phase_parameter_2,r_model_2_phase_parameter_2_err -666,2024-04-18 13:32:07.096776+00:00,11.0,12.0,13,14.0,15.0,16.0,17.0,18.0,,,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,33,34.0,35.0,36.0,37.0,38.0,,,41.0,42.0,43,44.0,45.0,46.0,47.0,48.0,49.0,50.0 +8268570668335894776,2024-04-18 13:32:07.096776+00:00,11.0,12.0,13,14.0,15.0,16.0,17.0,18.0,,,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,33,34.0,35.0,36.0,37.0,38.0,,,41.0,42.0,43,44.0,45.0,46.0,47.0,48.0,49.0,50.0