diff --git a/notebooks/adlerdata_in_and_out.ipynb b/notebooks/adlerdata_in_and_out.ipynb new file mode 100644 index 0000000..22b68c5 --- /dev/null +++ b/notebooks/adlerdata_in_and_out.ipynb @@ -0,0 +1,318 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "55a1b889-7fb4-4d73-a9d0-23ab1bdb4dcb", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import sqlite3\n", + "\n", + "from adler.dataclasses.AdlerData import AdlerData\n", + "from adler.dataclasses.AdlerPlanetoid import AdlerPlanetoid\n", + "from adler.utilities.tests_utilities import get_test_data_filepath" + ] + }, + { + "cell_type": "markdown", + "id": "5264c132-e86d-4415-bfd4-cb1856d2fc33", + "metadata": {}, + "source": [ + "This is a quick notebook demonstrating how Adler's calculated values can be stored and then retrieved for later." + ] + }, + { + "cell_type": "markdown", + "id": "8baa263b-5f45-4f5a-b13e-565a5e2d181b", + "metadata": {}, + "source": [ + "First, let's make our AdlerPlanetoid object. In this case, we're populating it from a testing SQL database." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b04683c9-36db-4320-b6b7-7ef487aaf02e", + "metadata": {}, + "outputs": [], + "source": [ + "ssoid = \"8268570668335894776\"\n", + "test_db_path = get_test_data_filepath(\"testing_database.db\")\n", + "test_planetoid = AdlerPlanetoid.construct_from_SQL(ssoid, test_db_path, filter_list=[\"g\", \"r\"])" + ] + }, + { + "cell_type": "markdown", + "id": "e1de656a-1a10-418a-8646-f0e57d811dc6", + "metadata": {}, + "source": [ + "Now let's make up some pretend Adler calculated values, and populate the AdlerData object stored in AdlerPlanetoid." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6e1a766-56fb-4682-8466-b42d5aa80ad2", + "metadata": {}, + "outputs": [], + "source": [ + "g_model_1 = {\n", + " \"model_name\": \"model_1\",\n", + " \"phaseAngle_min\": 31.0,\n", + " \"phaseAngle_range\": 32.0,\n", + " \"nobs\": 33,\n", + " \"arc\": 34.0,\n", + " \"H\": 35.0,\n", + " \"H_err\": 36.0,\n", + " \"phase_parameter_1\": 37.0,\n", + " \"phase_parameter_1_err\": 38.0,\n", + "}\n", + "\n", + "r_model_2 = {\n", + " \"model_name\": \"model_2\",\n", + " \"phaseAngle_min\": 41.0,\n", + " \"phaseAngle_range\": 42.0,\n", + " \"nobs\": 43,\n", + " \"arc\": 44.0,\n", + " \"H\": 45.0,\n", + " \"H_err\": 46.0,\n", + " \"phase_parameter_1\": 47.0,\n", + " \"phase_parameter_1_err\": 48.0,\n", + " \"phase_parameter_2\": 49.0,\n", + " \"phase_parameter_2_err\": 50.0,\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63635480-5b9f-49a4-97f8-a70cce410829", + "metadata": {}, + "outputs": [], + "source": [ + "test_planetoid.AdlerData.populate_phase_parameters(\"g\", **g_model_1)\n", + "test_planetoid.AdlerData.populate_phase_parameters(\"r\", **r_model_2)" + ] + }, + { + "cell_type": "markdown", + "id": "1c5b2ebe-094f-4dbc-b5ef-594d60ec1b28", + "metadata": {}, + "source": [ + "Now we can write these out." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e795fe1-afa7-4921-91f7-3dfc38240f80", + "metadata": {}, + "outputs": [], + "source": [ + "database_filepath = \"./gen_test_data/example_AdlerData_database.db\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7637fbc-2b03-46fa-95f7-0e760b54d9e2", + "metadata": {}, + "outputs": [], + "source": [ + "test_planetoid.AdlerData.write_row_to_database(database_filepath)" + ] + }, + { + "cell_type": "markdown", + "id": "4e9eebdd-d844-4261-8521-c04688d3813a", + "metadata": {}, + "source": [ + "We'll use Pandas to look at what we just wrote out." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5bc42334-e6ad-451a-a3f3-17794206c82b", + "metadata": {}, + "outputs": [], + "source": [ + "con = sqlite3.connect(database_filepath)\n", + "adler_data_out = pd.read_sql(\"SELECT * from AdlerData\", con)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4cb5d43-33b3-4fca-9a9b-52b781de7fb1", + "metadata": {}, + "outputs": [], + "source": [ + "adler_data_out" + ] + }, + { + "cell_type": "markdown", + "id": "3e77e0ab-1310-40f6-9380-499705849960", + "metadata": {}, + "source": [ + "Note that the write_row_to_database() method always appends. So:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "810379ff-75bd-4c81-8ad9-66334e6ff9c5", + "metadata": {}, + "outputs": [], + "source": [ + "test_planetoid.AdlerData.write_row_to_database(database_filepath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e89559b-9500-4416-9d11-92bec442406a", + "metadata": {}, + "outputs": [], + "source": [ + "con = sqlite3.connect(database_filepath)\n", + "adler_data_out = pd.read_sql(\"SELECT * from AdlerData\", con)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "894623fa-7128-4518-9ca6-1b2a94d175d1", + "metadata": {}, + "outputs": [], + "source": [ + "adler_data_out" + ] + }, + { + "cell_type": "markdown", + "id": "49b89142-765f-4115-a011-d93c7c12737d", + "metadata": {}, + "source": [ + "Now we have added two rows." + ] + }, + { + "cell_type": "markdown", + "id": "65cefda8-ac98-4b04-ad4e-2175fb22b37c", + "metadata": {}, + "source": [ + "So perhaps we have an AdlerPlanetoid object and this time, we want to load in some previously calculated values for comparison. This is extremely easy. We'll do it on the AdlerPlanetoid object we already made." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5604ea13-4a37-44e2-bb2e-59ec2461e805", + "metadata": {}, + "outputs": [], + "source": [ + "test_planetoid.attach_previous_adler_data(database_filepath)" + ] + }, + { + "cell_type": "markdown", + "id": "8139a2c1-aa8c-4a57-86f6-5d3ac5923063", + "metadata": {}, + "source": [ + "This can be more easily accessed and read:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a926d90a-5d7d-4185-9f00-f06a51e06739", + "metadata": {}, + "outputs": [], + "source": [ + "test_planetoid.PreviousAdlerData.print_data()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45f810b5-4d9f-46d5-8fcc-d6dddc8e6b0e", + "metadata": {}, + "outputs": [], + "source": [ + "test_planetoid.PreviousAdlerData.get_phase_parameters_in_filter(\"g\", \"model_1\").__dict__" + ] + }, + { + "cell_type": "markdown", + "id": "f0455add-ca30-417b-92c6-6cceb6f67363", + "metadata": {}, + "source": [ + "Or, if you don't want to work with an existing AdlerPlanetoid object, you can directly populate an AdlerData object from a database." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ded7e1d-1016-4eb7-8442-3ff474c0d715", + "metadata": {}, + "outputs": [], + "source": [ + "adler_data_object = AdlerData(ssoid, [\"g\", \"r\"])\n", + "adler_data_object.populate_from_database(database_filepath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66a87ed1-3d99-4698-9c1f-5e6888989f0c", + "metadata": {}, + "outputs": [], + "source": [ + "adler_data_object.print_data()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41dc5ab2-961e-497a-8dad-c8fb35043923", + "metadata": {}, + "outputs": [], + "source": [ + "adler_data_object.get_phase_parameters_in_filter(\"g\", \"model_1\").__dict__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a7ada08-dc87-4f5e-b72f-32caa164c210", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/gen_test_data/example_AdlerData_database.db b/notebooks/gen_test_data/example_AdlerData_database.db new file mode 100644 index 0000000..817257c Binary files /dev/null and b/notebooks/gen_test_data/example_AdlerData_database.db differ diff --git a/src/adler/dataclasses/AdlerData.py b/src/adler/dataclasses/AdlerData.py index e3231f6..72863d8 100644 --- a/src/adler/dataclasses/AdlerData.py +++ b/src/adler/dataclasses/AdlerData.py @@ -1,6 +1,7 @@ import os import sqlite3 import logging +import re import numpy as np from dataclasses import dataclass, field from datetime import datetime, timezone @@ -15,6 +16,7 @@ "phase_parameter_2", 
def populate_from_database(self, filepath):
    """Populates the AdlerData object with information from the most recent timestamped entry for the ssObjectId in a given database.

    The newest row for ``self.ssObjectId`` is read from the ``AdlerData`` table. For every
    filter in ``self.filter_list`` the filter-dependent values are passed to
    ``populate_phase_parameters``, followed by one call per model found for that filter
    (a model is recognised by a ``<filter>_<model>_H`` column). NULL database values are
    converted to ``np.nan``.

    Parameters
    -----------
    filepath : path-like object
        Filepath with the location of the output SQL database. Note that for now, we assume only one table with all the data.

    Raises
    ----------
    ValueError
        If the database holds no row for this ssObjectId, or if the table has no columns
        for one or more of the filters in ``self.filter_list``. A ValueError raised by
        ``_get_database_connection`` (database not found at ``filepath``) also propagates.
    """

    con = self._get_database_connection(filepath)
    try:
        cursor = con.cursor()
        # Bind the id as a query parameter instead of interpolating it into the SQL text.
        # It is bound as a string so that it compares exactly like the quoted literal
        # this query previously used.
        cursor.execute(
            "SELECT * from AdlerData where ssObjectId=? ORDER BY timestamp DESC LIMIT 1",
            (str(self.ssObjectId),),
        )
        fetched_data_raw = cursor.fetchone()

        if fetched_data_raw is None:
            logger.error("ValueError: No data found in this database for the supplied ssObjectId.")
            raise ValueError("No data found in this database for the supplied ssObjectId.")

        column_list = self._get_database_columns(con, "AdlerData")
    finally:
        # Always release the connection, including on the "no data" error path.
        con.close()

    fetched_data = [np.nan if value is None else value for value in fetched_data_raw]  # replaces Nones with nans

    # A filter counts as present when at least one column heading starts with "<filter>_".
    database_filter_list = [
        known_filter
        for known_filter in ALL_FILTER_LIST
        if any(column_heading.startswith(known_filter + "_") for column_heading in column_list)
    ]

    if not all(requested_filter in database_filter_list for requested_filter in self.filter_list):
        message = "Data does not exist for some of the requested filters in this database. Filters in database for this object: {}".format(
            database_filter_list
        )
        logger.error("ValueError: " + message)
        raise ValueError(message)

    for filter_name in self.filter_list:
        filter_prefix = filter_name + "_"

        filter_dependent_info = {
            filter_key: fetched_data[column_list.index(filter_prefix + filter_key)]
            for filter_key in FILTER_DEPENDENT_KEYS
        }
        self.populate_phase_parameters(filter_name, **filter_dependent_info)

        # Every model stored for this filter has a "<filter>_<model>_H" column; the model
        # name is whatever sits between the filter prefix and the trailing "_H".
        model_h_pattern = re.compile("^" + re.escape(filter_prefix) + ".*_H$")
        models_in_filter = [
            column_heading[len(filter_prefix) : -len("_H")]
            for column_heading in column_list
            if model_h_pattern.match(column_heading)
        ]

        for model_name in models_in_filter:
            model_prefix = filter_prefix + model_name + "_"
            model_dependent_info = {
                model_key: fetched_data[column_list.index(model_prefix + model_key)]
                for model_key in MODEL_DEPENDENT_KEYS
            }
            model_dependent_info["model_name"] = model_name

            self.populate_phase_parameters(filter_name, **model_dependent_info)
+ Returns ---------- con : sqlite3 Connection object @@ -242,15 +314,20 @@ def _get_database_connection(self, filepath): database_exists = os.path.isfile( filepath ) # check this FIRST as the next statement creates the db if it doesn't exist - con = sqlite3.connect(filepath) - if not database_exists: # we need to make the table and a couple of starter columns + if not database_exists and create_new: # we need to make the table and a couple of starter columns + con = sqlite3.connect(filepath) cur = con.cursor() cur.execute("CREATE TABLE AdlerData(ssObjectId, timestamp)") + elif not database_exists and not create_new: + logger.error("ValueError: Database cannot be found at given filepath.") + raise ValueError("Database cannot be found at given filepath.") + else: + con = sqlite3.connect(filepath) return con - def _get_database_columns(self, con, table_name): + def _get_database_columns(self, con, tablename="AdlerData"): """Gets a list of the current columns in a given table in a SQL database. Parameters @@ -258,8 +335,8 @@ def _get_database_columns(self, con, table_name): con : sqlite3 Connection object The connection to the output SQL database. - table_name : str - The name of the relevant table in the database. + tablename : str + The name of the relevant table in the database. Default is "AdlerData". 
def attach_previous_adler_data(self, filepath):
    """Attaches and returns an AdlerData object containing the most recent AdlerData
    for this ssObjectId.

    A new AdlerData object is built from this object's ssObjectId and filter_list,
    stored as ``self.PreviousAdlerData`` and then filled by its
    ``populate_from_database`` method.

    Parameters
    -----------
    filepath : path-like object
        Filepath with the location of the output SQL database.

    Returns
    -----------
    AdlerData
        The object stored in ``self.PreviousAdlerData``.
    """

    previous_data = AdlerData(self.ssObjectId, self.filter_list)

    # Attach first, then load: the attribute is set even if the database read raises.
    self.PreviousAdlerData = previous_data
    previous_data.populate_from_database(filepath)

    return self.PreviousAdlerData
+ + with pytest.raises(ValueError) as error_info_2: + bad_filter = AdlerData("8268570668335894776", ["u", "g", "h"]) + bad_filter.populate_from_database(db_location) + + assert ( + error_info_2.value.args[0] + == "Data does not exist for some of the requested filters in this database. Filters in database for this object: ['u', 'g', 'r']" + ) + + with pytest.raises(ValueError) as error_info_3: + bad_filter = AdlerData("8268570668335894776", ["u", "g", "h"]) + bad_filter.populate_from_database("./dummy_location.db") + + assert error_info_3.value.args[0] == "Database cannot be found at given filepath." diff --git a/tests/adler/dataclasses/test_AdlerPlanetoid.py b/tests/adler/dataclasses/test_AdlerPlanetoid.py index 5e956d8..992925a 100644 --- a/tests/adler/dataclasses/test_AdlerPlanetoid.py +++ b/tests/adler/dataclasses/test_AdlerPlanetoid.py @@ -155,3 +155,30 @@ def test_failed_SQL_queries(): assert ( error_info_2.value.args[0] == "No SSObject data for this object could be found for this SSObjectId." 
def test_attach_previous_adlerdata():
    """attach_previous_adler_data() attaches, returns and correctly fills PreviousAdlerData."""
    test_planetoid = AdlerPlanetoid.construct_from_SQL(ssoid, test_db_path, filter_list=["g", "r"])

    db_location = get_test_data_filepath("test_AdlerData_database.db")

    returned_data = test_planetoid.attach_previous_adler_data(db_location)

    # The method returns the very object it attaches.
    assert returned_data is test_planetoid.PreviousAdlerData

    test_output = test_planetoid.PreviousAdlerData.get_phase_parameters_in_filter("g", "model_1")

    expected_output = {
        "filter_name": "g",
        "phaseAngle_min": 31.0,
        "phaseAngle_range": 32.0,
        "nobs": 33,
        "arc": 34.0,
        "model_name": "model_1",
        "H": 35.0,
        "H_err": 36.0,
        "phase_parameter_1": 37.0,
        "phase_parameter_1_err": 38.0,
        "phase_parameter_2": np.nan,
        "phase_parameter_2_err": np.nan,
    }

    # nan != nan, so a plain dict == only succeeds when both sides hold the same nan
    # object. assert_equal treats nans as equal, whatever object they are.
    np.testing.assert_equal(test_output.__dict__, expected_output)
13:32:07.096776+00:00,11.0,12.0,13,14.0,15.0,16.0,17.0,18.0,,,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,33,34.0,35.0,36.0,37.0,38.0,,,41.0,42.0,43,44.0,45.0,46.0,47.0,48.0,49.0,50.0 +8268570668335894776,2024-04-18 13:32:07.096776+00:00,11.0,12.0,13,14.0,15.0,16.0,17.0,18.0,,,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,33,34.0,35.0,36.0,37.0,38.0,,,41.0,42.0,43,44.0,45.0,46.0,47.0,48.0,49.0,50.0