From 29427838acc739f7edca7e0dbea4943a28e78b0c Mon Sep 17 00:00:00 2001 From: James Robinson Date: Wed, 20 Nov 2024 10:54:21 +0000 Subject: [PATCH] separate into demo and cli scripts (#178) * separate into demo and cli scripts * demo testing nb * demo testing nb * update demo nb * add a date of fit param * adler day draft script * linting * adler cli now runs adler_run.py * phase curve fig file name * add new CLI args * update adler command * update docs for new sigma_clip --------- Co-authored-by: Steph Merritt --- adler-day-testing.db | Bin 0 -> 16384 bytes .../notebooks/outlier_detection_example.ipynb | 6 +- notebooks/adler_demo.ipynb | 645 ++++++++++++++++++ notebooks/adler_demo/adler_demo.ipynb | 2 +- src/adler/adler_demo.py | 361 ++++++++++ src/adler/adler_run.py | 245 +++---- src/adler/objectdata/AdlerData.py | 2 + src/adler/science/Colour.py | 37 +- src/adler/utilities/AdlerCLIArguments.py | 19 + src/adler/utilities/science_utilities.py | 12 +- tests/adler/objectdata/test_AdlerData.py | 593 +++++++++++----- tests/adler/objectdata/test_AdlerPlanetoid.py | 46 +- tests/adler/science/test_Colour.py | 23 +- .../adler/utilities/test_AdlerCLIArguments.py | 103 ++- .../adler/utilities/test_science_utilities.py | 2 +- tests/data/adler_run_test_data.py | 247 +++++++ tests/data/test_AdlerData_database.db | Bin 8192 -> 8192 bytes tests/data/test_SQL_database_table.csv | 4 +- 18 files changed, 1977 insertions(+), 370 deletions(-) create mode 100644 adler-day-testing.db create mode 100644 notebooks/adler_demo.ipynb create mode 100644 src/adler/adler_demo.py create mode 100644 tests/data/adler_run_test_data.py diff --git a/adler-day-testing.db b/adler-day-testing.db new file mode 100644 index 0000000000000000000000000000000000000000..91e515e2c83b15582ab0824a852af1e658c8be5d GIT binary patch literal 16384 zcmeI12~ZSQ8pme@0dWK|Dq=ha-6(Pl{rYY(eoYiK;^;mTk6^NR3Eksc?f(s~k-&bNZ*@l^M!%+-VWz}r9x{ImqesAWZfB$!U z@Be!IpGStpXyh{^wR6-lazfHkB9%&bxm+TV_z53t;bS}o2p7MCRgheze(W)atG<9VWk<+8=&T4IN-D&tRgsaACA`q<_-uPan(P|r&)&CDw zlv=BvqlwXIRmfC*D`Ma#V&M81+9rSvN(_Kn1E5w9sBHw;@ZK2scOoRS;* z0BkXa0WhoyV1p6^V3+|gOb-~=2(aP3{^1&wH39IHw_M+f7y@96F${q5O#mB|7y#o9 zfbn|3_(p&Y@AVJYpsX3dmg`#)LjY_sMm^www!+*ab^N7n2C~+l{wR}?_?)5wq5`4< zq5`4|WS) zwk2)Gwf)38)he&eS?OKjEC1(*PU+-9jFL#~B(}m)2iwvWjwljiFq$G5isKl9W^f$C zNiT$XVXPcs11U5R5ekyQ5yN_5*gy>9(GMkCPb5r)ROzVN@0WLj+Wz)CzYN?C$5;Ms za%Zej zP?BEj9|=dE77ac1R|m972eu37V#xBP3{yWJv=dR`Srz4l*IuePJG>=>_HedcEw+&4wL!um5#b z_UAn2nY_8Tb`;D=2&npW)nll3EKI+9-WS&WeacZA|4`ohS-Fe%4Qu%1M5ilA_j2` zP0MmPAVN6Ja>h=2?sM6noQJ@w**(H8eLoCpCv_Qt=0AXgTOIZuyZ)F*W2Sdc zEX##?y)sY9eR!zcwaq)_vkQ<#t>4pj_P0F3&h;&7^AbKwyD3XozJaRKR8;Aa`>;K8 zd}Q{T2Ru?-?wPbh4$H8NkUsObKrQj=;b0Y92e9+alwjp8+Sl)$#;P>UI#$Q@QUP8 zK7dDeF1l(wD9F1v@X6`SGPsz(aq76F8Ys=(8CzM9qEpJu03=uz(HKFKLL(9cOCTII z2gp(^Nl?Z?*zW7aGlOGbd$$2zPiJPsb+@boN&kq33$Cfl-o)N@HrIjJv^<8 ztIgZ^Pmhi+Ut3hEn`ZBUT0N1{LgNs^@T=>p1R`*pX|4q+gb5R@fse3Smp^JP4qrWX zmGu~Z32xq37Z=^jhW-9$9WGW_@pxjtwe5mVKuDV{H!D_kh3achhL-JJ4__yCYwk?DJ%fb$Emy}9XCNv0$139BOQ^gy;l7Jw z7uY?1xyNpAADyb5IW5Sr7=;m}kPc}IS#C~|C`611FLmPJ2M8M$y~1oWH>@t4;TO~p6hMRYC==FUQbBV9AXIrp~Z8(jX8uS zbG@FBq*;P_n<*@v>)V>sNloQ?Js?5i!VF~Wq?XS0)@A^j%=LOe9H(()T#s5j*IStZ zXfoI90TG6?jBy^tES>Aym@^1X=K4B9Ou-2YZo-%gEuQP8?=aU3d>E%-af(7lJ`3mi zI{)9^CQ9PiOYr|wW%JvwulN7Y1^d9wx=+LrCGrtQ;jzv$$=E95H?3J!nkZ#I4u6QpW8IZ!dGmVv28z`9ku9D@Gl?2 zscUh|5B*xjdlg(ebTnih?3!EMt+-29II}t~c?7Wy&OIEw_2;ZK-s{TA^wRE2;8==N z_sh?}gX*hyt7F@%;nwkG9e-UDuT#Bm&YH)hJErOhFcx8?k-*~axVst4R+H|y4voWh zj1;CDV<)w=JML-*ph5lb; z!u|%18Qr7B-SKidfgiOy-#ul`&JJzW>nbU%%bAA0wVTxuM;_VAX%kG5#2SI`; APXGV_ literal 0 HcmV?d00001 diff --git a/docs/notebooks/outlier_detection_example.ipynb b/docs/notebooks/outlier_detection_example.ipynb index f3af189..82ef920 100644 --- a/docs/notebooks/outlier_detection_example.ipynb +++ b/docs/notebooks/outlier_detection_example.ipynb @@ -177,7 +177,7 @@ "outputs": [], "source": [ "# astropy sigma_clip normally uses the median as the central function\n", - "utils.sigma_clip(res, {\"cenfunc\": \"median\"})" + "utils.sigma_clip(res, cenfunc=\"median\", maxiters=1)" ] }, { @@ -188,7 +188,7 @@ "outputs": [], "source": [ "# assuming that the model is the ground truth, we use zero as the centroid for the residuals\n", - "utils.sigma_clip(res, {\"cenfunc\": utils.zero_func})" + "utils.sigma_clip(res, cenfunc=utils.zero_func, maxiters=1)" ] }, { @@ -296,7 +296,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/notebooks/adler_demo.ipynb b/notebooks/adler_demo.ipynb new file mode 100644 index 0000000..b2de112 --- /dev/null +++ b/notebooks/adler_demo.ipynb @@ -0,0 +1,645 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "7fafdbd9", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:22.264374Z", + "iopub.status.busy": "2024-10-22T13:49:22.264159Z", + "iopub.status.idle": "2024-10-22T13:49:24.365469Z", + "shell.execute_reply": "2024-10-22T13:49:24.364557Z", + "shell.execute_reply.started": "2024-10-22T13:49:22.264357Z" + } + }, + "outputs": [], + "source": [ + "from adler.objectdata.AdlerPlanetoid import AdlerPlanetoid\n", + "from adler.science.PhaseCurve import PhaseCurve\n", + "from adler.objectdata.AdlerData import AdlerData\n", + "from adler.utilities.plotting_utilities import plot_errorbar\n", + "import adler.utilities.science_utilities as sci_utils\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.gridspec as gridspec\n", + "import astropy.units as u" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9b8d9be", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:24.367159Z", + "iopub.status.busy": "2024-10-22T13:49:24.366563Z", + "iopub.status.idle": "2024-10-22T13:49:26.858571Z", + "shell.execute_reply": "2024-10-22T13:49:26.857667Z", + "shell.execute_reply.started": "2024-10-22T13:49:24.367135Z" + } + }, + "outputs": [], + "source": [ + "ssoid = \"6098332225018\" # good test object\n", + "# ssoid = \"8268570668335894776\" # NEO\n", + "# ssoid = \"-4973461691235584486\" # NEO\n", + "\n", + "# fname = \"/Users/jrobinson/lsst-adler/notebooks/gen_test_data/adler_demo_testing_database.db\"\n", + "# planetoid = AdlerPlanetoid.construct_from_SQL(ssoid, sql_filename=fname)\n", + "planetoid = AdlerPlanetoid.construct_from_RSP(ssoid)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dfca7299", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:26.861022Z", + "iopub.status.busy": "2024-10-22T13:49:26.860753Z", + "iopub.status.idle": "2024-10-22T13:49:26.910514Z", + "shell.execute_reply": "2024-10-22T13:49:26.909442Z", + "shell.execute_reply.started": "2024-10-22T13:49:26.861003Z" + } + }, + "outputs": [], + "source": [ + "planetoid.__dict__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "477268a2-7e9d-4962-a81e-187ab61b7b87", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:26.911998Z", + "iopub.status.busy": "2024-10-22T13:49:26.911740Z", + "iopub.status.idle": "2024-10-22T13:49:26.917243Z", + "shell.execute_reply": "2024-10-22T13:49:26.916406Z", + "shell.execute_reply.started": "2024-10-22T13:49:26.911979Z" + } + }, + "outputs": [], + "source": [ + "planetoid.MPCORB" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "334fd033", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:26.918460Z", + "iopub.status.busy": "2024-10-22T13:49:26.918225Z", + "iopub.status.idle": "2024-10-22T13:49:26.965001Z", + "shell.execute_reply": "2024-10-22T13:49:26.963946Z", + "shell.execute_reply.started": "2024-10-22T13:49:26.918437Z" + } + }, + "outputs": [], + "source": [ + "filters = planetoid.filter_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "960ee095", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:26.966552Z", + "iopub.status.busy": "2024-10-22T13:49:26.966297Z", + "iopub.status.idle": "2024-10-22T13:49:26.985388Z", + "shell.execute_reply": "2024-10-22T13:49:26.984534Z", + "shell.execute_reply.started": "2024-10-22T13:49:26.966534Z" + } + }, + "outputs": [], + "source": [ + "# get all obs as dataframe\n", + "df_obs = pd.DataFrame()\n", + "for filt in filters:\n", + " obs = planetoid.observations_in_filter(filt)\n", + " _df_obs = pd.DataFrame(obs.__dict__)\n", + " df_obs = pd.concat([df_obs, _df_obs])\n", + " df_obs = df_obs.reset_index(drop=True)\n", + "# break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f188ab5", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:26.986708Z", + "iopub.status.busy": "2024-10-22T13:49:26.986460Z", + "iopub.status.idle": "2024-10-22T13:49:27.015012Z", + "shell.execute_reply": "2024-10-22T13:49:27.013967Z", + "shell.execute_reply.started": "2024-10-22T13:49:26.986690Z" + } + }, + "outputs": [], + "source": [ + "df_obs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8a3ca51", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:27.016477Z", + "iopub.status.busy": "2024-10-22T13:49:27.016231Z", + "iopub.status.idle": "2024-10-22T13:49:27.025836Z", + "shell.execute_reply": "2024-10-22T13:49:27.024894Z", + "shell.execute_reply.started": "2024-10-22T13:49:27.016459Z" + } + }, + "outputs": [], + "source": [ + "# calculate elongation angle\n", + "R = df_obs[\"heliocentricDist\"]\n", + "Delta = df_obs[\"topocentricDist\"]\n", + "alpha = np.radians(df_obs[\"phaseAngle\"])\n", + "\n", + "R_E = np.sqrt((R * R) + (Delta * Delta) - (2.0 * R * Delta * np.cos(alpha)))\n", + "df_obs[\"elong\"] = np.degrees(np.arccos(((R_E * R_E) + (Delta * Delta) - (R * R)) / (2.0 * R_E * Delta)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d45807d", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:27.048396Z", + "iopub.status.busy": "2024-10-22T13:49:27.048063Z", + "iopub.status.idle": "2024-10-22T13:49:27.317638Z", + "shell.execute_reply": "2024-10-22T13:49:27.316589Z", + "shell.execute_reply.started": "2024-10-22T13:49:27.048373Z" + } + }, + "outputs": [], + "source": [ + "x_plot = \"midPointMjdTai\"\n", + "y_plot = \"elong\"\n", + "df_plot = df_obs\n", + "\n", + "fig = plt.figure()\n", + "gs = gridspec.GridSpec(1, 1)\n", + "ax1 = plt.subplot(gs[0, 0])\n", + "\n", + "ax1.scatter(df_plot[x_plot], df_plot[y_plot])\n", + "\n", + "ax1.set_xlabel(x_plot)\n", + "ax1.set_ylabel(y_plot)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a0f5291", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:27.319596Z", + "iopub.status.busy": "2024-10-22T13:49:27.319182Z", + "iopub.status.idle": "2024-10-22T13:49:27.355353Z", + "shell.execute_reply": "2024-10-22T13:49:27.354328Z", + "shell.execute_reply.started": "2024-10-22T13:49:27.319562Z" + } + }, + "outputs": [], + "source": [ + "tmin = np.amin(np.floor(df_obs[\"midPointMjdTai\"])) # mjd\n", + "tmax = np.amax(np.floor(df_obs[\"midPointMjdTai\"])) + 1 # mjd\n", + "tmin, tmax" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a293038", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:27.357205Z", + "iopub.status.busy": "2024-10-22T13:49:27.356817Z", + "iopub.status.idle": "2024-10-22T13:49:27.669853Z", + "shell.execute_reply": "2024-10-22T13:49:27.669093Z", + "shell.execute_reply.started": "2024-10-22T13:49:27.357176Z" + } + }, + "outputs": [], + "source": [ + "# cumulative data in filter\n", + "x_plot = \"midPointMjdTai\"\n", + "df_plot = df_obs.sort_values(x_plot)\n", + "\n", + "fig = plt.figure()\n", + "gs = gridspec.GridSpec(1, 1)\n", + "ax1 = plt.subplot(gs[0, 0])\n", + "\n", + "bins = np.arange(tmin, tmax + 1)\n", + "\n", + "values, base = np.histogram(df_plot[x_plot], bins=bins)\n", + "cumulative = np.cumsum(values)\n", + "ax1.plot(base[:-1] - base[0], cumulative)\n", + "\n", + "data_mask = np.diff(cumulative) > 0\n", + "data_nights = base[1:-1][data_mask]\n", + "N_data = cumulative[1:][data_mask]\n", + "\n", + "ax1.scatter(data_nights - data_nights[0], N_data)\n", + "\n", + "ax1.set_xlabel(x_plot)\n", + "ax1.set_ylabel(\"number\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09c7d0cb", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:27.672870Z", + "iopub.status.busy": "2024-10-22T13:49:27.672622Z", + "iopub.status.idle": "2024-10-22T13:49:27.679269Z", + "shell.execute_reply": "2024-10-22T13:49:27.678468Z", + "shell.execute_reply.started": "2024-10-22T13:49:27.672852Z" + } + }, + "outputs": [], + "source": [ + "data_nights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d57a9e80", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:27.680435Z", + "iopub.status.busy": "2024-10-22T13:49:27.680218Z", + "iopub.status.idle": "2024-10-22T13:49:27.695007Z", + "shell.execute_reply": "2024-10-22T13:49:27.694150Z", + "shell.execute_reply.started": "2024-10-22T13:49:27.680419Z" + } + }, + "outputs": [], + "source": [ + "N_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "504569cb", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:27.696425Z", + "iopub.status.busy": "2024-10-22T13:49:27.696205Z", + "iopub.status.idle": "2024-10-22T13:49:27.710841Z", + "shell.execute_reply": "2024-10-22T13:49:27.710150Z", + "shell.execute_reply.started": "2024-10-22T13:49:27.696408Z" + } + }, + "outputs": [], + "source": [ + "# create the empty AdlerData object\n", + "adler_data = AdlerData(ssoid, planetoid.filter_list)\n", + "adler_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87a6de72", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:27.712024Z", + "iopub.status.busy": "2024-10-22T13:49:27.711804Z", + "iopub.status.idle": "2024-10-22T13:49:27.750033Z", + "shell.execute_reply": "2024-10-22T13:49:27.749252Z", + "shell.execute_reply.started": "2024-10-22T13:49:27.712006Z" + } + }, + "outputs": [], + "source": [ + "planetoid.SSObject" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8de34419", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:27.751267Z", + "iopub.status.busy": "2024-10-22T13:49:27.751024Z", + "iopub.status.idle": "2024-10-22T13:49:27.756256Z", + "shell.execute_reply": "2024-10-22T13:49:27.755486Z", + "shell.execute_reply.started": "2024-10-22T13:49:27.751248Z" + } + }, + "outputs": [], + "source": [ + "planetoid.SSObject_in_filter(\"r\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd3e6491", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:27.757763Z", + "iopub.status.busy": "2024-10-22T13:49:27.757542Z", + "iopub.status.idle": "2024-10-22T13:49:27.770935Z", + "shell.execute_reply": "2024-10-22T13:49:27.770130Z", + "shell.execute_reply.started": "2024-10-22T13:49:27.757745Z" + } + }, + "outputs": [], + "source": [ + "# TODO: we need a translation between planetoid.SSObject attributes and AdlerData attributes\n", + "ADLER_SSOBJECT = {\"phase_parameter_1\": \"G12\"}\n", + "SSOBJECT_ADLER = {v: k for k, v in ADLER_SSOBJECT.items()}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71776f60", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:27.772332Z", + "iopub.status.busy": "2024-10-22T13:49:27.772091Z", + "iopub.status.idle": "2024-10-22T13:49:27.786653Z", + "shell.execute_reply": "2024-10-22T13:49:27.785661Z", + "shell.execute_reply.started": "2024-10-22T13:49:27.772315Z" + } + }, + "outputs": [], + "source": [ + "ADLER_SSOBJECT, SSOBJECT_ADLER" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c74ef722", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:27.788411Z", + "iopub.status.busy": "2024-10-22T13:49:27.788064Z", + "iopub.status.idle": "2024-10-22T13:49:27.799860Z", + "shell.execute_reply": "2024-10-22T13:49:27.799133Z", + "shell.execute_reply.started": "2024-10-22T13:49:27.788382Z" + } + }, + "outputs": [], + "source": [ + "sso_r = planetoid.SSObject_in_filter(\"r\")\n", + "sso_r.H, sso_r.G12" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d979da6", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:27.801232Z", + "iopub.status.busy": "2024-10-22T13:49:27.801000Z", + "iopub.status.idle": "2024-10-22T13:49:36.318337Z", + "shell.execute_reply": "2024-10-22T13:49:36.317155Z", + "shell.execute_reply.started": "2024-10-22T13:49:27.801215Z" + } + }, + "outputs": [], + "source": [ + "N_pc_fit = 25 # minimum number of observations to fit phase curve parameter (otherwise just fit H)\n", + "mod_name = \"HG12_Pen16\" # model name\n", + "\n", + "# create the empty AdlerData object\n", + "adler_data = AdlerData(ssoid, planetoid.filter_list)\n", + "\n", + "ad_params_list = []\n", + "\n", + "for n in data_nights:\n", + " print(n)\n", + " df_n = df_obs[df_obs[\"midPointMjdTai\"] < n]\n", + " print(len(df_n), np.unique(df_n[\"filter_name\"]))\n", + " print(df_n.value_counts(\"filter_name\"))\n", + " # break\n", + "\n", + " for filt in np.unique(df_n[\"filter_name\"]):\n", + " _df_n = df_n[df_n[\"filter_name\"] == filt]\n", + " print(filt, len(_df_n))\n", + "\n", + " # Try get parameters from AdlerData otherwise, use the SSObject parameters from alert\n", + " try:\n", + " params = adler_data.get_phase_parameters_in_filter(filt, model_name=mod_name)\n", + " print(\"use AdlerData parameters\")\n", + " phase_parameter_1 = \"phase_parameter_1\"\n", + "\n", + " except ValueError:\n", + " params = planetoid.SSObject_in_filter(filt)\n", + " print(\"use SSObject parameters\")\n", + " phase_parameter_1 = ADLER_SSOBJECT[\"phase_parameter_1\"]\n", + "\n", + " print(params)\n", + "\n", + " if not hasattr(params.H, \"unit\"):\n", + " params.H *= u.mag\n", + "\n", + " # Define the initial simple phase curve filter model with fixed G12\n", + " pc = PhaseCurve(\n", + " H=params.H,\n", + " phase_parameter_1=getattr(params, phase_parameter_1),\n", + " model_name=mod_name,\n", + " )\n", + "\n", + " # do we fix phase parameter?\n", + " if len(_df_n) < N_pc_fit:\n", + " pc.model_function.G12.fixed = True\n", + " else:\n", + " pc.model_function.G12.fixed = False\n", + "\n", + " # do a HG12_Pen16 fit to the past data\n", + " pc_fit = pc.FitModel(\n", + " np.array(_df_n[\"phaseAngle\"]) * u.deg,\n", + " np.array(_df_n[\"reduced_mag\"]) * u.mag,\n", + " np.array(_df_n[\"magErr\"]) * u.mag,\n", + " )\n", + " pc_fit = pc.InitModelSbpy(pc_fit)\n", + " print(filt, pc_fit.__dict__)\n", + "\n", + " # define a dict of all values, including metadata\n", + " # TODO: include units?\n", + " # TODO: add a model fit timestamp parameter?\n", + " ad_params = pc_fit.__dict__\n", + " ad_params[\"phaseAngle_min\"] = np.amin(_df_n[\"phaseAngle\"]) # * u.deg\n", + " ad_params[\"phaseAngle_range\"] = np.ptp(_df_n[\"phaseAngle\"]) # * u.deg\n", + " ad_params[\"arc\"] = np.ptp(_df_n[\"midPointMjdTai\"]) # * u.d\n", + " ad_params[\"nobs\"] = len(_df_n)\n", + " ad_params[\"modelFitMjd\"] = np.amax(_df_n[\"midPointMjdTai\"])\n", + "\n", + " # clean the dict of units\n", + " for x in ad_params:\n", + " if hasattr(ad_params[x], \"unit\"):\n", + " ad_params[x] = ad_params[x].value\n", + "\n", + " # store values in AdlerData\n", + " adler_data.populate_phase_parameters(filt, **ad_params)\n", + "\n", + " ad_params[\"filter_name\"] = filt\n", + " ad_params_list.append(ad_params)\n", + "\n", + "# break\n", + "\n", + "# break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1bdad8b-e2e2-4edf-82c4-7d653921ba57", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:36.319502Z", + "iopub.status.busy": "2024-10-22T13:49:36.319239Z", + "iopub.status.idle": "2024-10-22T13:49:36.354366Z", + "shell.execute_reply": "2024-10-22T13:49:36.353090Z", + "shell.execute_reply.started": "2024-10-22T13:49:36.319475Z" + } + }, + "outputs": [], + "source": [ + "ad_params" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed4d567b", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:36.356222Z", + "iopub.status.busy": "2024-10-22T13:49:36.355886Z", + "iopub.status.idle": "2024-10-22T13:49:36.622298Z", + "shell.execute_reply": "2024-10-22T13:49:36.621538Z", + "shell.execute_reply.started": "2024-10-22T13:49:36.356193Z" + } + }, + "outputs": [], + "source": [ + "fig = plot_errorbar(planetoid, filt_list=[\"r\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04139eac", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:36.623648Z", + "iopub.status.busy": "2024-10-22T13:49:36.623396Z", + "iopub.status.idle": "2024-10-22T13:49:36.629099Z", + "shell.execute_reply": "2024-10-22T13:49:36.628368Z", + "shell.execute_reply.started": "2024-10-22T13:49:36.623628Z" + } + }, + "outputs": [], + "source": [ + "ad_params" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0910c4ab", + "metadata": { + "execution": { + "iopub.execute_input": "2024-10-22T13:49:36.630700Z", + "iopub.status.busy": "2024-10-22T13:49:36.630138Z", + "iopub.status.idle": "2024-10-22T13:49:37.427375Z", + "shell.execute_reply": "2024-10-22T13:49:37.426547Z", + "shell.execute_reply.started": "2024-10-22T13:49:36.630677Z" + } + }, + "outputs": [], + "source": [ + "x_plot = \"modelFitMjd\"\n", + "y_plot = \"H\"\n", + "\n", + "for y_plot in [\n", + " \"H\",\n", + " \"phase_parameter_1\",\n", + " # \"phaseAngle_min\",\n", + " # \"phaseAngle_range\",\n", + " # \"arc\",\n", + " # \"nobs\",\n", + "]:\n", + " df_plot = pd.DataFrame(ad_params_list)\n", + "\n", + " fig = plt.figure()\n", + " gs = gridspec.GridSpec(1, 1)\n", + " ax1 = plt.subplot(gs[0, 0])\n", + "\n", + " for filt in np.unique(df_plot[\"filter_name\"]):\n", + " _df_plot = df_plot[df_plot[\"filter_name\"] == filt]\n", + " ax1.scatter(_df_plot[x_plot], _df_plot[y_plot], label=filt)\n", + " ax1.plot(_df_plot[x_plot], _df_plot[y_plot])\n", + " # ax1.scatter(_df_plot.index.values, _df_plot[y_plot])\n", + "\n", + " # ax1.set_xlabel(x_plot)\n", + " ax1.set_ylabel(y_plot)\n", + "\n", + " ax1.legend()\n", + "\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "314a2b2d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "adler-dev", + "language": "python", + "name": "adler-dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/adler_demo/adler_demo.ipynb b/notebooks/adler_demo/adler_demo.ipynb index f8fed9c..17cd458 100644 --- a/notebooks/adler_demo/adler_demo.ipynb +++ b/notebooks/adler_demo/adler_demo.ipynb @@ -582,7 +582,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/src/adler/adler_demo.py b/src/adler/adler_demo.py new file mode 100644 index 0000000..ce34d7e --- /dev/null +++ b/src/adler/adler_demo.py @@ -0,0 +1,361 @@ +import logging +import argparse +import astropy.units as u +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +import os + +from adler.objectdata.AdlerPlanetoid import AdlerPlanetoid +from adler.objectdata.AdlerData import AdlerData +from adler.science.PhaseCurve import PhaseCurve +from adler.science.Colour import col_obs_ref +from adler.utilities.AdlerCLIArguments import AdlerCLIArguments +from adler.utilities.adler_logging import setup_adler_logging +from adler.utilities.readin_utilities import read_in_SSObjectID_file +from adler.utilities.plotting_utilities import plot_errorbar +import adler.utilities.science_utilities as sci_utils + +logger = logging.getLogger(__name__) + + +def runAdlerDemo(cli_args): + logger.info("Beginning Adler.") + + # adler parameters + N_pc_fit = 10 # minimum number of data points to fit phase curve + diff_cut = 1.0 # magnitude difference used to identify outliers + obs_cols = ["diaSourceId", "midPointMjdTai", "outlier"] # observation columns to use + phase_model = "HG12_Pen16" # which phase curve model to fit + + # Define colour parameters + # set number of reference observations to use for colour estimate + N_ref = 5 + + if cli_args.ssObjectId_list: + ssObjectId_list = read_in_SSObjectID_file(cli_args.ssObjectId_list) + else: + ssObjectId_list = [cli_args.ssObjectId] + + # consider each ssObjectId in the list separately + for i, ssObjectId in enumerate(ssObjectId_list): + logger.info("Processing object {}/{}.".format(i + 1, len(ssObjectId_list))) + logger.info("Ingesting all data for object {} from RSP...".format(cli_args.ssObjectId)) + logger.info( + "Query data in the range: {} <= date <= {}".format(cli_args.date_range[0], cli_args.date_range[1]) + ) # the adler planetoid date_range is used in an SQL BETWEEN statement which is inclusive + print( + "Query data in the range: {} <= date <= {}".format(cli_args.date_range[0], cli_args.date_range[1]) + ) # the adler planetoid date_range is used in an SQL BETWEEN statement which is inclusive + logger.info("Consider the filters: {}".format(cli_args.filter_list)) + + # load ssObjectId data + if cli_args.sql_filename: # load from a local database, if provided + msg = "query sql database {}".format(cli_args.sql_filename) + logger.info(msg) + print(msg) + planetoid = AdlerPlanetoid.construct_from_SQL( + ssObjectId, + filter_list=cli_args.filter_list, + date_range=cli_args.date_range, + sql_filename=cli_args.sql_filename, + ) + else: # otherwise load from the Rubin Science Platform + msg = "query RSP" + logger.info(msg) + print(msg) + planetoid = AdlerPlanetoid.construct_from_RSP( + ssObjectId, cli_args.filter_list, cli_args.date_range + ) + + # TODO: Here we would load the AdlerData object from our data tables + adler_data = AdlerData(ssObjectId, planetoid.filter_list) + print(adler_data.__dict__) + + logger.info("Data successfully ingested.") + + # now let's do some phase curves! + logger.info("Calculating phase curves...") + + # operate on each filter in turn + for filt in cli_args.filter_list: + logger.info("fit {} filter data".format(filt)) + + # get the filter SSObject metadata + try: + sso = planetoid.SSObject_in_filter(filt) + except: + logger.info("error loading SSObject in filter {}".format(filt)) + continue + + # get the observations + obs = planetoid.observations_in_filter(filt) + df_obs = pd.DataFrame(obs.__dict__) + df_obs["outlier"] = [False] * len(df_obs) + logger.info("{} observations retrieved".format(len(df_obs))) + + # load and merge the previous obs + # TODO: replace this part with classifications loaded from adlerData + save_file = "{}/df_outlier_{}_{}.csv".format(cli_args.outpath, cli_args.ssObjectId, filt) + if os.path.isfile(save_file): + logger.info("load previously classified observations: {}".format(save_file)) + _df_obs = pd.read_csv(save_file, index_col=0) + df_obs = df_obs.merge(_df_obs, on=["diaSourceId", "midPointMjdTai"], how="left") + df_obs.loc[pd.isnull(df_obs["outlier_y"]), "outlier_y"] = ( + False # ensure that classifications exist (nan entries can only be false?). Weird behaviour here for g filter, is it to do with when new g obs appear relative to r/i etc? + ) + df_obs = df_obs.rename({"outlier_y": "outlier"}, axis=1) + df_obs = df_obs.drop("outlier_x", axis=1) + else: + logger.info("no previously classified observations to load") + + # define the date range to for new observations taken in the night to be analysed + logger.info( + "Most recent {} filter observation in query: date = {}".format( + filt, np.amax(df_obs["midPointMjdTai"]) + ) + ) + t1 = int(np.amax(df_obs["midPointMjdTai"])) + 1 + t0 = t1 - 1 + + # get all past observations + # mask = df_obs["midPointMjdTai"] < t0 + mask = (df_obs["midPointMjdTai"] < t0) & (df_obs["outlier"] == False) # reject any past outliers + + # split observations into "old" and "new" + df_obs_old = df_obs[(mask)] + df_obs_new = df_obs[~mask] + logger.info("Previous observations (date < {}): {}".format(t0, len(df_obs_old))) + logger.info("New observations ({} <= date < {}): {}".format(t0, t1, len(df_obs_new))) + + # Determine the reference phase curve model + # TODO: We would load the best phase curve model available in AdlerData here + + # we need sufficient past observations to fit the phase curve model + if len(df_obs_old) < 2: + print("save {}".format(save_file)) + df_save = df_obs[obs_cols] + df_save.to_csv(save_file) + print("insufficient data, use default SSObject phase model and continue") + logger.info("insufficient data, use default SSObject phase model and continue") + + # use the default SSObject phase parameter if there is no better information + pc_dict = { + "H": sso.H * u.mag, + "H_err": sso.Herr * u.mag, + "phase_parameter_1": sso.G12, + "phase_parameter_1_err": sso.G12err, + "model_name": "HG12_Pen16", + } + adler_data.populate_phase_parameters(filt, **pc_dict) + continue + + # initial simple phase curve filter model with fixed G12 + pc = PhaseCurve( + H=sso.H * u.mag, + phase_parameter_1=0.62, + model_name="HG12_Pen16", + ) + + # only fit G12 when sufficient data is available + if len(df_obs_old) < N_pc_fit: + pc.model_function.G12.fixed = True + else: + pc.model_function.G12.fixed = False + + # do a HG12_Pen16 fit to the past data + pc_fit = pc.FitModel( + np.array(df_obs_old["phaseAngle"]) * u.deg, + np.array(df_obs_old["reduced_mag"]) * u.mag, + np.array(df_obs_old["magErr"]) * u.mag, + ) + pc_fit = pc.InitModelSbpy(pc_fit) + + # TODO: Here the best fit should be pushed back to our AdlerData tables + adler_data.populate_phase_parameters(filt, **pc_fit.__dict__) + + # find outliers in new data + # calculate data - model residuals + res = (np.array(df_obs_new["reduced_mag"]) * u.mag) - pc_fit.ReducedMag( + np.array(df_obs_new["phaseAngle"]) * u.deg + ) + outlier_flag = sci_utils.outlier_diff(res.value, diff_cut=diff_cut) + df_obs.loc[~mask, "outlier"] = outlier_flag + + # save the df_obs subset with outlier classification + df_save = df_obs[obs_cols] + print("save classifications: {}".format(save_file)) + logger.info("save classifications: {}".format(save_file)) + df_save.to_csv(save_file) + + # make a plot + fig = plot_errorbar(planetoid, filt_list=[]) + ax1 = fig.axes[0] + ax1.scatter(df_obs_old["phaseAngle"], df_obs_old["reduced_mag"], c="C0") + alpha = np.linspace(0, np.amax(obs.phaseAngle)) * u.deg + ax1.plot(alpha.value, pc_fit.ReducedMag(alpha).value, label="t={}".format(int(t0))) + ax1.scatter( + df_obs_new["phaseAngle"], df_obs_new["reduced_mag"], edgecolor="r", facecolor="none", zorder=3 + ) + out_mask = df_obs["outlier"] == True + ax1.scatter( + df_obs.loc[out_mask]["phaseAngle"], + df_obs.loc[out_mask]["reduced_mag"], + c="r", + marker="x", + s=75, + zorder=3, + ) + fig_file = "{}/plots/phase_curve_{}_{}_{}.png".format( + cli_args.outpath, cli_args.ssObjectId, filt, int(t0) + ) + # TODO: make the plots folder if it does not already exist? + print("Save figure: {}".format(fig_file)) + logger.info("Save figure: {}".format(fig_file)) + fig = plot_errorbar(planetoid, fig=fig, filename=fig_file) # TODO: add titles with filter name? + plt.close() + + # analyse colours for the filters provided + logger.info("Calculate colours: {}".format(cli_args.colour_list)) + + # if requested cycle through the filters, calculating a colour relative to the next filter + if not cli_args.colour_list: + colour_list = [] + else: + colour_list = cli_args.colour_list + + # note that the order in which cli_args.filter_list is passed will determine which colours are calculated + for colour in colour_list: + col_filts = colour.split("-") + filt_obs = col_filts[0] + filt_ref = col_filts[1] + + if ~np.isin([filt_obs, filt_ref], planetoid.filter_list).all(): + missing_filts = np.array([filt_obs, filt_ref])[ + ~np.isin([filt_obs, filt_ref], planetoid.filter_list) + ] + logger.info( + "Filter(s) {} are missing for determining {} colour".format(missing_filts, colour) + ) + continue + + logger.info("Determine {} colour".format(colour)) + + # TODO: replace this with a colour loaded from adlerData + save_file_colour = "{}/df_colour_{}_{}.csv".format(cli_args.outpath, cli_args.ssObjectId, colour) + if os.path.isfile(save_file_colour): + print("load previous colours from file: {}".format(save_file_colour)) + df_col = pd.read_csv(save_file_colour, index_col=0) + # Check the last colour calculation date (x_obs) to avoid recalculation + obs = planetoid.observations_in_filter(filt_obs) + df_obs = pd.DataFrame(obs.__dict__) + if np.amax(df_col["midPointMjdTai"]) >= np.amax(df_obs["midPointMjdTai"]): + print("colour already calculated, skip") + continue + + else: + df_col = pd.DataFrame() + + # determine the filt_obs - filt_ref colour + # generate a plot + col_dict = col_obs_ref( + planetoid, + adler_data, + phase_model=phase_model, + filt_obs=filt_obs, + filt_ref=filt_ref, + N_ref=N_ref, + # x1 = x1, + # x2 = x2, + plot_dir="{}/plots".format(cli_args.outpath), + ) + + print(col_dict) + + # save the colour data + print("Append new colour and save to file: {}".format(save_file_colour)) + df_col = pd.concat([df_col, pd.DataFrame([col_dict])]) + df_col = df_col.reset_index(drop=True) + df_col.to_csv(save_file_colour) + + # TODO: reject unreliable colours, e.g. high colErr or delta_t_col + # TODO: determine if colour is outlying + # compare this new colour to previous colour(s) + + +def main(): + parser = argparse.ArgumentParser(description="Runs Adler for select planetoid(s) and given user input.") + + # the below group ensures that AT LEAST one of the below arguments is included, but NOT both + input_group = parser.add_mutually_exclusive_group(required=True) + input_group.add_argument("-s", "--ssObjectId", help="SSObject ID of planetoid.", type=str, default=None) + input_group.add_argument( + "-sl", + "--ssObjectId_list", + help="Filepath to text file listing multiple SSObjectIds.", + type=str, + default=None, + ) + + optional_group = parser.add_argument_group("Optional arguments") + optional_group.add_argument( + "-f", + "--filter_list", + help="Filters to be analysed.", + nargs="*", + type=str, + default=["u", "g", "r", "i", "z", "y"], + ) + optional_group.add_argument( + "-c", + "--colour_list", + help="Colours to be analysed.", + nargs="*", + type=str, + # default=["g-r", "r-i"], + default=None, + ) + optional_group.add_argument( + "-d", + "--date_range", + help="Minimum and maximum MJD(TAI) of required observations. Default is to pull all observations.", + nargs=2, + type=float, + default=[60000.0, 67300.0], + ) + optional_group.add_argument( + "-o", + "--outpath", + help="Output path location. Default is current working directory.", # TODO: make adler create the outpath directory on start up if it does not exist? Also the "plots" dir within? + type=str, + default="./", + ) + optional_group.add_argument( + "-n", + "--db_name", + help="Stem filename of output database. If this doesn't exist, it will be created. Default: adler_out.", + type=str, + default="adler_out", + ) + optional_group.add_argument( + "-i", + "--sql_filename", + help="Optional input path location of a sql database file containing observations", + type=str, + default=None, + ) + + args = parser.parse_args() + + cli_args = AdlerCLIArguments(args) + + adler_logger = setup_adler_logging(cli_args.outpath) + + cli_args.logger = adler_logger + + runAdlerDemo(cli_args) + + +if __name__ == "__main__": + main() diff --git a/src/adler/adler_run.py b/src/adler/adler_run.py index 4efe0a2..a3b48a8 100644 --- a/src/adler/adler_run.py +++ b/src/adler/adler_run.py @@ -1,6 +1,7 @@ import logging import argparse import astropy.units as u +from astropy.time import Time import numpy as np import pandas as pd import matplotlib.pyplot as plt @@ -9,7 +10,7 @@ from adler.objectdata.AdlerPlanetoid import AdlerPlanetoid from adler.objectdata.AdlerData import AdlerData -from adler.science.PhaseCurve import PhaseCurve +from adler.science.PhaseCurve import PhaseCurve, ADLER_SBPY_DICT from adler.science.Colour import col_obs_ref from adler.utilities.AdlerCLIArguments import AdlerCLIArguments from adler.utilities.adler_logging import setup_adler_logging @@ -27,7 +28,19 @@ def runAdler(cli_args): N_pc_fit = 10 # minimum number of data points to fit phase curve diff_cut = 1.0 # magnitude difference used to identify outliers obs_cols = ["diaSourceId", "midPointMjdTai", "outlier"] # observation columns to use - phase_model = "HG12_Pen16" # which phase curve model to fit + phase_model = cli_args.phase_model # which phase curve model to fit + + # get the name of the phase parameter + # TODO: make an option for two parameter HG1G2 + phase_parameter_1 = ADLER_SBPY_DICT[phase_model]["phase_parameter_1"] + print(phase_model, phase_parameter_1) + + # set default phase parameter values + # TODO: make an option for two parameter HG1G2 + if phase_model == "HG": + phase_param_1_default = 0.15 + else: + phase_param_1_default = 0.62 # Define colour parameters # set number of reference observations to use for colour estimate @@ -78,6 +91,9 @@ def runAdler(cli_args): # now let's do some phase curves! logger.info("Calculating phase curves...") + # make an empty figure + fig = plot_errorbar(planetoid, filt_list=[]) + # operate on each filter in turn for filt in cli_args.filter_list: logger.info("fit {} filter data".format(filt)) @@ -95,127 +111,94 @@ def runAdler(cli_args): df_obs["outlier"] = [False] * len(df_obs) logger.info("{} observations retrieved".format(len(df_obs))) - # load and merge the previous obs - # TODO: replace this part with classifications loaded from adlerData - save_file = "{}/df_outlier_{}_{}.csv".format(cli_args.outpath, cli_args.ssObjectId, filt) - if os.path.isfile(save_file): - logger.info("load previously classified observations: {}".format(save_file)) - _df_obs = pd.read_csv(save_file, index_col=0) - df_obs = df_obs.merge(_df_obs, on=["diaSourceId", "midPointMjdTai"], how="left") - df_obs.loc[pd.isnull(df_obs["outlier_y"]), "outlier_y"] = ( - False # ensure that classifications exist (nan entries can only be false?). Weird behaviour here for g filter, is it to do with when new g obs appear relative to r/i etc? - ) - df_obs = df_obs.rename({"outlier_y": "outlier"}, axis=1) - df_obs = df_obs.drop("outlier_x", axis=1) - else: - logger.info("no previously classified observations to load") - - # define the date range to for new observations taken in the night to be analysed - logger.info( - "Most recent {} filter observation in query: date = {}".format( - filt, np.amax(df_obs["midPointMjdTai"]) - ) - ) - t1 = int(np.amax(df_obs["midPointMjdTai"])) + 1 - t0 = t1 - 1 - - # get all past observations - # mask = df_obs["midPointMjdTai"] < t0 - mask = (df_obs["midPointMjdTai"] < t0) & (df_obs["outlier"] == False) # reject any past outliers - - # split observations into "old" and "new" - df_obs_old = df_obs[(mask)] - df_obs_new = df_obs[~mask] - logger.info("Previous observations (date < {}): {}".format(t0, len(df_obs_old))) - logger.info("New observations ({} <= date < {}): {}".format(t0, t1, len(df_obs_new))) - # Determine the reference phase curve model # TODO: We would load the best phase curve model available in AdlerData here - # we need sufficient past observations to fit the phase curve model - if len(df_obs_old) < 2: - print("save {}".format(save_file)) - df_save = df_obs[obs_cols] - df_save.to_csv(save_file) - print("insufficient data, use default SSObject phase model and continue") - logger.info("insufficient data, use default SSObject phase model and continue") - - # use the default SSObject phase parameter if there is no better information - pc_dict = { - "H": sso.H * u.mag, - "H_err": sso.Herr * u.mag, - "phase_parameter_1": sso.G12, - "phase_parameter_1_err": sso.G12err, - "model_name": "HG12_Pen16", - } - adler_data.populate_phase_parameters(filt, **pc_dict) - continue - - # initial simple phase curve filter model with fixed G12 + # initial simple phase curve filter model with fixed phase_parameter + # use the ssObject value for H as initial guess, this is in HG12_Pen16 + # TODO: use the ssObject value for phase parameter as initial guess? pc = PhaseCurve( - H=sso.H * u.mag, - phase_parameter_1=0.62, - model_name="HG12_Pen16", + # H=sso.H * u.mag, + H=sso.H, + phase_parameter_1=phase_param_1_default, + model_name=phase_model, ) - # only fit G12 when sufficient data is available - if len(df_obs_old) < N_pc_fit: - pc.model_function.G12.fixed = True + # only fit phase_parameter when sufficient data is available + if len(df_obs) < N_pc_fit: + msg = "Do not fit {}, use {}={:.2f}".format( + phase_parameter_1, phase_parameter_1, pc.phase_parameter_1 + ) + logger.info(msg) + # pc.model_function.G12.fixed = True + getattr(pc.model_function, phase_parameter_1).fixed = True else: - pc.model_function.G12.fixed = False + # pc.model_function.G12.fixed = False + getattr(pc.model_function, phase_parameter_1).fixed = False - # do a HG12_Pen16 fit to the past data + # do a phase model fit to the past data pc_fit = pc.FitModel( - np.array(df_obs_old["phaseAngle"]) * u.deg, - np.array(df_obs_old["reduced_mag"]) * u.mag, - np.array(df_obs_old["magErr"]) * u.mag, + # np.array(df_obs["phaseAngle"]) * u.deg, + # np.array(df_obs["reduced_mag"]) * u.mag, + # np.array(df_obs["magErr"]) * u.mag, + np.radians(np.array(df_obs["phaseAngle"])), + np.array(df_obs["reduced_mag"]), + np.array(df_obs["magErr"]), ) pc_fit = pc.InitModelSbpy(pc_fit) - # TODO: Here the best fit should be pushed back to our AdlerData tables - adler_data.populate_phase_parameters(filt, **pc_fit.__dict__) + # Store the fitted values, and metadata, in an AdlerData object + ad_params = pc_fit.__dict__ + ad_params["phaseAngle_min"] = np.amin(df_obs["phaseAngle"]) # * u.deg + ad_params["phaseAngle_range"] = np.ptp(df_obs["phaseAngle"]) # * u.deg + ad_params["arc"] = np.ptp(df_obs["midPointMjdTai"]) # * u.d + ad_params["nobs"] = len(df_obs) + ad_params["modelFitMjd"] = Time.now().mjd + # adler_data.populate_phase_parameters(filt, **pc_fit.__dict__) + # TODO: replace any None with np.nan? e.g. phase_parameter_2? + adler_data.populate_phase_parameters(filt, **ad_params) - # find outliers in new data - # calculate data - model residuals - res = (np.array(df_obs_new["reduced_mag"]) * u.mag) - pc_fit.ReducedMag( - np.array(df_obs_new["phaseAngle"]) * u.deg - ) - outlier_flag = sci_utils.outlier_diff(res.value, diff_cut=diff_cut) - df_obs.loc[~mask, "outlier"] = outlier_flag - - # save the df_obs subset with outlier classification - df_save = df_obs[obs_cols] - print("save classifications: {}".format(save_file)) - logger.info("save classifications: {}".format(save_file)) - df_save.to_csv(save_file) + print(adler_data.get_phase_parameters_in_filter(filt, phase_model).__dict__) - # make a plot - fig = plot_errorbar(planetoid, filt_list=[]) + # add to plot ax1 = fig.axes[0] - ax1.scatter(df_obs_old["phaseAngle"], df_obs_old["reduced_mag"], c="C0") + # TODO: set colours based on filter + ax1.scatter(df_obs["phaseAngle"], df_obs["reduced_mag"]) alpha = np.linspace(0, np.amax(obs.phaseAngle)) * u.deg - ax1.plot(alpha.value, pc_fit.ReducedMag(alpha).value, label="t={}".format(int(t0))) - ax1.scatter( - df_obs_new["phaseAngle"], df_obs_new["reduced_mag"], edgecolor="r", facecolor="none", zorder=3 - ) - out_mask = df_obs["outlier"] == True - ax1.scatter( - df_obs.loc[out_mask]["phaseAngle"], - df_obs.loc[out_mask]["reduced_mag"], - c="r", - marker="x", - s=75, - zorder=3, + ax1.plot( + alpha.value, + # pc_fit.ReducedMag(alpha).value, + # label="{}, H={:.2f}, G12={:.2f}".format(filt, pc_fit.H.value, pc_fit.phase_parameter_1), + pc_fit.ReducedMag(alpha), + label="{}, H={:.2f}, {}={:.2f}".format( + filt, pc_fit.H, phase_parameter_1, pc_fit.phase_parameter_1 + ), ) - fig_file = "{}/plots/phase_curve_{}_{}_{}.png".format( - cli_args.outpath, cli_args.ssObjectId, filt, int(t0) + ax1.legend() + + # TODO: Use a CLI arg flag to open figure interactively instead of saving? + if cli_args.plot_show: + plt.show() + # Save figures at the outpath location + else: + fig_file = "{}/phase_curve_{}_{}_{}.png".format( + cli_args.outpath, cli_args.ssObjectId, phase_model, int(np.amax(df_obs["midPointMjdTai"])) ) - # TODO: make the plots folder if it does not already exist? - print("Save figure: {}".format(fig_file)) - logger.info("Save figure: {}".format(fig_file)) + msg = "Save figure: {}".format(fig_file) + print(msg) + logger.info(msg) fig = plot_errorbar(planetoid, fig=fig, filename=fig_file) # TODO: add titles with filter name? plt.close() + # Output adler values to a database if a db_name is provided + print(adler_data.__dict__) + if cli_args.db_name: + adler_db = "{}/{}".format(cli_args.outpath, cli_args.db_name) + msg = "write to {}".format(adler_db) + print(msg) + logger.info(msg) + adler_data.write_row_to_database(adler_db) + # analyse colours for the filters provided logger.info("Calculate colours: {}".format(cli_args.colour_list)) @@ -242,23 +225,13 @@ def runAdler(cli_args): logger.info("Determine {} colour".format(colour)) - # TODO: replace this with a colour loaded from adlerData - save_file_colour = "{}/df_colour_{}_{}.csv".format(cli_args.outpath, cli_args.ssObjectId, colour) - if os.path.isfile(save_file_colour): - print("load previous colours from file: {}".format(save_file_colour)) - df_col = pd.read_csv(save_file_colour, index_col=0) - # Check the last colour calculation date (x_obs) to avoid recalculation - obs = planetoid.observations_in_filter(filt_obs) - df_obs = pd.DataFrame(obs.__dict__) - if np.amax(df_col["midPointMjdTai"]) >= np.amax(df_obs["midPointMjdTai"]): - print("colour already calculated, skip") - continue - - else: - df_col = pd.DataFrame() - # determine the filt_obs - filt_ref colour # generate a plot + if cli_args.plot_show: + plot_dir = None + else: + plot_dir = cli_args.outpath + col_dict = col_obs_ref( planetoid, adler_data, @@ -268,21 +241,12 @@ def runAdler(cli_args): N_ref=N_ref, # x1 = x1, # x2 = x2, - plot_dir="{}/plots".format(cli_args.outpath), + plot_dir=plot_dir, + plot_show=cli_args.plot_show, ) print(col_dict) - # save the colour data - print("Append new colour and save to file: {}".format(save_file_colour)) - df_col = pd.concat([df_col, pd.DataFrame([col_dict])]) - df_col = df_col.reset_index(drop=True) - df_col.to_csv(save_file_colour) - - # TODO: reject unreliable colours, e.g. high colErr or delta_t_col - # TODO: determine if colour is outlying - # compare this new colour to previous colour(s) - def main(): parser = argparse.ArgumentParser(description="Runs Adler for select planetoid(s) and given user input.") @@ -327,24 +291,41 @@ def main(): optional_group.add_argument( "-o", "--outpath", - help="Output path location. Default is current working directory.", # TODO: make adler create the outpath directory on start up if it does not exist? Also the "plots" dir within? + help="Output path location. Default is current working directory.", # TODO: make adler create the outpath directory on start up if it does not exist? type=str, default="./", ) optional_group.add_argument( "-n", "--db_name", - help="Stem filename of output database. If this doesn't exist, it will be created. Default: adler_out.", + # help="Stem filename of output database. If this doesn't exist, it will be created. Default: adler_out.", + # type=str, + # default="adler_out", + help="Optional filename of output database, used to store Adler results in a db if provided.", type=str, - default="adler_out", + default=None, ) optional_group.add_argument( "-i", "--sql_filename", - help="Optional input path location of a sql database file containing observations", + help="Optional input path location of a sql database file containing observations.", type=str, default=None, ) + optional_group.add_argument( + "-p", + "--plot_show", + help="Optional flag to display plots interactively instead of saving to file.", + action="store_true", + ) + # TODO: Add a model_name parameter + optional_group.add_argument( + "-m", + "--phase_model", + help="Select the phase parameter model_name. LIST OPTIONS AND DEFAULT", + type=str, + default="HG12_Pen16", + ) args = parser.parse_args() diff --git a/src/adler/objectdata/AdlerData.py b/src/adler/objectdata/AdlerData.py index 72863d8..d1f94f1 100644 --- a/src/adler/objectdata/AdlerData.py +++ b/src/adler/objectdata/AdlerData.py @@ -15,6 +15,7 @@ "phase_parameter_1_err", "phase_parameter_2", "phase_parameter_2_err", + "modelFitMjd", ] ALL_FILTER_LIST = ["u", "g", "r", "i", "z", "y"] @@ -522,6 +523,7 @@ class PhaseModelDependentAdler: phase_parameter_1_err: float = np.nan phase_parameter_2: float = np.nan phase_parameter_2_err: float = np.nan + modelFitMjd: float = np.nan class PhaseParameterOutput: diff --git a/src/adler/science/Colour.py b/src/adler/science/Colour.py index 37c2d58..4042eb4 100644 --- a/src/adler/science/Colour.py +++ b/src/adler/science/Colour.py @@ -19,6 +19,7 @@ def col_obs_ref( x1=None, x2=None, plot_dir=None, + plot_show=False, ): """A function to calculate the colour of an Adler planetoid object. An observation in a given filter (filt_obs) is compared to previous observation in a reference filter (filt_ref). @@ -99,7 +100,8 @@ def col_obs_ref( x_obs = df_obs.iloc[i][x_col] y_obs = df_obs.iloc[i][y_col] yerr_obs = df_obs.iloc[i][yerr_col] - obsId = df_obs.iloc[i][obsId_col] + # obsId = df_obs.iloc[i][obsId_col] # NB: for some reason iloc here doesn't preserve the int64 dtype of obsId_col...? + obsId = df_obs[obsId_col].iloc[i] # select observations in the reference filter from before the new obs ref_mask = df_obs_ref[x_col] < x_obs @@ -112,11 +114,11 @@ def col_obs_ref( # select only the N_ref ref obs for comparison _df_obs_ref = df_obs_ref[ref_mask].iloc[-_N_ref:] - print(len(_df_obs_ref)) - print(np.array(_df_obs_ref[x_col])) + # print(len(_df_obs_ref)) + # print(np.array(_df_obs_ref[x_col])) if len(_df_obs_ref) == 0: - print("no reference observations") # TODO: add proper error handling and logging here - return df_obs + # print("insufficient reference observations") # TODO: add proper error handling and logging here + return # determine reference observation values y_ref = np.mean(_df_obs_ref[y_col]) # TODO: add option to choose statistic, e.g. mean or median? @@ -127,7 +129,9 @@ def col_obs_ref( # Create the colour dict col_dict = {} - col_dict[obsId_col] = obsId + col_dict[obsId_col] = np.int64( + obsId + ) # store id as an int to make sure it doesn't get stored as float e notation! col_dict[x_col] = x_obs col_dict[colour] = y_obs - y_ref col_dict[delta_t_col] = x_obs - x2_ref @@ -141,7 +145,7 @@ def col_obs_ref( # need to test error case where there are no r filter obs yet # TODO: add a plotting option? - if plot_dir: + if plot_dir or plot_show: fig = plt.figure() gs = gridspec.GridSpec(1, 1) ax1 = plt.subplot(gs[0, 0]) @@ -160,13 +164,16 @@ def col_obs_ref( ax1.legend() ax1.invert_yaxis() - fname = "{}/colour_plot_{}_{}-{}_{}.png".format( - plot_dir, planetoid.ssObjectId, filt_obs, filt_ref, int(x_obs) - ) - print("Save figure: {}".format(fname)) - plt.savefig(fname, facecolor="w", transparent=True, bbox_inches="tight") - - # plt.show() # TODO: add option to display figure, or to return the fig object? - plt.close() + if plot_dir: + fname = "{}/colour_plot_{}_{}-{}_{}_{}.png".format( + plot_dir, planetoid.ssObjectId, filt_obs, filt_ref, phase_model, int(x_obs) + ) + print("Save figure: {}".format(fname)) + plt.savefig(fname, facecolor="w", transparent=True, bbox_inches="tight") + + if plot_show: + plt.show() # TODO: add option to display figure, or to return the fig object? + else: + plt.close() return col_dict diff --git a/src/adler/utilities/AdlerCLIArguments.py b/src/adler/utilities/AdlerCLIArguments.py index 4b6e7bd..3a717b3 100644 --- a/src/adler/utilities/AdlerCLIArguments.py +++ b/src/adler/utilities/AdlerCLIArguments.py @@ -25,6 +25,8 @@ def __init__(self, args): self.outpath = args.outpath self.db_name = args.db_name self.sql_filename = args.sql_filename + self.plot_show = args.plot_show + self.phase_model = args.phase_model self.validate_arguments() @@ -47,10 +49,14 @@ def validate_arguments(self): if self.colour_list: self._validate_colour_list() + if self.phase_model: + self._validate_phase_model() + def _validate_filter_list(self): """Validation checks for the filter_list command-line argument.""" expected_filters = ["u", "g", "r", "i", "z", "y"] + # TODO: more informative error message, show an example of required filter_list format if not set(self.filter_list).issubset(expected_filters): logging.error( "Unexpected filters found in --filter_list command-line argument. --filter_list must be a list of LSST filters." @@ -148,3 +154,16 @@ def _validate_sql_filename(self): raise ValueError( "The file supplied for the command-line argument --sql_filename cannot be found." ) + + def _validate_phase_model(self): + """Validation checks for the phase_model command-line argument.""" + expected_models = ["HG", "HG1G2", "HG12", "HG12_Pen16", "LinearPhaseFunc"] + err_msg_model = ( + "Unexpected model in --phase_model command-line arguments. Please select from {}".format( + expected_models + ) + ) + + if self.phase_model not in expected_models: + logging.error(err_msg_model) + raise ValueError(err_msg_model) diff --git a/src/adler/utilities/science_utilities.py b/src/adler/utilities/science_utilities.py index 90ff1df..7bcf46a 100644 --- a/src/adler/utilities/science_utilities.py +++ b/src/adler/utilities/science_utilities.py @@ -78,7 +78,7 @@ def zero_func(x, axis=None): return 0 -def sigma_clip(data_res, kwargs={"maxiters": 1, "cenfunc": zero_func}): +def sigma_clip(data_res, **kwargs): """Wrapper function for astropy.stats.sigma_clip, here we define the default centre of the data (the data - model residuals) to be zero Parameters @@ -87,7 +87,11 @@ def sigma_clip(data_res, kwargs={"maxiters": 1, "cenfunc": zero_func}): data_res: array The residuals of the data compared to the model. kwargs: dict - Dictionary of keyword arguments from astropy.stats.sigma_clip + Dictionary of keyword arguments from astropy.stats.sigma_clip, namely: + sigma : default 3 + maxiters: default 5 + cenfunc: default 'median' + Returns ----------- @@ -206,7 +210,9 @@ def get_df_obs_filt(planetoid, filt, x_col="midPointMjdTai", x1=None, x2=None, c if "AbsMag" in col_list: # calculate the model absolute magnitude # TODO: add robustness to the units, phaseAngle and reduced_mag must match pc_model - df_obs["AbsMag"] = pc_model.AbsMag(obs.phaseAngle * u.deg, obs.reduced_mag * u.mag).value + # For now we must assume that there are no units, and that degrees have been passed... + # df_obs["AbsMag"] = pc_model.AbsMag(obs.phaseAngle * u.deg, obs.reduced_mag * u.mag).value + df_obs["AbsMag"] = pc_model.AbsMag(np.radians(obs.phaseAngle), obs.reduced_mag) # select only the required columns df_obs = df_obs[col_list] diff --git a/tests/adler/objectdata/test_AdlerData.py b/tests/adler/objectdata/test_AdlerData.py index dd4af55..4751521 100644 --- a/tests/adler/objectdata/test_AdlerData.py +++ b/tests/adler/objectdata/test_AdlerData.py @@ -4,219 +4,394 @@ import pandas as pd import sqlite3 -from numpy.testing import assert_array_equal +from numpy.testing import assert_array_equal, assert_almost_equal from adler.objectdata.AdlerData import AdlerData from adler.utilities.tests_utilities import get_test_data_filepath # setting up the AdlerData object to be used for testing -test_object = AdlerData("8268570668335894776", ["u", "g", "r"]) - -u_model_1 = { - "model_name": "model_1", - "phaseAngle_min": 11.0, - "phaseAngle_range": 12.0, - "nobs": 13, - "arc": 14.0, - "H": 15.0, - "H_err": 16.0, - "phase_parameter_1": 17.0, - "phase_parameter_1_err": 18.0, -} -u_model_2 = { - "model_name": "model_2", - "H": 25.0, - "H_err": 26.0, - "phase_parameter_1": 27.0, - "phase_parameter_1_err": 28.0, - "phase_parameter_2": 29.0, - "phase_parameter_2_err": 30.0, -} +test_object = AdlerData("8268570668335894776", ["g", "r", "i"]) +model_1 = "HG12_Pen16" +model_2 = "HG" +# test data was generated in the tests/data dir using: +# python adler_run_test_data.py +# the output is copied here g_model_1 = { - "model_name": "model_1", - "phaseAngle_min": 31.0, - "phaseAngle_range": 32.0, - "nobs": 33, - "arc": 34.0, - "H": 35.0, - "H_err": 36.0, - "phase_parameter_1": 37.0, - "phase_parameter_1_err": 38.0, + "filter_name": "g", + "phaseAngle_min": 27.426668167114258, + "phaseAngle_range": 36.17388343811035, + "nobs": 9, + "arc": 3306.0079999999944, + "model_name": "HG12_Pen16", + "H": 20.719465178426468, + "H_err": 0.018444134023977106, + "phase_parameter_1": 0.62, + "phase_parameter_1_err": np.nan, + "phase_parameter_2": np.nan, + "phase_parameter_2_err": np.nan, +} +r_model_1 = { + "filter_name": "r", + "phaseAngle_min": 2.553332567214966, + "phaseAngle_range": 124.23803400993347, + "nobs": 38, + "arc": 3338.0655999999944, + "model_name": "HG12_Pen16", + "H": 19.92863542616601, + "H_err": 0.018525355171274356, + "phase_parameter_1": 1.0, + "phase_parameter_1_err": 0.05300829494059732, + "phase_parameter_2": np.nan, + "phase_parameter_2_err": np.nan, +} +i_model_1 = { + "filter_name": "i", + "phaseAngle_min": 10.0595064163208, + "phaseAngle_range": 101.74150371551514, + "nobs": 32, + "arc": 3342.0585599999977, + "model_name": "HG12_Pen16", + "H": 19.653155995892117, + "H_err": 0.01415178691419005, + "phase_parameter_1": 1.0, + "phase_parameter_1_err": 0.01516569963597136, + "phase_parameter_2": np.nan, + "phase_parameter_2_err": np.nan, +} +g_model_2 = { + "filter_name": "g", + "phaseAngle_min": 27.426668167114258, + "phaseAngle_range": 36.17388343811035, + "nobs": 9, + "arc": 3306.0079999999944, + "model_name": "HG", + "H": 20.72954332871528, + "H_err": 0.018444134023977106, + "phase_parameter_1": 0.15, + "phase_parameter_1_err": np.nan, + "phase_parameter_2": np.nan, + "phase_parameter_2_err": np.nan, } - r_model_2 = { - "model_name": "model_2", - "phaseAngle_min": 41.0, - "phaseAngle_range": 42.0, - "nobs": 43, - "arc": 44.0, - "H": 45.0, - "H_err": 46.0, - "phase_parameter_1": 47.0, - "phase_parameter_1_err": 48.0, - "phase_parameter_2": 49.0, - "phase_parameter_2_err": 50.0, + "filter_name": "r", + "phaseAngle_min": 2.553332567214966, + "phaseAngle_range": 124.23803400993347, + "nobs": 38, + "arc": 3338.0655999999944, + "model_name": "HG", + "H": 18.879873513575124, + "H_err": 0.007456882346021157, + "phase_parameter_1": -0.253, + "phase_parameter_1_err": 1.6701987135262256e-05, + "phase_parameter_2": np.nan, + "phase_parameter_2_err": np.nan, +} +i_model_2 = { + "filter_name": "i", + "phaseAngle_min": 10.0595064163208, + "phaseAngle_range": 101.74150371551514, + "nobs": 32, + "arc": 3342.0585599999977, + "model_name": "HG", + "H": 18.628894992991583, + "H_err": 0.01716803531553185, + "phase_parameter_1": -0.253, + "phase_parameter_1_err": 0.00014324314956736168, + "phase_parameter_2": np.nan, + "phase_parameter_2_err": np.nan, } -test_object.populate_phase_parameters("u", **u_model_1) -test_object.populate_phase_parameters("u", **u_model_2) -test_object.populate_phase_parameters("g", **g_model_1) -test_object.populate_phase_parameters("r", **r_model_2) +# u_model_1 = { +# "model_name": "model_1", +# "phaseAngle_min": 11.0, +# "phaseAngle_range": 12.0, +# "nobs": 13, +# "arc": 14.0, +# "H": 15.0, +# "H_err": 16.0, +# "phase_parameter_1": 17.0, +# "phase_parameter_1_err": 18.0, +# } + +# u_model_2 = { +# "model_name": "model_2", +# "H": 25.0, +# "H_err": 26.0, +# "phase_parameter_1": 27.0, +# "phase_parameter_1_err": 28.0, +# "phase_parameter_2": 29.0, +# "phase_parameter_2_err": 30.0, +# } + +# g_model_1 = { +# "model_name": "model_1", +# "phaseAngle_min": 31.0, +# "phaseAngle_range": 32.0, +# "nobs": 33, +# "arc": 34.0, +# "H": 35.0, +# "H_err": 36.0, +# "phase_parameter_1": 37.0, +# "phase_parameter_1_err": 38.0, +# } + +# r_model_2 = { +# "model_name": "model_2", +# "phaseAngle_min": 41.0, +# "phaseAngle_range": 42.0, +# "nobs": 43, +# "arc": 44.0, +# "H": 45.0, +# "H_err": 46.0, +# "phase_parameter_1": 47.0, +# "phase_parameter_1_err": 48.0, +# "phase_parameter_2": 49.0, +# "phase_parameter_2_err": 50.0, +# } + +_g_model_1 = g_model_1.copy() +del _g_model_1["filter_name"] +_g_model_2 = g_model_2.copy() +del _g_model_2["filter_name"] +_r_model_1 = r_model_1.copy() +del _r_model_1["filter_name"] +_i_model_2 = i_model_2.copy() +del _i_model_2["filter_name"] + +test_object.populate_phase_parameters("g", **_g_model_1) +test_object.populate_phase_parameters("g", **_g_model_2) +test_object.populate_phase_parameters("r", **_r_model_1) +test_object.populate_phase_parameters("i", **_i_model_2) def test_populate_phase_parameters(): # test to make sure the object is correctly populated - assert test_object.filter_list == ["u", "g", "r"] - - assert test_object.filter_dependent_values[0].model_list == ["model_1", "model_2"] - assert test_object.filter_dependent_values[1].model_list == ["model_1"] - assert test_object.filter_dependent_values[2].model_list == ["model_2"] - - assert_array_equal([a.phaseAngle_min for a in test_object.filter_dependent_values], [11.0, 31.0, 41.0]) - assert_array_equal([a.phaseAngle_range for a in test_object.filter_dependent_values], [12.0, 32.0, 42.0]) - assert_array_equal([a.nobs for a in test_object.filter_dependent_values], [13, 33, 43]) - assert_array_equal([a.arc for a in test_object.filter_dependent_values], [14.0, 34.0, 44.0]) - - assert test_object.filter_dependent_values[0].model_dependent_values[0].__dict__ == { - "filter_name": "u", - "model_name": "model_1", - "H": 15.0, - "H_err": 16.0, - "phase_parameter_1": 17.0, - "phase_parameter_1_err": 18.0, - "phase_parameter_2": np.nan, - "phase_parameter_2_err": np.nan, - } - - assert test_object.filter_dependent_values[0].model_dependent_values[1].__dict__ == { - "filter_name": "u", - "model_name": "model_2", - "H": 25.0, - "H_err": 26.0, - "phase_parameter_1": 27.0, - "phase_parameter_1_err": 28.0, - "phase_parameter_2": 29.0, - "phase_parameter_2_err": 30.0, - } - - assert test_object.filter_dependent_values[1].model_dependent_values[0].__dict__ == { - "filter_name": "g", - "model_name": "model_1", - "H": 35.0, - "H_err": 36.0, - "phase_parameter_1": 37.0, - "phase_parameter_1_err": 38.0, - "phase_parameter_2": np.nan, - "phase_parameter_2_err": np.nan, - } - - assert test_object.filter_dependent_values[2].model_dependent_values[0].__dict__ == { - "filter_name": "r", - "model_name": "model_2", - "H": 45.0, - "H_err": 46.0, - "phase_parameter_1": 47.0, - "phase_parameter_1_err": 48.0, - "phase_parameter_2": 49.0, - "phase_parameter_2_err": 50.0, - } + assert test_object.filter_list == ["g", "r", "i"] + + assert test_object.filter_dependent_values[0].model_list == [model_1, model_2] + assert test_object.filter_dependent_values[1].model_list == [model_1] + assert test_object.filter_dependent_values[2].model_list == [model_2] + + assert_almost_equal( + [a.phaseAngle_min for a in test_object.filter_dependent_values], + [g_model_1["phaseAngle_min"], r_model_1["phaseAngle_min"], i_model_1["phaseAngle_min"]], + ) + assert_array_equal( + [a.phaseAngle_range for a in test_object.filter_dependent_values], + [g_model_1["phaseAngle_range"], r_model_1["phaseAngle_range"], i_model_1["phaseAngle_range"]], + ) + assert_array_equal( + [a.nobs for a in test_object.filter_dependent_values], + [g_model_1["nobs"], r_model_1["nobs"], i_model_1["nobs"]], + ) + assert_array_equal( + [a.arc for a in test_object.filter_dependent_values], + [g_model_1["arc"], r_model_1["arc"], i_model_1["arc"]], + ) + + # assert test_object.filter_dependent_values[0].model_dependent_values[0].__dict__ == g_model_1 + # { + # "filter_name": "u", + # "model_name": "model_1", + # "H": 15.0, + # "H_err": 16.0, + # "phase_parameter_1": 17.0, + # "phase_parameter_1_err": 18.0, + # "phase_parameter_2": np.nan, + # "phase_parameter_2_err": np.nan, + # } + + # assert test_object.filter_dependent_values[0].model_dependent_values[1].__dict__ == g_model_2 + # { + # "filter_name": "u", + # "model_name": "model_2", + # "H": 25.0, + # "H_err": 26.0, + # "phase_parameter_1": 27.0, + # "phase_parameter_1_err": 28.0, + # "phase_parameter_2": 29.0, + # "phase_parameter_2_err": 30.0, + # } + + # assert test_object.filter_dependent_values[1].model_dependent_values[0].__dict__ == r_model_1 + # { + # "filter_name": "g", + # "model_name": "model_1", + # "H": 35.0, + # "H_err": 36.0, + # "phase_parameter_1": 37.0, + # "phase_parameter_1_err": 38.0, + # "phase_parameter_2": np.nan, + # "phase_parameter_2_err": np.nan, + # } + + # assert test_object.filter_dependent_values[2].model_dependent_values[0].__dict__ == i_model_2 + # { + # "filter_name": "r", + # "model_name": "model_2", + # "H": 45.0, + # "H_err": 46.0, + # "phase_parameter_1": 47.0, + # "phase_parameter_1_err": 48.0, + # "phase_parameter_2": 49.0, + # "phase_parameter_2_err": 50.0, + # } + + test_dict_list = [ + test_object.get_phase_parameters_in_filter("g", model_1).__dict__, + test_object.get_phase_parameters_in_filter("g", model_2).__dict__, + test_object.get_phase_parameters_in_filter("r", model_1).__dict__, + test_object.get_phase_parameters_in_filter("i", model_2).__dict__, + ] + expect_dict_list = [g_model_1, g_model_2, r_model_1, i_model_2] + + for test_dict, expect_dict in zip(test_dict_list, expect_dict_list): + for x in test_dict.keys(): + # print(x) + test_val = test_dict[x] + expect_val = expect_dict[x] + if type(expect_val) == str: + assert test_val == expect_val + else: + assert_almost_equal(test_val, expect_val) # check to make sure model-dependent parameter is correctly updated (then return it to previous) - test_object.populate_phase_parameters("u", model_name="model_1", H=99.0) - assert test_object.filter_dependent_values[0].model_dependent_values[0].H == 99.0 - test_object.populate_phase_parameters("u", model_name="model_1", H=15.0) + test_object.populate_phase_parameters("g", model_name=model_1, H=99.0) + assert test_object.get_phase_parameters_in_filter("g", model_1).H == 99.0 + test_object.populate_phase_parameters("g", model_name=model_1, H=g_model_1["H"]) # check to make sure filter-dependent parameter is correctly updated (then return it to previous) - test_object.populate_phase_parameters("u", nobs=99) - assert test_object.filter_dependent_values[0].nobs == 99 - test_object.populate_phase_parameters("u", nobs=13) + # print(test_object.__dict__) + test_object.populate_phase_parameters("r", model_name=model_1, nobs=99) + # print(test_object.__dict__) + # print(r_model_1["nobs"]) + # print(test_object.get_phase_parameters_in_filter("r",model_1).__dict__) + # print(test_object.get_phase_parameters_in_filter("r",model_1).nobs) + # assert test_object.filter_dependent_values[0].nobs == 99 # TODO: the indexing in this line does not access the correct filter + assert test_object.get_phase_parameters_in_filter("r", model_1).nobs == 99 + # print(r_model_1["nobs"]) + test_object.populate_phase_parameters("r", model_name=model_1, nobs=r_model_1["nobs"]) + # print(test_object.get_phase_parameters_in_filter("r",model_1).nobs) # testing to make sure the correct error messages trigger + test_filt = "y" with pytest.raises(ValueError) as error_info_1: - test_object.populate_phase_parameters("y") + test_object.populate_phase_parameters(test_filt) - assert error_info_1.value.args[0] == "Filter y does not exist in AdlerData.filter_list." + assert error_info_1.value.args[0] == "Filter {} does not exist in AdlerData.filter_list.".format( + test_filt + ) with pytest.raises(NameError) as error_info_2: - test_object.populate_phase_parameters("u", H=4.0) + test_object.populate_phase_parameters("r", H=4.0) assert error_info_2.value.args[0] == "No model name given. Cannot update model-specific phase parameters." def test_get_phase_parameters_in_filter(): - assert test_object.get_phase_parameters_in_filter("u", "model_1").__dict__ == { - "filter_name": "u", - "phaseAngle_min": 11.0, - "phaseAngle_range": 12.0, - "nobs": 13, - "arc": 14.0, - "model_name": "model_1", - "H": 15.0, - "H_err": 16.0, - "phase_parameter_1": 17.0, - "phase_parameter_1_err": 18.0, - "phase_parameter_2": np.nan, - "phase_parameter_2_err": np.nan, - } - - assert test_object.get_phase_parameters_in_filter("u", "model_2").__dict__ == { - "filter_name": "u", - "phaseAngle_min": 11.0, - "phaseAngle_range": 12.0, - "nobs": 13, - "arc": 14.0, - "model_name": "model_2", - "H": 25.0, - "H_err": 26.0, - "phase_parameter_1": 27.0, - "phase_parameter_1_err": 28.0, - "phase_parameter_2": 29.0, - "phase_parameter_2_err": 30.0, - } - - assert test_object.get_phase_parameters_in_filter("g", "model_1").__dict__ == { - "filter_name": "g", - "phaseAngle_min": 31.0, - "phaseAngle_range": 32.0, - "nobs": 33, - "arc": 34.0, - "model_name": "model_1", - "H": 35.0, - "H_err": 36.0, - "phase_parameter_1": 37.0, - "phase_parameter_1_err": 38.0, - "phase_parameter_2": np.nan, - "phase_parameter_2_err": np.nan, - } - - assert test_object.get_phase_parameters_in_filter("r", "model_2").__dict__ == { - "filter_name": "r", - "phaseAngle_min": 41.0, - "phaseAngle_range": 42.0, - "nobs": 43, - "arc": 44.0, - "model_name": "model_2", - "H": 45.0, - "H_err": 46.0, - "phase_parameter_1": 47.0, - "phase_parameter_1_err": 48.0, - "phase_parameter_2": 49.0, - "phase_parameter_2_err": 50.0, - } + # assert test_object.get_phase_parameters_in_filter("g", model_1).__dict__ == g_model_1 + # { + # "filter_name": "u", + # "phaseAngle_min": 11.0, + # "phaseAngle_range": 12.0, + # "nobs": 13, + # "arc": 14.0, + # "model_name": "model_1", + # "H": 15.0, + # "H_err": 16.0, + # "phase_parameter_1": 17.0, + # "phase_parameter_1_err": 18.0, + # "phase_parameter_2": np.nan, + # "phase_parameter_2_err": np.nan, + # } + + # assert test_object.get_phase_parameters_in_filter("g", model_2).__dict__ == g_model_2 + # { + # "filter_name": "u", + # "phaseAngle_min": 11.0, + # "phaseAngle_range": 12.0, + # "nobs": 13, + # "arc": 14.0, + # "model_name": "model_2", + # "H": 25.0, + # "H_err": 26.0, + # "phase_parameter_1": 27.0, + # "phase_parameter_1_err": 28.0, + # "phase_parameter_2": 29.0, + # "phase_parameter_2_err": 30.0, + # } + + # assert test_object.get_phase_parameters_in_filter("r", model_1).__dict__ == r_model_1 + # { + # "filter_name": "g", + # "phaseAngle_min": 31.0, + # "phaseAngle_range": 32.0, + # "nobs": 33, + # "arc": 34.0, + # "model_name": "model_1", + # "H": 35.0, + # "H_err": 36.0, + # "phase_parameter_1": 37.0, + # "phase_parameter_1_err": 38.0, + # "phase_parameter_2": np.nan, + # "phase_parameter_2_err": np.nan, + # } + + # assert test_object.get_phase_parameters_in_filter("i", model_2).__dict__ == i_model_2 + # { + # "filter_name": "r", + # "phaseAngle_min": 41.0, + # "phaseAngle_range": 42.0, + # "nobs": 43, + # "arc": 44.0, + # "model_name": "model_2", + # "H": 45.0, + # "H_err": 46.0, + # "phase_parameter_1": 47.0, + # "phase_parameter_1_err": 48.0, + # "phase_parameter_2": 49.0, + # "phase_parameter_2_err": 50.0, + # } + + test_dict_list = [ + test_object.get_phase_parameters_in_filter("g", model_1).__dict__, + test_object.get_phase_parameters_in_filter("g", model_2).__dict__, + test_object.get_phase_parameters_in_filter("r", model_1).__dict__, + test_object.get_phase_parameters_in_filter("i", model_2).__dict__, + ] + expect_dict_list = [g_model_1, g_model_2, r_model_1, i_model_2] + + for test_dict, expect_dict in zip(test_dict_list, expect_dict_list): + for x in test_dict.keys(): + # print(x) + test_val = test_dict[x] + expect_val = expect_dict[x] + if type(expect_val) == str: + assert test_val == expect_val + else: + assert_almost_equal(test_val, expect_val) # checking the error messages + test_filt = "f" with pytest.raises(ValueError) as error_info_1: - error_dict = test_object.get_phase_parameters_in_filter("f", model_name="model_2") + error_dict = test_object.get_phase_parameters_in_filter(test_filt, model_name=model_2) - assert error_info_1.value.args[0] == "Filter f does not exist in AdlerData.filter_list." + assert error_info_1.value.args[0] == "Filter {} does not exist in AdlerData.filter_list.".format( + test_filt + ) + test_filt = "r" with pytest.raises(ValueError) as error_info_2: - error_dict_2 = test_object.get_phase_parameters_in_filter("r", model_name="model_1") + error_dict_2 = test_object.get_phase_parameters_in_filter(test_filt, model_name=model_2) - assert error_info_2.value.args[0] == "Model model_1 does not exist for filter r in AdlerData.model_lists." + print(error_info_2.value.args[0]) + print("Model {} does not exist for filter {} in AdlerData.model_lists.".format(model_2, test_filt)) + assert error_info_2.value.args[ + 0 + ] == "Model {} does not exist for filter {} in AdlerData.model_lists.".format(model_2, test_filt) # here the capsys fixture captures any output to the terminal @@ -226,13 +401,17 @@ def test_print_data(capsys): # get what was printed to the terminal captured = capsys.readouterr() - expected = "Filter: u\nPhase angle minimum: 11.0\nPhase angle range: 12.0\nNumber of observations: 13\nArc: 14.0\nModel: model_1.\n\tH: 15.0\n\tH error: 16.0\n\tPhase parameter 1: 17.0\n\tPhase parameter 1 error: 18.0\n\tPhase parameter 2: nan\n\tPhase parameter 2 error: nan\nModel: model_2.\n\tH: 25.0\n\tH error: 26.0\n\tPhase parameter 1: 27.0\n\tPhase parameter 1 error: 28.0\n\tPhase parameter 2: 29.0\n\tPhase parameter 2 error: 30.0\n\n\nFilter: g\nPhase angle minimum: 31.0\nPhase angle range: 32.0\nNumber of observations: 33\nArc: 34.0\nModel: model_1.\n\tH: 35.0\n\tH error: 36.0\n\tPhase parameter 1: 37.0\n\tPhase parameter 1 error: 38.0\n\tPhase parameter 2: nan\n\tPhase parameter 2 error: nan\n\n\nFilter: r\nPhase angle minimum: 41.0\nPhase angle range: 42.0\nNumber of observations: 43\nArc: 44.0\nModel: model_2.\n\tH: 45.0\n\tH error: 46.0\n\tPhase parameter 1: 47.0\n\tPhase parameter 1 error: 48.0\n\tPhase parameter 2: 49.0\n\tPhase parameter 2 error: 50.0\n\n\n" + expected = "Filter: g\nPhase angle minimum: 27.426668167114258\nPhase angle range: 36.17388343811035\nNumber of observations: 9\nArc: 3306.0079999999944\nModel: HG12_Pen16.\n\tH: 20.719465178426468\n\tH error: 0.018444134023977106\n\tPhase parameter 1: 0.62\n\tPhase parameter 1 error: nan\n\tPhase parameter 2: nan\n\tPhase parameter 2 error: nan\nModel: HG.\n\tH: 20.72954332871528\n\tH error: 0.018444134023977106\n\tPhase parameter 1: 0.15\n\tPhase parameter 1 error: nan\n\tPhase parameter 2: nan\n\tPhase parameter 2 error: nan\n\n\nFilter: r\nPhase angle minimum: 2.553332567214966\nPhase angle range: 124.23803400993347\nNumber of observations: 38\nArc: 3338.0655999999944\nModel: HG12_Pen16.\n\tH: 19.92863542616601\n\tH error: 0.018525355171274356\n\tPhase parameter 1: 1.0\n\tPhase parameter 1 error: 0.05300829494059732\n\tPhase parameter 2: nan\n\tPhase parameter 2 error: nan\n\n\nFilter: i\nPhase angle minimum: 10.0595064163208\nPhase angle range: 101.74150371551514\nNumber of observations: 32\nArc: 3342.0585599999977\nModel: HG.\n\tH: 18.628894992991583\n\tH error: 0.01716803531553185\n\tPhase parameter 1: -0.253\n\tPhase parameter 1 error: 0.00014324314956736168\n\tPhase parameter 2: nan\n\tPhase parameter 2 error: nan\n\n\n" + # print(captured) + # print(expected) assert captured.out == expected def test_write_row_to_database(tmp_path): db_location = os.path.join(tmp_path, "test_AdlerData_database.db") + # print(db_location) + # print(test_object.__dict__) test_object.write_row_to_database(db_location) con = sqlite3.connect(db_location) @@ -240,17 +419,33 @@ def test_write_row_to_database(tmp_path): con.close() expected_data_filepath = get_test_data_filepath("test_SQL_database_table.csv") - expected_data = pd.read_csv(expected_data_filepath) + expected_data = pd.read_csv(expected_data_filepath, index_col=0) # we don't expect the timestamp column to be the same, obviously - expected_data = expected_data.drop(columns="timestamp") - written_data = written_data.drop(columns="timestamp") + drop_cols = [x for x in written_data if (x == "timestamp") or ("modelFitMjd" in x)] + expected_data = expected_data.drop(columns=drop_cols) + written_data = written_data.drop(columns=drop_cols) # note that because I'm using Pandas there's some small dtype and np.nan/None stuff to clear up # but this makes for a quick streamlined test anyway - expected_data = expected_data.replace({np.nan: None}) + # expected_data = expected_data.replace({np.nan: None}) # TODO: fix weirdness between nan and None? + written_data = written_data.replace({None: np.nan}) # TODO: fix weirdness between nan and None? expected_data = expected_data.astype({"ssObjectId": str}) - pd.testing.assert_frame_equal(expected_data, written_data, check_dtype=False) + # print(expected_data.dtypes) + # print(written_data.dtypes) + # print(expected_data.iloc[0].to_dict()) + # print(written_data.iloc[0].to_dict()) + for x in written_data.columns: + # print(x) + # print(expected_data.iloc[0][x],written_data.iloc[0][x]) + # print(type(expected_data.iloc[0][x]),type(written_data.iloc[0][x])) + # print(expected_data.iloc[0][x]==written_data.iloc[0][x]) + if type(written_data.iloc[0][x]) == str: + assert expected_data.iloc[0][x] == written_data.iloc[0][x] + else: + assert_almost_equal(expected_data.iloc[0][x], written_data.iloc[0][x]) + # pd.testing.assert_frame_equal(expected_data, written_data, check_dtype=False) + # assert expected_data.iloc[0].to_dict() == pytest.approx(written_data.iloc[0].to_dict()) def test_read_row_from_database(): @@ -259,28 +454,50 @@ def test_read_row_from_database(): db_location = get_test_data_filepath("test_AdlerData_database.db") - new_object = AdlerData("8268570668335894776", ["u", "g", "r"]) + new_object = AdlerData("8268570668335894776", ["g", "r", "i"]) new_object.populate_from_database(db_location) - assert new_object.__dict__ == test_object.__dict__ + # print(new_object.__dict__) + # print(test_object.__dict__) + # assert new_object.__dict__ == test_object.__dict__ + + for filt, model in zip(["g", "g", "r", "i"], [model_1, model_2, model_1, model_2]): + # print(filt,model) + new = new_object.get_phase_parameters_in_filter(filt, model) + # print(new.__dict__) + test = test_object.get_phase_parameters_in_filter(filt, model) + # print(test.__dict__) + # assert new.__dict__ == test.__dict__ + # assert new.__dict__ == pytest.approx(test.__dict__) + + for x in new.__dict__.keys(): + print(x) + new_val = new.__dict__[x] + test_val = test.__dict__[x] + print(new_val, test_val) + if type(new_val) == str: + assert new_val == test_val + else: + assert_almost_equal(new_val, test_val) with pytest.raises(ValueError) as error_info_1: - empty_data = AdlerData("pretend_object", ["u", "g", "r"]) + empty_data = AdlerData("pretend_object", ["g", "r", "i"]) empty_data.populate_from_database(db_location) assert error_info_1.value.args[0] == "No data found in this database for the supplied ssObjectId." with pytest.raises(ValueError) as error_info_2: - bad_filter = AdlerData("8268570668335894776", ["u", "g", "h"]) + bad_filter = AdlerData("8268570668335894776", ["g", "r", "h"]) bad_filter.populate_from_database(db_location) - assert ( - error_info_2.value.args[0] - == "Data does not exist for some of the requested filters in this database. Filters in database for this object: ['u', 'g', 'r']" + assert error_info_2.value.args[ + 0 + ] == "Data does not exist for some of the requested filters in this database. Filters in database for this object: {}".format( + test_object.filter_list ) with pytest.raises(ValueError) as error_info_3: - bad_filter = AdlerData("8268570668335894776", ["u", "g", "h"]) + bad_filter = AdlerData("8268570668335894776", ["g", "r", "h"]) bad_filter.populate_from_database("./dummy_location.db") assert error_info_3.value.args[0] == "Database cannot be found at given filepath." diff --git a/tests/adler/objectdata/test_AdlerPlanetoid.py b/tests/adler/objectdata/test_AdlerPlanetoid.py index 4716b64..1bdbaa5 100644 --- a/tests/adler/objectdata/test_AdlerPlanetoid.py +++ b/tests/adler/objectdata/test_AdlerPlanetoid.py @@ -6,7 +6,7 @@ from adler.objectdata.AdlerPlanetoid import AdlerPlanetoid -ssoid = 8268570668335894776 +ssoid = "8268570668335894776" test_db_path = get_test_data_filepath("testing_database.db") @@ -160,25 +160,43 @@ def test_failed_SQL_queries(): def test_attach_previous_adlerdata(): test_planetoid = AdlerPlanetoid.construct_from_SQL(ssoid, test_db_path, filter_list=["g", "r"]) + # TODO: this setup is currently a bit dodgy. Because AdlerData write_row_to_database appends a new line to the db the most recent model for a given object may be nan in that row + # as such this test depends on the order/number of times the adler commands have been run to make it + + # the test database can be recreated by running the Adler commands in the tests/data dir: + # adler -s 8268570668335894776 -i testing_database.db -n test_AdlerData_database.db + # adler -s 8268570668335894776 -i testing_database.db -n test_AdlerData_database.db -m HG db_location = get_test_data_filepath("test_AdlerData_database.db") + print(db_location) test_planetoid.attach_previous_adler_data(db_location) - test_output = test_planetoid.PreviousAdlerData.get_phase_parameters_in_filter("g", "model_1") + test_output = test_planetoid.PreviousAdlerData.get_phase_parameters_in_filter("r", "HG12_Pen16") + print(test_output.__dict__) expected_output = { - "filter_name": "g", - "phaseAngle_min": 31.0, - "phaseAngle_range": 32.0, - "nobs": 33, - "arc": 34.0, - "model_name": "model_1", - "H": 35.0, - "H_err": 36.0, - "phase_parameter_1": 37.0, - "phase_parameter_1_err": 38.0, + "filter_name": "r", + "phaseAngle_min": 2.553332567214966, + "phaseAngle_range": 124.23803400993347, + "nobs": 38, + "arc": 3338.0655999999944, + "model_name": "HG12_Pen16", + "H": 19.92863542616601, + "H_err": 0.018525355171274356, + "phase_parameter_1": 1.0, + "phase_parameter_1_err": 0.05300829494059732, "phase_parameter_2": np.nan, "phase_parameter_2_err": np.nan, } - - assert test_output.__dict__ == expected_output + print(expected_output) + + # assert test_output.__dict__ == pytest.approx(expected_output) # TODO: why isn't this working now? + + for x in test_output.__dict__.keys(): + print(x) + test_val = test_output.__dict__[x] + expect_val = expected_output[x] + if type(expect_val) == str: + assert test_val == expect_val + else: + assert_almost_equal(test_val, expect_val) diff --git a/tests/adler/science/test_Colour.py b/tests/adler/science/test_Colour.py index 307eb68..2240762 100644 --- a/tests/adler/science/test_Colour.py +++ b/tests/adler/science/test_Colour.py @@ -53,11 +53,18 @@ H = sso.H G12 = sso.G12 - pc = PhaseCurve(H=H * u.mag, phase_parameter_1=G12, model_name="HG12_Pen16") + pc = PhaseCurve( + H=H, + # H=H * u.mag, + phase_parameter_1=G12, + model_name="HG12_Pen16", + ) pc_fit = pc.FitModel( - np.array(getattr(obs, "phaseAngle")) * u.deg, - np.array(getattr(obs, "reduced_mag")) * u.mag, + np.radians(np.array(getattr(obs, "phaseAngle"))), + np.array(getattr(obs, "reduced_mag")), + # np.array(getattr(obs, "phaseAngle")) * u.deg, + # np.array(getattr(obs, "reduced_mag")) * u.mag, ) pc = pc.InitModelSbpy(pc_fit) @@ -110,7 +117,17 @@ def test_col_obs_ref( ) # merge with observation data (avoiding duplicating x_col) # compare results df to stored df + # NB: see also pytest.approx for comparing dictionaries for x in list(df_col): print(x) assert_array_almost_equal(np.array(df_col[x]), np.array(df_ref[x])) # TODO: diasourceId failing on ubuntu tests, due to float? need to save as str/int? + + +# test_col_obs_ref( +# planetoid=planetoid, +# adler_data=adler_data, +# column_dict=column_dict, +# N_ref_list=[1, 3, 5], +# df_ref_list=[df_N_ref_1, df_N_ref_3, df_N_ref_5], +# ) diff --git a/tests/adler/utilities/test_AdlerCLIArguments.py b/tests/adler/utilities/test_AdlerCLIArguments.py index 1a49be3..ecf4368 100644 --- a/tests/adler/utilities/test_AdlerCLIArguments.py +++ b/tests/adler/utilities/test_AdlerCLIArguments.py @@ -16,6 +16,8 @@ def __init__( outpath, db_name, sql_filename, + plot_show, + phase_model, ): self.ssObjectId = ssObjectId self.ssObjectId_list = ssObjectId_list @@ -25,6 +27,8 @@ def __init__( self.outpath = outpath self.db_name = db_name self.sql_filename = sql_filename + self.plot_show = plot_show + self.phase_model = phase_model def test_AdlerCLIArguments_population(): @@ -38,6 +42,8 @@ def test_AdlerCLIArguments_population(): "outpath": "./", "db_name": "output", "sql_filename": None, + "plot_show": False, + "phase_model": "HG12_Pen16", } good_arguments = args(**good_input_dict) good_arguments_object = AdlerCLIArguments(good_arguments) @@ -64,7 +70,16 @@ def test_AdlerCLIArguments_population(): def test_AdlerCLIArguments_badSSOID(): # test that a bad ssObjectId triggers the right error bad_ssoid_arguments = args( - "hello!", None, ["g", "r", "i"], ["g-r", "r-i"], [60000.0, 67300.0], "./", "output", "None" + "hello!", + None, + ["g", "r", "i"], + ["g-r", "r-i"], + [60000.0, 67300.0], + "./", + "output", + "None", + False, + "HG12_Pen16", ) with pytest.raises(ValueError) as bad_ssoid_error: @@ -79,7 +94,16 @@ def test_AdlerCLIArguments_badSSOID(): def test_AdlerCLIArguments_badfilters(): # test that non-LSST or unexpected filters trigger the right error bad_filter_arguments = args( - "666", None, ["g", "r", "i", "m"], ["g-r", "r-i"], [60000.0, 67300.0], "./", "output", None + "666", + None, + ["g", "r", "i", "m"], + ["g-r", "r-i"], + [60000.0, 67300.0], + "./", + "output", + None, + False, + "HG12_Pen16", ) with pytest.raises(ValueError) as bad_filter_error: @@ -91,7 +115,16 @@ def test_AdlerCLIArguments_badfilters(): ) bad_filter_arguments_2 = args( - "666", None, ["pony"], ["g-r", "r-i"], [60000.0, 67300.0], "./", "output", None + "666", + None, + ["pony"], + ["g-r", "r-i"], + [60000.0, 67300.0], + "./", + "output", + None, + False, + "HG12_Pen16", ) with pytest.raises(ValueError) as bad_filter_error_2: @@ -108,7 +141,16 @@ def test_AdlerCLIArguments_badfilters(): # the colours are not in the right format bad_colour_arguments_1 = args( - "666", None, ["g", "r", "i"], ["g - r", "r x i"], [60000.0, 67300.0], "./", "output", None + "666", + None, + ["g", "r", "i"], + ["g - r", "r x i"], + [60000.0, 67300.0], + "./", + "output", + None, + False, + "HG12_Pen16", ) err_msg1 = "Unexpected filters found in --colour_list command-line argument. --colour_list must contain LSST filters in the format 'filter2-filter1'." @@ -119,7 +161,16 @@ def test_AdlerCLIArguments_badfilters(): # colours are requested in filters that are not available bad_colour_arguments_2 = args( - "666", None, ["g", "r", "i"], ["g-r", "r-j"], [60000.0, 67300.0], "./", "output", None + "666", + None, + ["g", "r", "i"], + ["g-r", "r-j"], + [60000.0, 67300.0], + "./", + "output", + None, + False, + "HG12_Pen16", ) err_msg2 = err_msg1 @@ -130,7 +181,16 @@ def test_AdlerCLIArguments_badfilters(): # colours are requested in filters that are not available bad_colour_arguments_3 = args( - "666", None, ["g", "r"], ["g-r", "r-i"], [60000.0, 67300.0], "./", "output", None + "666", + None, + ["g", "r"], + ["g-r", "r-i"], + [60000.0, 67300.0], + "./", + "output", + None, + False, + "HG12_Pen16", ) err_msg3 = "The filters required to calculate the colours have not been requested in --filter-list" @@ -143,7 +203,16 @@ def test_AdlerCLIArguments_badfilters(): def test_AdlerCLIArguments_baddates(): # test that overly-large dates trigger the right error big_date_arguments = args( - "666", None, ["g", "r", "i"], ["g-r", "r-i"], [260000.0, 267300.0], "./", "output", None + "666", + None, + ["g", "r", "i"], + ["g-r", "r-i"], + [260000.0, 267300.0], + "./", + "output", + None, + False, + "HG12_Pen16", ) with pytest.raises(ValueError) as big_date_error: @@ -156,7 +225,16 @@ def test_AdlerCLIArguments_baddates(): # test that unexpected date values trigger the right error bad_date_arguments = args( - "666", None, ["g", "r", "i"], ["g-r", "r-i"], [60000.0, "cheese"], "./", "output", None + "666", + None, + ["g", "r", "i"], + ["g-r", "r-i"], + [60000.0, "cheese"], + "./", + "output", + None, + False, + "HG12_Pen16", ) with pytest.raises(ValueError) as bad_date_error: @@ -178,6 +256,8 @@ def test_AdlerCLIArguments_badoutput(): "./definitely_fake_folder/", "output", None, + False, + "HG12_Pen16", ) with pytest.raises(ValueError) as bad_output_error: @@ -199,6 +279,8 @@ def test_AdlerCLIArguments_badlist(): "./", "output", "./", + False, + "HG12_Pen16", ) with pytest.raises(ValueError) as bad_list_error: @@ -220,6 +302,8 @@ def test_AdlerCLIArguments_badsql(): "./", "output", "./dummy_database.db", + False, + "HG12_Pen16", ) with pytest.raises(ValueError) as bad_sql_error: @@ -229,3 +313,6 @@ def test_AdlerCLIArguments_badsql(): bad_sql_error.value.args[0] == "The file supplied for the command-line argument --sql_filename cannot be found." ) + + +# TODO: test plot_show somehow, and phase_model diff --git a/tests/adler/utilities/test_science_utilities.py b/tests/adler/utilities/test_science_utilities.py index b8a0588..19eef3c 100644 --- a/tests/adler/utilities/test_science_utilities.py +++ b/tests/adler/utilities/test_science_utilities.py @@ -213,7 +213,7 @@ def test_zero_func(): def test_sigma_clip(): - outliers = sci_utils.sigma_clip(y_res, {"maxiters": 1, "cenfunc": sci_utils.zero_func}) + outliers = sci_utils.sigma_clip(y_res, sigma=3, maxiters=1, cenfunc=sci_utils.zero_func) assert_array_equal(outliers, outliers_y) diff --git a/tests/data/adler_run_test_data.py b/tests/data/adler_run_test_data.py new file mode 100644 index 0000000..4ddbf1e --- /dev/null +++ b/tests/data/adler_run_test_data.py @@ -0,0 +1,247 @@ +import logging +import argparse +import astropy.units as u +from astropy.time import Time +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +import os +import sqlite3 + +from adler.objectdata.AdlerPlanetoid import AdlerPlanetoid +from adler.objectdata.AdlerData import AdlerData +from adler.science.PhaseCurve import PhaseCurve, ADLER_SBPY_DICT +from adler.science.Colour import col_obs_ref +from adler.utilities.AdlerCLIArguments import AdlerCLIArguments +from adler.utilities.adler_logging import setup_adler_logging +from adler.utilities.readin_utilities import read_in_SSObjectID_file +from adler.utilities.plotting_utilities import plot_errorbar +import adler.utilities.science_utilities as sci_utils + +""" +Copy of adler_run.oy which can be used to make the AdlerData testing database +""" + +ssObjectId = "8268570668335894776" +filter_list = ["g", "r", "i"] +colour_list = None +date_range = [60000.0, 67300.0] +outpath = "." +db_name = "test_AdlerData_database.db" +sql_filename = "testing_database.db" +plot_show = False +# phase_model="HG12_Pen16" +phase_model_list = ["HG12_Pen16", "HG"] + +# adler parameters +N_pc_fit = 10 # minimum number of data points to fit phase curve +diff_cut = 1.0 # magnitude difference used to identify outliers +obs_cols = ["diaSourceId", "midPointMjdTai", "outlier"] # observation columns to use + + +# Define colour parameters +# set number of reference observations to use for colour estimate +N_ref = 5 + +ssObjectId_list = [ssObjectId] + +# consider each ssObjectId in the list separately +for i, ssObjectId in enumerate(ssObjectId_list): + print( + "Query data in the range: {} <= date <= {}".format(date_range[0], date_range[1]) + ) # the adler planetoid date_range is used in an SQL BETWEEN statement which is inclusive + + # load ssObjectId data + if sql_filename: # load from a local database, if provided + msg = "query sql database {}".format(sql_filename) + print(msg) + planetoid = AdlerPlanetoid.construct_from_SQL( + ssObjectId, + filter_list=filter_list, + date_range=date_range, + sql_filename=sql_filename, + ) + else: # otherwise load from the Rubin Science Platform + msg = "query RSP" + print(msg) + planetoid = AdlerPlanetoid.construct_from_RSP(ssObjectId, filter_list, date_range) + + # TODO: Here we would load the AdlerData object from our data tables + adler_data = AdlerData(ssObjectId, planetoid.filter_list) + print(adler_data.__dict__) + + # now let's do some phase curves! + + # make an empty figure + fig = plot_errorbar(planetoid, filt_list=[]) + + # do each model + for phase_model in phase_model_list: + + print(phase_model) + # get the name of the phase parameter + # TODO: make an option for two parameter HG1G2 + phase_parameter_1 = ADLER_SBPY_DICT[phase_model]["phase_parameter_1"] + print(phase_model, phase_parameter_1) + + # set default phase parameter values + # TODO: make an option for two parameter HG1G2 + if phase_model == "HG": + phase_param_1_default = 0.15 + else: + phase_param_1_default = 0.62 + + # operate on each filter in turn + for filt in filter_list: + + # get the filter SSObject metadata + try: + sso = planetoid.SSObject_in_filter(filt) + except: + continue + + # get the observations + obs = planetoid.observations_in_filter(filt) + df_obs = pd.DataFrame(obs.__dict__) + df_obs["outlier"] = [False] * len(df_obs) + + # Determine the reference phase curve model + # TODO: We would load the best phase curve model available in AdlerData here + + # initial simple phase curve filter model with fixed phase_parameter + # use the ssObject value for H as initial guess, this is in HG12_Pen16 + # TODO: use the ssObject value for phase parameter as initial guess? + pc = PhaseCurve( + # H=sso.H * u.mag, + H=sso.H, + phase_parameter_1=phase_param_1_default, + model_name=phase_model, + ) + + # only fit phase_parameter when sufficient data is available + if len(df_obs) < N_pc_fit: + msg = "Do not fit {}, use {}={:.2f}".format( + phase_parameter_1, phase_parameter_1, pc.phase_parameter_1 + ) + # pc.model_function.G12.fixed = True + getattr(pc.model_function, phase_parameter_1).fixed = True + else: + # pc.model_function.G12.fixed = False + getattr(pc.model_function, phase_parameter_1).fixed = False + + # do a phase model fit to the past data + pc_fit = pc.FitModel( + # np.array(df_obs["phaseAngle"]) * u.deg, + # np.array(df_obs["reduced_mag"]) * u.mag, + # np.array(df_obs["magErr"]) * u.mag, + np.radians(np.array(df_obs["phaseAngle"])), + np.array(df_obs["reduced_mag"]), + np.array(df_obs["magErr"]), + ) + pc_fit = pc.InitModelSbpy(pc_fit) + + # Store the fitted values, and metadata, in an AdlerData object + ad_params = pc_fit.__dict__ + ad_params["phaseAngle_min"] = np.amin(df_obs["phaseAngle"]) # * u.deg + ad_params["phaseAngle_range"] = np.ptp(df_obs["phaseAngle"]) # * u.deg + ad_params["arc"] = np.ptp(df_obs["midPointMjdTai"]) # * u.d + ad_params["nobs"] = len(df_obs) + ad_params["modelFitMjd"] = Time.now().mjd + # adler_data.populate_phase_parameters(filt, **pc_fit.__dict__) + # TODO: replace any None with np.nan? e.g. phase_parameter_2? + adler_data.populate_phase_parameters(filt, **ad_params) + + # print the test data set + print(adler_data.get_phase_parameters_in_filter(filt, phase_model).__dict__) + + # add to plot + ax1 = fig.axes[0] + # TODO: set colours based on filter + ax1.scatter(df_obs["phaseAngle"], df_obs["reduced_mag"]) + alpha = np.linspace(0, np.amax(obs.phaseAngle)) * u.deg + ax1.plot( + alpha.value, + # pc_fit.ReducedMag(alpha).value, + # label="{}, H={:.2f}, G12={:.2f}".format(filt, pc_fit.H.value, pc_fit.phase_parameter_1), + pc_fit.ReducedMag(alpha), + label="{}, H={:.2f}, {}={:.2f}".format( + filt, pc_fit.H, phase_parameter_1, pc_fit.phase_parameter_1 + ), + ) + ax1.legend() + + # TODO: Use a CLI arg flag to open figure interactively instead of saving? + if plot_show: + plt.show() + # Save figures at the outpath location + else: + fig_file = "{}/phase_curve_{}_{}_{}.png".format( + outpath, ssObjectId, phase_model, int(np.amax(df_obs["midPointMjdTai"])) + ) + msg = "Save figure: {}".format(fig_file) + print(msg) + fig = plot_errorbar(planetoid, fig=fig, filename=fig_file) # TODO: add titles with filter name? + plt.close() + + # analyse colours for the filters provided + + # if requested cycle through the filters, calculating a colour relative to the next filter + if not colour_list: + colour_list = [] + else: + colour_list = colour_list + + # note that the order in which cli_args.filter_list is passed will determine which colours are calculated + for colour in colour_list: + col_filts = colour.split("-") + filt_obs = col_filts[0] + filt_ref = col_filts[1] + + if ~np.isin([filt_obs, filt_ref], planetoid.filter_list).all(): + missing_filts = np.array([filt_obs, filt_ref])[ + ~np.isin([filt_obs, filt_ref], planetoid.filter_list) + ] + continue + + # determine the filt_obs - filt_ref colour + # generate a plot + if plot_show: + plot_dir = None + else: + plot_dir = outpath + + col_dict = col_obs_ref( + planetoid, + adler_data, + phase_model=phase_model, + filt_obs=filt_obs, + filt_ref=filt_ref, + N_ref=N_ref, + # x1 = x1, + # x2 = x2, + plot_dir=plot_dir, + plot_show=plot_show, + ) + + # print(col_dict) + + # Output adler values to a database if a db_name is provided + print(adler_data.__dict__) + if db_name: + adler_db = "{}/{}".format(outpath, db_name) + + if os.path.isfile(adler_db): + print("clear old db file") + os.remove(adler_db) + + msg = "write to {}".format(adler_db) + print(msg) + adler_data.write_row_to_database(adler_db) + +# create the test csv file +fname_csv = "{}/test_SQL_database_table.csv".format(outpath) +conn = sqlite3.Connection(adler_db) +df = pd.read_sql("SELECT * from AdlerData", conn) +print("write to {}".format(fname_csv)) +df.to_csv(fname_csv) diff --git a/tests/data/test_AdlerData_database.db b/tests/data/test_AdlerData_database.db index 31bd8efe4e1700f5162a02236f01cd83226fd473..b6138a44d585bb4fafe554ca7fad8904293566b2 100644 GIT binary patch literal 8192 zcmWFz^vNtqRY=P(%1ta$FlG>7U}R))P*7lCV6XsUCLo3Zb0C8S#sSidNNik83kJQi zZeEaLZZQTR8U>>vFd71*Aut*OqaiRF0;3@?8UmvsFbG4SGo6uLTwb1WvTR9WQckL4 zN=|B#OJYePoWbNA78<6d#b9XJ{7hfzFRlEh<72 z1KSZ_kXV$Mn_7}u6mLjKo(MCH2$*4n#f;qil++xz%o5)$U?_lm0dsx42SPO@L@-^8 zDUL@E)O8qokX#jy;+i6ih(Hm6Mnn-bB8ngpQG^x|sC3!GNBQX35keIw1`0E zL##p(!{Rmk@$h2F>e`hyba8MFg0Q!?hTJ3XKS?dZ4bu(1YYE44Gnb-%oGgGt&EJV zj12XR42?~UO|=aStPBhstaGgtfYvxTi8)>X(wq(}udql=F4^EVk?!e^_8-oE@zL`JCfhE^+0m$HZe(TRb zTFqgVh~l)My#@}(Z+G)5dO6rn`r+S|`zOf$15g$S>{ly4w~Ft`gII8w+rj9eee&Pz zpZ0a#(sQ1*HSfP*sP?kr%0Ii0&FL5ZZ95FHNGH`nMXr1SP`^WDe#sdit>CaqM)vpQ z&J7O6%T282`lAyTTUP43nbHVzdAw07=M4n6fr z`UHKZK1Ka!2qpy6LvEQB=9_PZftjDV`25@1C=uG#R7O0}O!A5-3UM`!5YkAmT6)dT z+cbDFH>oP6cwN{a`qkfh_TlZ{Bz@sQ0!RP}AOR$R1dsp{Kmter3H%3vpXZADdS^%Z zb(8SHSa`#+kVib>_d)IOyYF52+J)CU^R@eQBaXigeu&HDblB9AQ6%DoN3*7O6U?r8 zEWAlL7C|(cJjh5s2}PEhOb2nMTwZ1pO^0F}7(su@1VYMkBLC`O#wCwLB4l7x7B_XX zs&2O2&B~Ld=3%vBGFA{NkG-k8G}&r5Oct(yUdjOI5I~0o=&%gBWQu_niAUC{C~oRb z*IC~U>&TULmNKx8gmq-Gj)ZmcyHhg7K#Rm9>r@msb*JmB@6OkGpFNvzZ8ylvKlNEE zc#r@RKmter2_OL^fCP{L5%xE>_D+dOc4;CtW?!5@M53%}v&}yZ_@qe*>M< diff --git a/tests/data/test_SQL_database_table.csv b/tests/data/test_SQL_database_table.csv index 6c34fe1..242eb3d 100644 --- a/tests/data/test_SQL_database_table.csv +++ b/tests/data/test_SQL_database_table.csv @@ -1,2 +1,2 @@ -ssObjectId,timestamp,u_phaseAngle_min,u_phaseAngle_range,u_nobs,u_arc,u_model_1_H,u_model_1_H_err,u_model_1_phase_parameter_1,u_model_1_phase_parameter_1_err,u_model_1_phase_parameter_2,u_model_1_phase_parameter_2_err,u_model_2_H,u_model_2_H_err,u_model_2_phase_parameter_1,u_model_2_phase_parameter_1_err,u_model_2_phase_parameter_2,u_model_2_phase_parameter_2_err,g_phaseAngle_min,g_phaseAngle_range,g_nobs,g_arc,g_model_1_H,g_model_1_H_err,g_model_1_phase_parameter_1,g_model_1_phase_parameter_1_err,g_model_1_phase_parameter_2,g_model_1_phase_parameter_2_err,r_phaseAngle_min,r_phaseAngle_range,r_nobs,r_arc,r_model_2_H,r_model_2_H_err,r_model_2_phase_parameter_1,r_model_2_phase_parameter_1_err,r_model_2_phase_parameter_2,r_model_2_phase_parameter_2_err -8268570668335894776,2024-04-18 13:32:07.096776+00:00,11.0,12.0,13,14.0,15.0,16.0,17.0,18.0,,,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,33,34.0,35.0,36.0,37.0,38.0,,,41.0,42.0,43,44.0,45.0,46.0,47.0,48.0,49.0,50.0 +,ssObjectId,timestamp,g_phaseAngle_min,g_phaseAngle_range,g_nobs,g_arc,g_HG12_Pen16_H,g_HG12_Pen16_H_err,g_HG12_Pen16_phase_parameter_1,g_HG12_Pen16_phase_parameter_1_err,g_HG12_Pen16_phase_parameter_2,g_HG12_Pen16_phase_parameter_2_err,g_HG12_Pen16_modelFitMjd,g_HG_H,g_HG_H_err,g_HG_phase_parameter_1,g_HG_phase_parameter_1_err,g_HG_phase_parameter_2,g_HG_phase_parameter_2_err,g_HG_modelFitMjd,r_phaseAngle_min,r_phaseAngle_range,r_nobs,r_arc,r_HG12_Pen16_H,r_HG12_Pen16_H_err,r_HG12_Pen16_phase_parameter_1,r_HG12_Pen16_phase_parameter_1_err,r_HG12_Pen16_phase_parameter_2,r_HG12_Pen16_phase_parameter_2_err,r_HG12_Pen16_modelFitMjd,r_HG_H,r_HG_H_err,r_HG_phase_parameter_1,r_HG_phase_parameter_1_err,r_HG_phase_parameter_2,r_HG_phase_parameter_2_err,r_HG_modelFitMjd,i_phaseAngle_min,i_phaseAngle_range,i_nobs,i_arc,i_HG12_Pen16_H,i_HG12_Pen16_H_err,i_HG12_Pen16_phase_parameter_1,i_HG12_Pen16_phase_parameter_1_err,i_HG12_Pen16_phase_parameter_2,i_HG12_Pen16_phase_parameter_2_err,i_HG12_Pen16_modelFitMjd,i_HG_H,i_HG_H_err,i_HG_phase_parameter_1,i_HG_phase_parameter_1_err,i_HG_phase_parameter_2,i_HG_phase_parameter_2_err,i_HG_modelFitMjd +0,8268570668335894776,2024-11-06 17:23:21.213435+00:00,27.426668167114258,36.17388343811035,9,3306.0079999999944,20.719465178426468,0.018444134023977106,0.62,,,,60620.724548566934,20.72954332871528,0.018444134023977106,0.15,,,,60620.72454979901,2.553332567214966,124.23803400993347,38,3338.0655999999944,19.92863542616601,0.018525355171274356,1.0,0.05300829494059732,,,60620.72454870314,18.879873513575124,0.007456882346021157,-0.253,1.6701987135262256e-05,,,60620.72454985305,10.0595064163208,101.74150371551514,32,3342.0585599999977,19.653155995892117,0.01415178691419005,1.0,0.01516569963597136,,,60620.72454882437,18.628894992991583,0.01716803531553185,-0.253,0.00014324314956736168,,,60620.724549914594