Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

106 outlying photometry checker #113

Merged
merged 5 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
297 changes: 297 additions & 0 deletions notebooks/outlier_detection.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,297 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "f88d62a9",
"metadata": {},
"outputs": [],
"source": [
"from adler.dataclasses.AdlerPlanetoid import AdlerPlanetoid\n",
"from adler.science.PhaseCurve import PhaseCurve\n",
"from adler.dataclasses.AdlerData import AdlerData\n",
"import adler.utilities.science_utilities as utils\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.gridspec as gridspec\n",
"import astropy.units as u"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86e657fe",
"metadata": {},
"outputs": [],
"source": [
"# ssObjectId of object to analyse\n",
"ssoid = \"8268570668335894776\"\n",
"\n",
"# load object from local database\n",
"fname = \"../tests/data/testing_database.db\"\n",
"planetoid = AdlerPlanetoid.construct_from_SQL(ssoid, sql_filename=fname)\n",
"\n",
"# retrieve observations in the r filter\n",
"obs_r = planetoid.observations_in_filter(\"r\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4131a10",
"metadata": {},
"outputs": [],
"source": [
"# define the phase curve model using the SSObject data\n",
"\n",
"sso_r = planetoid.SSObject_in_filter(\"r\")\n",
"r_H = sso_r.H\n",
"r_G12 = sso_r.G12\n",
"\n",
"pc = PhaseCurve(abs_mag=r_H * u.mag, phase_param=r_G12, model_name=\"HG12_Pen16\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0f625b5a",
"metadata": {},
"outputs": [],
"source": [
"# calculate data - model residuals\n",
"res = obs_r.reduced_mag - pc.ReducedMag(obs_r.phaseAngle * u.degree).value"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "81e04bad",
"metadata": {},
"outputs": [],
"source": [
"res"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f573cce4",
"metadata": {},
"outputs": [],
"source": [
"# plot the observations with the SSObject phase curve\n",
"x_plot = \"phaseAngle\"\n",
"y_plot = \"reduced_mag\"\n",
"\n",
"x = getattr(obs_r, x_plot)\n",
"y = getattr(obs_r, y_plot)\n",
"yerr = obs_r.magErr\n",
"\n",
"fig = plt.figure()\n",
"gs = gridspec.GridSpec(1, 1)\n",
"ax1 = plt.subplot(gs[0, 0])\n",
"\n",
"ax1.errorbar(x, y, yerr, fmt=\"o\")\n",
"\n",
"# plot the phase curve model\n",
"alpha = np.linspace(0, np.amax(obs_r.phaseAngle)) * u.deg\n",
"red_mag = pc.ReducedMag(alpha)\n",
"\n",
"# legend label for the phase curve model\n",
"pc_label = []\n",
"for x in pc.model_function.param_names:\n",
" pc_label.append(\"{}={:.2f}\".format(x, getattr(pc.model_function, x).value))\n",
"pc_label = \", \".join(pc_label)\n",
"\n",
"ax1.plot(alpha.value, red_mag.value, c=\"k\", label=pc_label)\n",
"\n",
"ax1.invert_yaxis()\n",
"ax1.set_xlabel(x_plot)\n",
"ax1.set_ylabel(y_plot)\n",
"ax1.legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aee95371",
"metadata": {},
"outputs": [],
"source": [
"# plot the data - model residuals\n",
"x_plot = \"phaseAngle\"\n",
"\n",
"x = getattr(obs_r, x_plot)\n",
"yerr = obs_r.magErr\n",
"\n",
"fig = plt.figure()\n",
"gs = gridspec.GridSpec(1, 1)\n",
"ax1 = plt.subplot(gs[0, 0])\n",
"\n",
"ax1.errorbar(x, res, yerr, fmt=\"o\")\n",
"\n",
"ax1.axhline(0, c=\"k\")\n",
"\n",
"# indicate the standard deviations of the residuals\n",
"res_std = np.std(res)\n",
"for i in range(1, 4):\n",
" ax1.axhline(res_std * i, ls=\":\", c=\"C{}\".format(i), label=\"{} sigma\".format(i))\n",
" ax1.axhline(-res_std * i, ls=\":\", c=\"C{}\".format(i))\n",
"\n",
"ax1.invert_yaxis()\n",
"ax1.set_xlabel(x_plot)\n",
"ax1.set_ylabel(\"data - model\")\n",
"ax1.legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "13e25b85",
"metadata": {},
"outputs": [],
"source": [
"# return a list of flags for outlying objects"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "16c712ff",
"metadata": {},
"outputs": [],
"source": [
"# astropy sigma_clip normally uses the median as the central function\n",
"utils.sigma_clip(res, {\"cenfunc\": \"median\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8983c9dd",
"metadata": {},
"outputs": [],
"source": [
"# assuming that the model is the ground truth, we use zero as the centroid for the residuals\n",
"utils.sigma_clip(res, {\"cenfunc\": utils.zero_func})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15bdb74e",
"metadata": {},
"outputs": [],
"source": [
"# use the standard deviation of the residuals to identify outliers\n",
"utils.outlier_std(res, res, std_cut=3.0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "50276ce5",
"metadata": {},
"outputs": [],
"source": [
"# use a simple threshold value for residuals to find outliers\n",
"utils.outlier_diff(res, diff_cut=1.0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "64e2f5bc",
"metadata": {},
"outputs": [],
"source": [
"# consider the residual compared to the uncertainty of the measurement\n",
"std_err = 5\n",
"utils.outlier_sigma_diff(res, yerr, std_err)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3f8cc586",
"metadata": {},
"outputs": [],
"source": [
"# plot the data - model residuals\n",
"x_plot = \"phaseAngle\"\n",
"\n",
"x = getattr(obs_r, x_plot)\n",
"yerr = obs_r.magErr\n",
"\n",
"fig = plt.figure()\n",
"gs = gridspec.GridSpec(1, 1)\n",
"ax1 = plt.subplot(gs[0, 0])\n",
"\n",
"ax1.errorbar(x, res, yerr, fmt=\"o\")\n",
"\n",
"ax1.axhline(0, c=\"k\")\n",
"\n",
"# indicate the standard deviations of the residuals\n",
"res_std = np.std(res)\n",
"for i in range(1, 4):\n",
" ax1.axhline(res_std * i, ls=\":\", c=\"C{}\".format(i), label=\"{} sigma\".format(i))\n",
" ax1.axhline(-res_std * i, ls=\":\", c=\"C{}\".format(i))\n",
"\n",
"mask = utils.outlier_sigma_diff(res, yerr, std_err)\n",
"ax1.scatter(x[mask], res[mask], edgecolor=\"r\", facecolor=\"none\", marker=\"o\", zorder=3)\n",
"\n",
"ax1.invert_yaxis()\n",
"ax1.set_xlabel(x_plot)\n",
"ax1.set_ylabel(\"data - model\")\n",
"ax1.legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c935b03",
"metadata": {},
"outputs": [],
"source": [
"# NB that for phase curve models, residuals can be much larger than the photometric uncertainty!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bcccc758",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "adler-dev",
"language": "python",
"name": "adler-dev"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading
Loading