diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..133d784 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,26 @@ +FROM python:3.7 + +# Installing git. update and install share one RUN so a stale cached package +# index is never reused; the apt lists are removed in the same layer. +RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/* +# Installing jupyterlab, numpy, Cython and pyarrow (--no-cache-dir keeps pip's download cache out of the layer) +RUN pip install --no-cache-dir jupyterlab numpy Cython pyarrow + +# Installing requirements (copied on its own so this layer is rebuilt only when requirements.txt changes) +COPY ./requirements.txt requirements.txt +RUN pip install --no-cache-dir -r requirements.txt + +# Copying examples +COPY ./examples /examples + +# Copying this package and installing it in editable mode +COPY . /package +RUN pip install --no-cache-dir -e /package + +# Download the pre-trained model at build time so it ships inside the image +RUN python -c "from lc_classifier.classifier.models import HierarchicalRandomForest;HierarchicalRandomForest({}).download_model()" + +WORKDIR /examples +EXPOSE 8888 +# NOTE: the empty token disables notebook authentication; publish port 8888 on trusted hosts only +CMD ["jupyter", "notebook", "--allow-root", "--ip", "0.0.0.0", "--NotebookApp.token=''"] diff --git a/README.md b/README.md index faa7a7f..5e930df 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ pip install -e . # Functionalities ## Augmentation -If you want more samples you can use our implementation of data augmentation. For now you can use ShortTransientAugmenter for some transients. +If you want more samples you can use our implementation of data augmentation. For now you can use ShortTransientAugmenter for some transients. ## Classifier The classifier code contains BaseClassifier (a simple random forest) and HierarchicalRandomForest (a random forest with internal hierarchy), both with methods for fit and predict. @@ -39,7 +39,7 @@ Before to get features, we preprocess the time series with filters and boundary ### Extractors: The extractors are the portion of code with the logic to extract features from time series. Each extractor do only one task, after that our CustomHierarchicalExtractor merge all extractors for get features to train the model. -##### How can I add extractors to library? +##### How can I add extractors to library? You can use inheritance from `base extractors` and use it for create your own extractor. 
For now you can inherit: - `FeatureExtractor` is a generic extractor only fill methods. - `FeatureExtractorSingleBand` is a extractor that compute features by band. @@ -83,6 +83,20 @@ After that you can see a report of tests: coverage report ``` +# Run a container + +This repository comes with a Dockerfile to test the model. + +To build the image, run the following from the repository root: +``` +docker build -t alerce/lc_classifier . +``` +Then run the container: +``` +docker run --rm -p 8888:8888 alerce/lc_classifier +``` +The container runs a Jupyter notebook server with some examples, available at `http://localhost:8888`. + # Reference If you use this library, please cite our work: diff --git a/examples/Compute Features From LightCurve.ipynb b/examples/Compute Features From LightCurve.ipynb new file mode 100644 index 0000000..08edecc --- /dev/null +++ b/examples/Compute Features From LightCurve.ipynb @@ -0,0 +1,1251 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import warnings\n", + "from lc_classifier.features import CustomHierarchicalExtractor\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Reading the sample data\n", + "\n", + "To calculate the model features we will need:\n", + "- Detections: Alerts from the ZTF stream, with 5 $\\sigma$ difference between the template and the current image.\n", + "- Non Detections: Previous measurements of the object but with less than 5 $\\sigma$ magnitudes.\n", + "- Object Information: Aggregated information by ALeRCE." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "detections = pd.read_parquet('data/detections_sample.parquet')\n", + "non_detections = pd.read_parquet('data/non_detections_sample.parquet')\n", + "object_information = pd.read_parquet('data/object_info_sample.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fidisdiffpossigmapsffieldfwhmdecmagpsfrcidrasky...sgscore3neargaiasgscore1mjdcorrectedmagpsf_corrsigmapsf_corrsigmapsf_corr_extdubioushas_stamp
oid
ZTF17aaaemke1-1.00.0989723962.9100000.80200117.95747961358.760453-0.130846...0.4412080.0613950.9987558700.461042True16.4785760.0085410.025349FalseTrue
ZTF17aaaemke1-1.00.0750923963.8500000.80193017.80670461358.7604340.125953...0.4412080.2747040.9987558754.290857True16.520813100.0000000.022974FalseTrue
ZTF17aaaemke1-1.00.0932253963.1600000.80199717.61657161358.7604720.457313...0.4412080.0053240.9987558763.377998True16.5863250.0246600.036094FalseTrue
ZTF17aaaemke1-1.00.1463963961.5440620.80199718.68696261358.760405-0.294222...0.4412080.2332890.9987558718.413889True16.350400100.0000000.017018FalseTrue
ZTF17aaaemke1-1.00.0914173963.6700000.80189217.77518161358.7604520.110218...0.4412080.3862950.9987558372.401910True16.5306430.0147380.029054FalseTrue
\n", + "

5 rows × 30 columns

\n", + "
" + ], + "text/plain": [ + " fid isdiffpos sigmapsf field fwhm dec magpsf \\\n", + "oid \n", + "ZTF17aaaemke 1 -1.0 0.098972 396 2.910000 0.802001 17.957479 \n", + "ZTF17aaaemke 1 -1.0 0.075092 396 3.850000 0.801930 17.806704 \n", + "ZTF17aaaemke 1 -1.0 0.093225 396 3.160000 0.801997 17.616571 \n", + "ZTF17aaaemke 1 -1.0 0.146396 396 1.544062 0.801997 18.686962 \n", + "ZTF17aaaemke 1 -1.0 0.091417 396 3.670000 0.801892 17.775181 \n", + "\n", + " rcid ra sky ... sgscore3 neargaia sgscore1 \\\n", + "oid ... \n", + "ZTF17aaaemke 61 358.760453 -0.130846 ... 0.441208 0.061395 0.99875 \n", + "ZTF17aaaemke 61 358.760434 0.125953 ... 0.441208 0.274704 0.99875 \n", + "ZTF17aaaemke 61 358.760472 0.457313 ... 0.441208 0.005324 0.99875 \n", + "ZTF17aaaemke 61 358.760405 -0.294222 ... 0.441208 0.233289 0.99875 \n", + "ZTF17aaaemke 61 358.760452 0.110218 ... 0.441208 0.386295 0.99875 \n", + "\n", + " mjd corrected magpsf_corr sigmapsf_corr \\\n", + "oid \n", + "ZTF17aaaemke 58700.461042 True 16.478576 0.008541 \n", + "ZTF17aaaemke 58754.290857 True 16.520813 100.000000 \n", + "ZTF17aaaemke 58763.377998 True 16.586325 0.024660 \n", + "ZTF17aaaemke 58718.413889 True 16.350400 100.000000 \n", + "ZTF17aaaemke 58372.401910 True 16.530643 0.014738 \n", + "\n", + " sigmapsf_corr_ext dubious has_stamp \n", + "oid \n", + "ZTF17aaaemke 0.025349 False True \n", + "ZTF17aaaemke 0.022974 False True \n", + "ZTF17aaaemke 0.036094 False True \n", + "ZTF17aaaemke 0.017018 False True \n", + "ZTF17aaaemke 0.029054 False True \n", + "\n", + "[5 rows x 30 columns]" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "detections.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parent_candidjdfidpiddiffmaglimpdiffimfilenameprogrampiprogramidcandidisdiffpos...ranrdecnrscorrmagzpscimagzpsciuncmagzpscirmsclrcoeffclrcouncrbversionmjd
oid
ZTF17aaaemke6184019061150100022.458353e+06159837703061520.065100/ztf/archive/sci/2018/0822/376991/ztf_20180822...Kulkarni1NaNNone...NaNNaNNaN0.00.00.00.00.0t12_f5_c358352.377037
ZTF17aaaemke6363532061150100012.458361e+06160640359061519.109699/ztf/archive/sci/2018/0830/403600/ztf_20180830...Kulkarni1NaNNone...NaNNaNNaN0.00.00.00.00.0t12_f5_c358360.403600
ZTF17aaaemke6184019061150100022.458364e+06160931289061520.057800/ztf/archive/sci/2018/0902/312859/ztf_20180902...Kulkarni1NaNNone...NaNNaNNaN0.00.00.00.00.0t12_f5_c358363.312894
ZTF17aaaemke6184019061150100022.458367e+06161240050061520.791800/ztf/archive/sci/2018/0905/400509/ztf_20180905...Kulkarni1NaNNone...NaNNaNNaN0.00.00.00.00.0t12_f5_c358366.400509
ZTF17aaaemke6184019061150100022.458370e+06161532574061520.812700/ztf/archive/sci/2018/0908/325729/ztf_20180908...Kulkarni1NaNNone...NaNNaNNaN0.00.00.00.00.0t12_f5_c358369.325741
\n", + "

5 rows × 59 columns

\n", + "
" + ], + "text/plain": [ + " parent_candid jd fid pid diffmaglim \\\n", + "oid \n", + "ZTF17aaaemke 618401906115010002 2.458353e+06 1 598377030615 20.065100 \n", + "ZTF17aaaemke 636353206115010001 2.458361e+06 1 606403590615 19.109699 \n", + "ZTF17aaaemke 618401906115010002 2.458364e+06 1 609312890615 20.057800 \n", + "ZTF17aaaemke 618401906115010002 2.458367e+06 1 612400500615 20.791800 \n", + "ZTF17aaaemke 618401906115010002 2.458370e+06 1 615325740615 20.812700 \n", + "\n", + " pdiffimfilename programpi \\\n", + "oid \n", + "ZTF17aaaemke /ztf/archive/sci/2018/0822/376991/ztf_20180822... Kulkarni \n", + "ZTF17aaaemke /ztf/archive/sci/2018/0830/403600/ztf_20180830... Kulkarni \n", + "ZTF17aaaemke /ztf/archive/sci/2018/0902/312859/ztf_20180902... Kulkarni \n", + "ZTF17aaaemke /ztf/archive/sci/2018/0905/400509/ztf_20180905... Kulkarni \n", + "ZTF17aaaemke /ztf/archive/sci/2018/0908/325729/ztf_20180908... Kulkarni \n", + "\n", + " programid candid isdiffpos ... ranr decnr scorr magzpsci \\\n", + "oid ... \n", + "ZTF17aaaemke 1 NaN None ... NaN NaN NaN 0.0 \n", + "ZTF17aaaemke 1 NaN None ... NaN NaN NaN 0.0 \n", + "ZTF17aaaemke 1 NaN None ... NaN NaN NaN 0.0 \n", + "ZTF17aaaemke 1 NaN None ... NaN NaN NaN 0.0 \n", + "ZTF17aaaemke 1 NaN None ... 
NaN NaN NaN 0.0 \n", + "\n", + " magzpsciunc magzpscirms clrcoeff clrcounc rbversion \\\n", + "oid \n", + "ZTF17aaaemke 0.0 0.0 0.0 0.0 t12_f5_c3 \n", + "ZTF17aaaemke 0.0 0.0 0.0 0.0 t12_f5_c3 \n", + "ZTF17aaaemke 0.0 0.0 0.0 0.0 t12_f5_c3 \n", + "ZTF17aaaemke 0.0 0.0 0.0 0.0 t12_f5_c3 \n", + "ZTF17aaaemke 0.0 0.0 0.0 0.0 t12_f5_c3 \n", + "\n", + " mjd \n", + "oid \n", + "ZTF17aaaemke 58352.377037 \n", + "ZTF17aaaemke 58360.403600 \n", + "ZTF17aaaemke 58363.312894 \n", + "ZTF17aaaemke 58366.400509 \n", + "ZTF17aaaemke 58369.325741 \n", + "\n", + "[5 rows x 59 columns]" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "non_detections.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ndethistncovhistmjdstarthistmjdendhistmeanrameandecsigmarasigmadecfirstmjdlastmjd...nearZTFnearPS1stellarcorrectedndetndubiousg-r_maxg-r_max_corrg-r_meang-r_mean_corr
oid
ZTF17aaaemke135.0670.058302.48194459107.311562358.7604400.8019900.0000860.00006058372.40191059107.311562...TrueFalseTrueTrue8801.2095300.8288000.5314330.952658
ZTF17aaafyya968.01471.058101.20128559107.36621517.78727161.1290360.0001280.00006658314.46775559107.366215...TrueTrueTrueTrue37530.7575560.8242600.6353230.916122
ZTF17aaageae280.0783.058351.39006959107.3106258.6869620.9057690.0001070.00006758355.37199159107.310625...TrueTrueTrueTrue21700.168999-0.0308950.0631140.207873
ZTF17aaaivsr232.0669.058101.41075258981.216250129.3978649.6540560.0000600.00006458375.51610058981.216250...TrueTrueTrueTrue7900.6631410.6300970.8235360.559042
ZTF17aaaizej332.0428.058101.41944458987.201377141.2249074.2637690.0000710.00005058472.42032458987.201377...TrueTrueTrueTrue1000-0.603085-0.1055300.1136680.197381
\n", + "

5 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + " ndethist ncovhist mjdstarthist mjdendhist meanra \\\n", + "oid \n", + "ZTF17aaaemke 135.0 670.0 58302.481944 59107.311562 358.760440 \n", + "ZTF17aaafyya 968.0 1471.0 58101.201285 59107.366215 17.787271 \n", + "ZTF17aaageae 280.0 783.0 58351.390069 59107.310625 8.686962 \n", + "ZTF17aaaivsr 232.0 669.0 58101.410752 58981.216250 129.397864 \n", + "ZTF17aaaizej 332.0 428.0 58101.419444 58987.201377 141.224907 \n", + "\n", + " meandec sigmara sigmadec firstmjd lastmjd ... \\\n", + "oid ... \n", + "ZTF17aaaemke 0.801990 0.000086 0.000060 58372.401910 59107.311562 ... \n", + "ZTF17aaafyya 61.129036 0.000128 0.000066 58314.467755 59107.366215 ... \n", + "ZTF17aaageae 0.905769 0.000107 0.000067 58355.371991 59107.310625 ... \n", + "ZTF17aaaivsr 9.654056 0.000060 0.000064 58375.516100 58981.216250 ... \n", + "ZTF17aaaizej 4.263769 0.000071 0.000050 58472.420324 58987.201377 ... \n", + "\n", + " nearZTF nearPS1 stellar corrected ndet ndubious g-r_max \\\n", + "oid \n", + "ZTF17aaaemke True False True True 88 0 1.209530 \n", + "ZTF17aaafyya True True True True 375 3 0.757556 \n", + "ZTF17aaageae True True True True 217 0 0.168999 \n", + "ZTF17aaaivsr True True True True 79 0 0.663141 \n", + "ZTF17aaaizej True True True True 100 0 -0.603085 \n", + "\n", + " g-r_max_corr g-r_mean g-r_mean_corr \n", + "oid \n", + "ZTF17aaaemke 0.828800 0.531433 0.952658 \n", + "ZTF17aaafyya 0.824260 0.635323 0.916122 \n", + "ZTF17aaageae -0.030895 0.063114 0.207873 \n", + "ZTF17aaaivsr 0.630097 0.823536 0.559042 \n", + "ZTF17aaaizej -0.105530 0.113668 0.197381 \n", + "\n", + "[5 rows x 22 columns]" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "object_information.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Calculating features" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "# The extractor used is 
CustomHierarchicalExtractor\n", + "features_computer = CustomHierarchicalExtractor()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "# We use the compute_features method\n", + "features = features_computer.compute_features(\n", + " detections = detections,\n", + " non_detections = non_detections,\n", + " objects = object_information)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Amplitude_1Amplitude_2AndersonDarling_1AndersonDarling_2Autocor_length_1Autocor_length_2Beyond1Std_1Beyond1Std_2Con_1Con_2...n_non_det_after_fid_1n_non_det_after_fid_2n_non_det_before_fid_1n_non_det_before_fid_2n_pos_1n_pos_2positive_fraction_1positive_fraction_2rbsgscore1
oid
ZTF17aaaemke0.3199450.2776610.9999980.9999971.01.00.4500000.3636360.0263160.0...105103775120.1250000.3636360.8800000.998750
ZTF17aaafyya0.3730890.3476661.0000001.0000001.01.00.4045800.3917530.0000000.0...89821152430.3969470.4432990.7878570.996250
ZTF17aaageae0.2197090.3386111.0000001.0000001.01.00.3400000.1052630.0000000.0...1331775128160.5600000.4210530.7492861.000000
ZTF17aaaivsr0.3432580.3052700.9999990.8219261.01.00.3714290.4000000.0000000.0...2471201340.3714290.1600000.7916670.992012
ZTF17aaaizej0.6759340.4044251.0000001.0000001.01.00.3250000.3111110.0000000.0...816111480.3500000.1777780.8433331.000000
ZTF17aaajiztNaN0.462060NaN0.993290NaN1.0NaN0.260870NaN0.0...NaN18NaN1NaN10NaN0.4347830.7633330.987768
ZTF17aaajlul0.4072200.3181671.0000001.0000001.01.00.4250000.4081630.0000000.0...2832001080.2500000.1632650.8171431.000000
ZTF17aaakstv0.1577150.2942210.9873770.9996382.01.00.3750000.4166670.0000000.0...325011440.5000000.3333330.6869050.998750
ZTF17aaanxog0.5785540.4335260.9988140.9785311.01.00.3888890.3846150.0000000.0...615923720.3888890.1538460.7914290.996875
ZTF17aaanyaa0.2720500.2436521.0000001.0000001.01.00.4705880.3846150.0000000.0...212300690.1764710.3461540.8914291.000000
\n", + "

10 rows × 172 columns

\n", + "
" + ], + "text/plain": [ + " Amplitude_1 Amplitude_2 AndersonDarling_1 AndersonDarling_2 \\\n", + "oid \n", + "ZTF17aaaemke 0.319945 0.277661 0.999998 0.999997 \n", + "ZTF17aaafyya 0.373089 0.347666 1.000000 1.000000 \n", + "ZTF17aaageae 0.219709 0.338611 1.000000 1.000000 \n", + "ZTF17aaaivsr 0.343258 0.305270 0.999999 0.821926 \n", + "ZTF17aaaizej 0.675934 0.404425 1.000000 1.000000 \n", + "ZTF17aaajizt NaN 0.462060 NaN 0.993290 \n", + "ZTF17aaajlul 0.407220 0.318167 1.000000 1.000000 \n", + "ZTF17aaakstv 0.157715 0.294221 0.987377 0.999638 \n", + "ZTF17aaanxog 0.578554 0.433526 0.998814 0.978531 \n", + "ZTF17aaanyaa 0.272050 0.243652 1.000000 1.000000 \n", + "\n", + " Autocor_length_1 Autocor_length_2 Beyond1Std_1 Beyond1Std_2 \\\n", + "oid \n", + "ZTF17aaaemke 1.0 1.0 0.450000 0.363636 \n", + "ZTF17aaafyya 1.0 1.0 0.404580 0.391753 \n", + "ZTF17aaageae 1.0 1.0 0.340000 0.105263 \n", + "ZTF17aaaivsr 1.0 1.0 0.371429 0.400000 \n", + "ZTF17aaaizej 1.0 1.0 0.325000 0.311111 \n", + "ZTF17aaajizt NaN 1.0 NaN 0.260870 \n", + "ZTF17aaajlul 1.0 1.0 0.425000 0.408163 \n", + "ZTF17aaakstv 2.0 1.0 0.375000 0.416667 \n", + "ZTF17aaanxog 1.0 1.0 0.388889 0.384615 \n", + "ZTF17aaanyaa 1.0 1.0 0.470588 0.384615 \n", + "\n", + " Con_1 Con_2 ... n_non_det_after_fid_1 \\\n", + "oid ... \n", + "ZTF17aaaemke 0.026316 0.0 ... 105 \n", + "ZTF17aaafyya 0.000000 0.0 ... 89 \n", + "ZTF17aaageae 0.000000 0.0 ... 133 \n", + "ZTF17aaaivsr 0.000000 0.0 ... 24 \n", + "ZTF17aaaizej 0.000000 0.0 ... 8 \n", + "ZTF17aaajizt NaN 0.0 ... NaN \n", + "ZTF17aaajlul 0.000000 0.0 ... 28 \n", + "ZTF17aaakstv 0.000000 0.0 ... 32 \n", + "ZTF17aaanxog 0.000000 0.0 ... 61 \n", + "ZTF17aaanyaa 0.000000 0.0 ... 
21 \n", + "\n", + " n_non_det_after_fid_2 n_non_det_before_fid_1 \\\n", + "oid \n", + "ZTF17aaaemke 103 7 \n", + "ZTF17aaafyya 82 1 \n", + "ZTF17aaageae 177 5 \n", + "ZTF17aaaivsr 71 2 \n", + "ZTF17aaaizej 16 1 \n", + "ZTF17aaajizt 18 NaN \n", + "ZTF17aaajlul 32 0 \n", + "ZTF17aaakstv 50 1 \n", + "ZTF17aaanxog 59 2 \n", + "ZTF17aaanyaa 23 0 \n", + "\n", + " n_non_det_before_fid_2 n_pos_1 n_pos_2 positive_fraction_1 \\\n", + "oid \n", + "ZTF17aaaemke 7 5 12 0.125000 \n", + "ZTF17aaafyya 1 52 43 0.396947 \n", + "ZTF17aaageae 1 28 16 0.560000 \n", + "ZTF17aaaivsr 0 13 4 0.371429 \n", + "ZTF17aaaizej 1 14 8 0.350000 \n", + "ZTF17aaajizt 1 NaN 10 NaN \n", + "ZTF17aaajlul 0 10 8 0.250000 \n", + "ZTF17aaakstv 1 4 4 0.500000 \n", + "ZTF17aaanxog 3 7 2 0.388889 \n", + "ZTF17aaanyaa 0 6 9 0.176471 \n", + "\n", + " positive_fraction_2 rb sgscore1 \n", + "oid \n", + "ZTF17aaaemke 0.363636 0.880000 0.998750 \n", + "ZTF17aaafyya 0.443299 0.787857 0.996250 \n", + "ZTF17aaageae 0.421053 0.749286 1.000000 \n", + "ZTF17aaaivsr 0.160000 0.791667 0.992012 \n", + "ZTF17aaaizej 0.177778 0.843333 1.000000 \n", + "ZTF17aaajizt 0.434783 0.763333 0.987768 \n", + "ZTF17aaajlul 0.163265 0.817143 1.000000 \n", + "ZTF17aaakstv 0.333333 0.686905 0.998750 \n", + "ZTF17aaanxog 0.153846 0.791429 0.996875 \n", + "ZTF17aaanyaa 0.346154 0.891429 1.000000 \n", + "\n", + "[10 rows x 172 columns]" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The result is a dataframe with the features\n", + "features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To this features we add some other features, more specific WISE colors." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/Inference with Hierarchical Model.ipynb b/examples/Inference with Hierarchical Model.ipynb new file mode 100644 index 0000000..56f8bb7 --- /dev/null +++ b/examples/Inference with Hierarchical Model.ipynb @@ -0,0 +1,844 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import warnings\n", + "\n", + "from lc_classifier.classifier.models import HierarchicalRandomForest \n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Loading data" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "features = pd.read_parquet(\"data/features_sample.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MHAOV_Period_1MHAOV_Period_2Amplitude_1Amplitude_2AndersonDarling_1AndersonDarling_2Autocor_length_1Autocor_length_2Beyond1Std_1Beyond1Std_2...W4W1-W2W2-W3r-W3r-W2g-W3g-W2g-r_mldelta_period_1delta_period_2
oid
ZTF17aaaaavn0.1060210.6135720.0789120.0978600.8479020.6737122.01.00.3809520.428571...9.426-0.3011.4914.2397092.7487095.3504753.8594751.1107660.0179904.895612e-01
ZTF17aaaadjh0.2334430.2334540.4939800.4637651.0000001.0000001.01.00.2790700.293103...8.905-0.1252.3804.5339042.1539045.2814452.9014450.7475410.0000115.197596e-08
ZTF17aaaadwo0.0600490.2168730.0907080.1965420.9036250.9938061.01.00.3684210.285714...8.848-0.0651.6024.3443522.7423525.4959743.8939741.1516220.4230442.662193e-01
ZTF17aaaajjv526.315748555.5555290.5396000.5662761.0000001.00000014.020.00.4195800.481203...5.957-0.0400.9977.7656966.76869610.1935319.1965312.42783529.2396371.437226e-04
ZTF17aaaakjt0.1482650.1482650.5648670.5724490.9999070.9905011.01.00.3384620.350000...9.098-0.0270.0273.1612383.1342384.2988264.2718261.1375880.0000022.169809e-06
\n", + "

5 rows × 183 columns

\n", + "
" + ], + "text/plain": [ + " MHAOV_Period_1 MHAOV_Period_2 Amplitude_1 Amplitude_2 \\\n", + "oid \n", + "ZTF17aaaaavn 0.106021 0.613572 0.078912 0.097860 \n", + "ZTF17aaaadjh 0.233443 0.233454 0.493980 0.463765 \n", + "ZTF17aaaadwo 0.060049 0.216873 0.090708 0.196542 \n", + "ZTF17aaaajjv 526.315748 555.555529 0.539600 0.566276 \n", + "ZTF17aaaakjt 0.148265 0.148265 0.564867 0.572449 \n", + "\n", + " AndersonDarling_1 AndersonDarling_2 Autocor_length_1 \\\n", + "oid \n", + "ZTF17aaaaavn 0.847902 0.673712 2.0 \n", + "ZTF17aaaadjh 1.000000 1.000000 1.0 \n", + "ZTF17aaaadwo 0.903625 0.993806 1.0 \n", + "ZTF17aaaajjv 1.000000 1.000000 14.0 \n", + "ZTF17aaaakjt 0.999907 0.990501 1.0 \n", + "\n", + " Autocor_length_2 Beyond1Std_1 Beyond1Std_2 ... W4 W1-W2 \\\n", + "oid ... \n", + "ZTF17aaaaavn 1.0 0.380952 0.428571 ... 9.426 -0.301 \n", + "ZTF17aaaadjh 1.0 0.279070 0.293103 ... 8.905 -0.125 \n", + "ZTF17aaaadwo 1.0 0.368421 0.285714 ... 8.848 -0.065 \n", + "ZTF17aaaajjv 20.0 0.419580 0.481203 ... 5.957 -0.040 \n", + "ZTF17aaaakjt 1.0 0.338462 0.350000 ... 
9.098 -0.027 \n", + "\n", + " W2-W3 r-W3 r-W2 g-W3 g-W2 g-r_ml \\\n", + "oid \n", + "ZTF17aaaaavn 1.491 4.239709 2.748709 5.350475 3.859475 1.110766 \n", + "ZTF17aaaadjh 2.380 4.533904 2.153904 5.281445 2.901445 0.747541 \n", + "ZTF17aaaadwo 1.602 4.344352 2.742352 5.495974 3.893974 1.151622 \n", + "ZTF17aaaajjv 0.997 7.765696 6.768696 10.193531 9.196531 2.427835 \n", + "ZTF17aaaakjt 0.027 3.161238 3.134238 4.298826 4.271826 1.137588 \n", + "\n", + " delta_period_1 delta_period_2 \n", + "oid \n", + "ZTF17aaaaavn 0.017990 4.895612e-01 \n", + "ZTF17aaaadjh 0.000011 5.197596e-08 \n", + "ZTF17aaaadwo 0.423044 2.662193e-01 \n", + "ZTF17aaaajjv 29.239637 1.437226e-04 \n", + "ZTF17aaaakjt 0.000002 2.169809e-06 \n", + "\n", + "[5 rows x 183 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "features.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating model " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "model = HierarchicalRandomForest({})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Loading pre-trained model" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/package/lc_classifier/classifier/pickles/hierarchical_random_forest_1.0.0\n" + ] + } + ], + "source": [ + "# The container comes with the pre-trained model downloaded in a specific path\n", + "print(model.MODEL_PICKLE_PATH)\n", + "# To load the models we call the load_model method with that path\n", + "model.load_model(model.MODEL_PICKLE_PATH)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predict" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AGNBlazarCV/NovaQSOYSOCEPDSCTELPVPeriodic-OtherRRLSLSNSNIISNIaSNIbc
oid
ZTF17aaaaavn0.0092400.0246400.0455840.0104720.2180640.0365040.0189280.1568320.0216320.4299360.0121680.0052800.0048320.0022080.003680
ZTF17aaaadjh0.0000840.0002520.0118440.0000000.0018200.0966280.0256360.6586480.0039440.0473280.1538160.0000000.0000000.0000000.000000
ZTF17aaaadwo0.0007440.0032240.0344720.0012400.0843200.0751640.0192280.2132560.0139840.5296440.0227240.0006800.0005600.0003040.000456
ZTF17aaaajjv0.0006000.0014400.0004800.0000000.0174800.0019520.0000000.0000000.9740480.0000000.0000000.0021200.0008960.0003600.000624
ZTF17aaaakjt0.0000720.0012960.0115200.0000720.0230400.2467840.0231360.4858560.0096400.0809760.1176080.0000000.0000000.0000000.000000
................................................
ZTF17aabtkpv0.0107520.0217600.0366080.0099840.0488960.0346920.0115640.2329320.0115640.5220320.0132160.0107640.0144440.0083720.012420
ZTF17aabuknx0.0000080.0001120.0032160.0000240.0006400.0656040.0636160.5924240.0000000.1491000.1232560.0007880.0005600.0002720.000380
ZTF17aabulhm0.0032480.0058000.0320160.0018560.0150800.0243360.1909440.2714400.0037440.3837600.0617760.0017280.0016680.0014400.001164
ZTF17aabuovj0.0000000.0000000.0000000.0000000.0000000.0300000.0840000.7940000.0020000.0280000.0620000.0000000.0000000.0000000.000000
ZTF17aabuuxj0.0000120.0000400.0008840.0000000.0010640.0878240.0179640.7285400.0019960.0778440.0838320.0000000.0000000.0000000.000000
\n", + "

100 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " AGN Blazar CV/Nova QSO YSO CEP \\\n", + "oid \n", + "ZTF17aaaaavn 0.009240 0.024640 0.045584 0.010472 0.218064 0.036504 \n", + "ZTF17aaaadjh 0.000084 0.000252 0.011844 0.000000 0.001820 0.096628 \n", + "ZTF17aaaadwo 0.000744 0.003224 0.034472 0.001240 0.084320 0.075164 \n", + "ZTF17aaaajjv 0.000600 0.001440 0.000480 0.000000 0.017480 0.001952 \n", + "ZTF17aaaakjt 0.000072 0.001296 0.011520 0.000072 0.023040 0.246784 \n", + "... ... ... ... ... ... ... \n", + "ZTF17aabtkpv 0.010752 0.021760 0.036608 0.009984 0.048896 0.034692 \n", + "ZTF17aabuknx 0.000008 0.000112 0.003216 0.000024 0.000640 0.065604 \n", + "ZTF17aabulhm 0.003248 0.005800 0.032016 0.001856 0.015080 0.024336 \n", + "ZTF17aabuovj 0.000000 0.000000 0.000000 0.000000 0.000000 0.030000 \n", + "ZTF17aabuuxj 0.000012 0.000040 0.000884 0.000000 0.001064 0.087824 \n", + "\n", + " DSCT E LPV Periodic-Other RRL \\\n", + "oid \n", + "ZTF17aaaaavn 0.018928 0.156832 0.021632 0.429936 0.012168 \n", + "ZTF17aaaadjh 0.025636 0.658648 0.003944 0.047328 0.153816 \n", + "ZTF17aaaadwo 0.019228 0.213256 0.013984 0.529644 0.022724 \n", + "ZTF17aaaajjv 0.000000 0.000000 0.974048 0.000000 0.000000 \n", + "ZTF17aaaakjt 0.023136 0.485856 0.009640 0.080976 0.117608 \n", + "... ... ... ... ... ... \n", + "ZTF17aabtkpv 0.011564 0.232932 0.011564 0.522032 0.013216 \n", + "ZTF17aabuknx 0.063616 0.592424 0.000000 0.149100 0.123256 \n", + "ZTF17aabulhm 0.190944 0.271440 0.003744 0.383760 0.061776 \n", + "ZTF17aabuovj 0.084000 0.794000 0.002000 0.028000 0.062000 \n", + "ZTF17aabuuxj 0.017964 0.728540 0.001996 0.077844 0.083832 \n", + "\n", + " SLSN SNII SNIa SNIbc \n", + "oid \n", + "ZTF17aaaaavn 0.005280 0.004832 0.002208 0.003680 \n", + "ZTF17aaaadjh 0.000000 0.000000 0.000000 0.000000 \n", + "ZTF17aaaadwo 0.000680 0.000560 0.000304 0.000456 \n", + "ZTF17aaaajjv 0.002120 0.000896 0.000360 0.000624 \n", + "ZTF17aaaakjt 0.000000 0.000000 0.000000 0.000000 \n", + "... ... ... ... ... 
\n", + "ZTF17aabtkpv 0.010764 0.014444 0.008372 0.012420 \n", + "ZTF17aabuknx 0.000788 0.000560 0.000272 0.000380 \n", + "ZTF17aabulhm 0.001728 0.001668 0.001440 0.001164 \n", + "ZTF17aabuovj 0.000000 0.000000 0.000000 0.000000 \n", + "ZTF17aabuuxj 0.000000 0.000000 0.000000 0.000000 \n", + "\n", + "[100 rows x 15 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Getting probabilities for each object\n", + "probabilities = model.predict_proba(features)\n", + "probabilities" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
classALeRCE
oid
ZTF17aaaaavnPeriodic-Other
ZTF17aaaadjhE
ZTF17aaaadwoPeriodic-Other
ZTF17aaaajjvLPV
ZTF17aaaakjtE
......
ZTF17aabtkpvPeriodic-Other
ZTF17aabuknxE
ZTF17aabulhmPeriodic-Other
ZTF17aabuovjE
ZTF17aabuuxjE
\n", + "

100 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " classALeRCE\n", + "oid \n", + "ZTF17aaaaavn Periodic-Other\n", + "ZTF17aaaadjh E\n", + "ZTF17aaaadwo Periodic-Other\n", + "ZTF17aaaajjv LPV\n", + "ZTF17aaaakjt E\n", + "... ...\n", + "ZTF17aabtkpv Periodic-Other\n", + "ZTF17aabuknx E\n", + "ZTF17aabulhm Periodic-Other\n", + "ZTF17aabuovj E\n", + "ZTF17aabuuxj E\n", + "\n", + "[100 rows x 1 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Getting classes\n", + "classes = model.predict(features)\n", + "classes" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'hierarchical': {'top': {'Periodic': 0.676,\n", + " 'Stochastic': 0.308,\n", + " 'Transient': 0.016},\n", + " 'children': {'Stochastic': {'AGN': 0.03,\n", + " 'Blazar': 0.08,\n", + " 'CV/Nova': 0.148,\n", + " 'QSO': 0.034,\n", + " 'YSO': 0.708},\n", + " 'Periodic': {'CEP': 0.054,\n", + " 'DSCT': 0.028,\n", + " 'E': 0.232,\n", + " 'LPV': 0.032,\n", + " 'Periodic-Other': 0.636,\n", + " 'RRL': 0.018},\n", + " 'Transient': {'SLSN': 0.33, 'SNII': 0.302, 'SNIa': 0.138, 'SNIbc': 0.23}}},\n", + " 'probabilities': {'AGN': 0.00924,\n", + " 'Blazar': 0.02464,\n", + " 'CV/Nova': 0.045584,\n", + " 'QSO': 0.010472,\n", + " 'YSO': 0.21806399999999998,\n", + " 'CEP': 0.036504,\n", + " 'DSCT': 0.018928,\n", + " 'E': 0.15683200000000003,\n", + " 'LPV': 0.021632000000000002,\n", + " 'Periodic-Other': 0.42993600000000004,\n", + " 'RRL': 0.012168,\n", + " 'SLSN': 0.00528,\n", + " 'SNII': 0.004832,\n", + " 'SNIa': 0.0022080000000000003,\n", + " 'SNIbc': 0.00368},\n", + " 'class': 'Periodic-Other'}" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Getting tree, only works for one object\n", + "model.predict_in_pipeline(features.iloc[0])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + 
"name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/compute_features_from_light_curves.ipynb b/examples/compute_features_from_light_curves.ipynb deleted file mode 100644 index 78735ba..0000000 --- a/examples/compute_features_from_light_curves.ipynb +++ /dev/null @@ -1,629 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "from lc_classifier.features import CustomHierarchicalExtractor" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " /home/ignacio/miniconda3/envs/lc-classifier/lib/python3.7/site-packages/pyarrow/pandas_compat.py:708: FutureWarning:.labels was deprecated in version 0.24.0. Use .codes instead.\n", - " /home/ignacio/miniconda3/envs/lc-classifier/lib/python3.7/site-packages/pyarrow/pandas_compat.py:735: FutureWarning:the 'labels' keyword is deprecated, use 'codes' instead\n", - " /home/ignacio/miniconda3/envs/lc-classifier/lib/python3.7/site-packages/pyarrow/pandas_compat.py:752: FutureWarning:.labels was deprecated in version 0.24.0. 
Use .codes instead.\n" - ] - } - ], - "source": [ - "detections = pd.read_parquet('detections_sample.parquet')\n", - "non_detections = pd.read_parquet('non_detections_sample.parquet')\n", - "object_info = pd.read_parquet('object_info_sample.parquet').set_index('objectId')" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "detections.index.name = 'oid'\n", - "non_detections.index.name = 'oid'\n", - "object_info.index.name = 'oid'" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "non_detections['mjd'] = non_detections['jd'] - 2400000.5" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "doid = detections.index.unique().values\n", - "ndoid = non_detections.index.unique().values\n", - "ooid = object_info.index.unique().values\n", - "object_info = object_info.loc[doid]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "features_computer = CustomHierarchicalExtractor()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " /home/ignacio/Projects/lc_classifier/lc_classifier/features/extractors/mhps_extractor.py:54: RuntimeWarning:invalid value encountered in true_divide\n", - " /home/ignacio/Projects/turbo-fats/turbofats/features/structure_function.py:88: RuntimeWarning:invalid value encountered in sqrt\n", - " /home/ignacio/Projects/turbo-fats/turbofats/features/structure_function.py:97: RuntimeWarning:invalid value encountered in greater\n", - " /home/ignacio/Projects/turbo-fats/turbofats/features/structure_function.py:88: RuntimeWarning:invalid value encountered in sqrt\n", - " /home/ignacio/Projects/turbo-fats/turbofats/features/structure_function.py:97: RuntimeWarning:invalid value encountered in greater\n", - " 
/home/ignacio/Projects/turbo-fats/turbofats/features/structure_function.py:119: RuntimeWarning:invalid value encountered in log10\n", - " /home/ignacio/Projects/turbo-fats/turbofats/features/structure_function.py:120: RuntimeWarning:invalid value encountered in log10\n", - " /home/ignacio/Projects/turbo-fats/turbofats/FeatureSpace.py:58: RankWarning:Polyfit may be poorly conditioned\n", - " /home/ignacio/Projects/turbo-fats/turbofats/FeatureSpace.py:58: RankWarning:Polyfit may be poorly conditioned\n", - " /home/ignacio/Projects/turbo-fats/turbofats/FeatureSpace.py:58: RankWarning:Polyfit may be poorly conditioned\n", - " /home/ignacio/Projects/turbo-fats/turbofats/FeatureSpace.py:58: RankWarning:Polyfit may be poorly conditioned\n", - " /home/ignacio/Projects/turbo-fats/turbofats/FeatureSpace.py:58: RankWarning:Polyfit may be poorly conditioned\n", - " /home/ignacio/Projects/lc_classifier/lc_classifier/features/core/base.py:112: FutureWarning:Sorting because non-concatenation axis is not aligned. 
A future version\n", - "of pandas will change to not sort by default.\n", - "\n", - "To accept the future behavior, pass 'sort=False'.\n", - "\n", - "To retain the current behavior and silence the warning, pass 'sort=True'.\n", - "\n", - " /home/ignacio/miniconda3/envs/lc-classifier/lib/python3.7/site-packages/scipy/optimize/minpack.py:829: OptimizeWarning:Covariance of the parameters could not be estimated\n", - " /home/ignacio/Projects/P4J/P4J/periodograms.py:72: RuntimeWarning:divide by zero encountered in true_divide\n", - " /home/ignacio/Projects/P4J/P4J/periodograms.py:72: RuntimeWarning:invalid value encountered in true_divide\n", - " /home/ignacio/Projects/P4J/P4J/base_periodogram.py:50: RuntimeWarning:invalid value encountered in greater\n", - "ERROR:root:TypeError exception in PeriodExtractor: oid ZTF18abefhat\n", - "'NoneType' object is not iterable\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: Not enough local maxima found in the periodogram\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:root:TypeError exception in PeriodExtractor: oid ZTF18abvtdzm\n", - "'NoneType' object is not iterable\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: Not enough local maxima found in the periodogram\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:root:TypeError exception in PeriodExtractor: oid ZTF18aaapdpk\n", - "'NoneType' object is not iterable\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: Not enough local maxima found in the periodogram\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:root:TypeError exception in PeriodExtractor: oid ZTF18abtrvxi\n", - "'NoneType' object is not iterable\n", - "ERROR:root:TypeError exception in PeriodExtractor: oid ZTF19abcgdkr\n", - "'NoneType' object is not iterable\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "Warning: Not enough local maxima found in the periodogram\n", - "Warning: Not enough local maxima found in the periodogram\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:root:TypeError exception in PeriodExtractor: oid ZTF19abuxfqt\n", - "'NoneType' object is not iterable\n", - "ERROR:root:TypeError exception in PeriodExtractor: oid ZTF19abvflof\n", - "'NoneType' object is not iterable\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: Not enough local maxima found in the periodogram\n", - "Warning: Not enough local maxima found in the periodogram\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:root:TypeError exception in PeriodExtractor: oid ZTF19aadlzzh\n", - "'NoneType' object is not iterable\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: Not enough local maxima found in the periodogram\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:root:TypeError exception in PeriodExtractor: oid ZTF19adcfvpg\n", - "'NoneType' object is not iterable\n", - "ERROR:root:TypeError exception in PeriodExtractor: oid ZTF19aannrmo\n", - "'NoneType' object is not iterable\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: Not enough local maxima found in the periodogram\n", - "Warning: Not enough local maxima found in the periodogram\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:root:TypeError exception in PeriodExtractor: oid ZTF20aavxphg\n", - "'NoneType' object is not iterable\n", - "ERROR:root:TypeError exception in PeriodExtractor: oid ZTF20abchqmp\n", - "'NoneType' object is not iterable\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: Not enough local maxima found in the periodogram\n", - "Warning: Not enough local maxima found in the 
periodogram\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:root:PeriodPowerRateExtractor: period is not available for ZTF18abefhat\n", - "ERROR:root:PeriodPowerRateExtractor: period is not available for ZTF18abvtdzm\n", - "ERROR:root:PeriodPowerRateExtractor: period is not available for ZTF18aaapdpk\n", - "ERROR:root:PeriodPowerRateExtractor: period is not available for ZTF18abtrvxi\n", - "ERROR:root:PeriodPowerRateExtractor: period is not available for ZTF19abcgdkr\n", - "ERROR:root:PeriodPowerRateExtractor: period is not available for ZTF19abuxfqt\n", - "ERROR:root:PeriodPowerRateExtractor: period is not available for ZTF19abvflof\n", - "ERROR:root:PeriodPowerRateExtractor: period is not available for ZTF19aadlzzh\n", - "ERROR:root:PeriodPowerRateExtractor: period is not available for ZTF19adcfvpg\n", - "ERROR:root:PeriodPowerRateExtractor: period is not available for ZTF19aannrmo\n", - "ERROR:root:PeriodPowerRateExtractor: period is not available for ZTF20aavxphg\n", - "ERROR:root:PeriodPowerRateExtractor: period is not available for ZTF20abchqmp\n", - " /home/ignacio/Projects/lc_classifier/lc_classifier/features/extractors/folded_kim_extractor.py:61: RuntimeWarning:invalid value encountered in true_divide\n", - " /home/ignacio/Projects/lc_classifier/lc_classifier/features/extractors/folded_kim_extractor.py:64: RuntimeWarning:divide by zero encountered in double_scalars\n", - " /home/ignacio/Projects/lc_classifier/lc_classifier/features/extractors/folded_kim_extractor.py:65: RuntimeWarning:invalid value encountered in double_scalars\n", - " /home/ignacio/Projects/lc_classifier/lc_classifier/features/extractors/folded_kim_extractor.py:55: RuntimeWarning:invalid value encountered in remainder\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF18abefhat\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF18abvtdzm\n", - "SVD did not 
converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF18aaapdpk\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF18abtrvxi\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF19abcgdkr\n", - "SVD did not converge\n", - " /home/ignacio/miniconda3/envs/lc-classifier/lib/python3.7/site-packages/numpy/linalg/linalg.py:1965: RuntimeWarning:invalid value encountered in greater\n", - " /home/ignacio/Projects/lc_classifier/lc_classifier/features/extractors/harmonics_extractor.py:65: RuntimeWarning:invalid value encountered in remainder\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF19abvflof\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF19aadlzzh\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF19adcfvpg\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF19aannrmo\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF20aavxphg\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF18abefhat\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF18abvtdzm\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF18aaapdpk\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF18abtrvxi\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF19abuxfqt\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF19aadlzzh\n", - "SVD did 
not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF19adcfvpg\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF19aannrmo\n", - "SVD did not converge\n", - "ERROR:root:KeyError in HarmonicsExtractor, period is not available: oid ZTF20abchqmp\n", - "SVD did not converge\n" - ] - } - ], - "source": [ - "features = features_computer.compute_features(\n", - " detections=detections,\n", - " non_detections=non_detections,\n", - " objects=object_info)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Amplitude_1\n", - "Amplitude_2\n", - "AndersonDarling_1\n", - "AndersonDarling_2\n", - "Autocor_length_1\n", - "Autocor_length_2\n", - "Beyond1Std_1\n", - "Beyond1Std_2\n", - "Con_1\n", - "Con_2\n", - "Eta_e_1\n", - "Eta_e_2\n", - "ExcessVar_1\n", - "ExcessVar_2\n", - "GP_DRW_sigma_1\n", - "GP_DRW_sigma_2\n", - "GP_DRW_tau_1\n", - "GP_DRW_tau_2\n", - "Gskew_1\n", - "Gskew_2\n", - "Harmonics_mag_1_1\n", - "Harmonics_mag_1_2\n", - "Harmonics_mag_2_1\n", - "Harmonics_mag_2_2\n", - "Harmonics_mag_3_1\n", - "Harmonics_mag_3_2\n", - "Harmonics_mag_4_1\n", - "Harmonics_mag_4_2\n", - "Harmonics_mag_5_1\n", - "Harmonics_mag_5_2\n", - "Harmonics_mag_6_1\n", - "Harmonics_mag_6_2\n", - "Harmonics_mag_7_1\n", - "Harmonics_mag_7_2\n", - "Harmonics_mse_1\n", - "Harmonics_mse_2\n", - "Harmonics_phase_2_1\n", - "Harmonics_phase_2_2\n", - "Harmonics_phase_3_1\n", - "Harmonics_phase_3_2\n", - "Harmonics_phase_4_1\n", - "Harmonics_phase_4_2\n", - "Harmonics_phase_5_1\n", - "Harmonics_phase_5_2\n", - "Harmonics_phase_6_1\n", - "Harmonics_phase_6_2\n", - "Harmonics_phase_7_1\n", - "Harmonics_phase_7_2\n", - "IAR_phi_1\n", - "IAR_phi_2\n", - "LinearTrend_1\n", - "LinearTrend_2\n", - "MHPS_PN_flag_1\n", - "MHPS_PN_flag_2\n", - "MHPS_high_1\n", - "MHPS_high_2\n", - "MHPS_low_1\n", - 
"MHPS_low_2\n", - "MHPS_non_zero_1\n", - "MHPS_non_zero_2\n", - "MHPS_ratio_1\n", - "MHPS_ratio_2\n", - "MaxSlope_1\n", - "MaxSlope_2\n", - "Mean_1\n", - "Mean_2\n", - "Meanvariance_1\n", - "Meanvariance_2\n", - "MedianAbsDev_1\n", - "MedianAbsDev_2\n", - "MedianBRP_1\n", - "MedianBRP_2\n", - "Multiband_period\n", - "PPE\n", - "PairSlopeTrend_1\n", - "PairSlopeTrend_2\n", - "PercentAmplitude_1\n", - "PercentAmplitude_2\n", - "Period_band_1\n", - "Period_band_2\n", - "Power_rate_1/2\n", - "Power_rate_1/3\n", - "Power_rate_1/4\n", - "Power_rate_2\n", - "Power_rate_3\n", - "Power_rate_4\n", - "Psi_CS_1\n", - "Psi_CS_2\n", - "Psi_eta_1\n", - "Psi_eta_2\n", - "Pvar_1\n", - "Pvar_2\n", - "Q31_1\n", - "Q31_2\n", - "Rcs_1\n", - "Rcs_2\n", - "SF_ML_amplitude_1\n", - "SF_ML_amplitude_2\n", - "SF_ML_gamma_1\n", - "SF_ML_gamma_2\n", - "SPM_A_1\n", - "SPM_A_2\n", - "SPM_beta_1\n", - "SPM_beta_2\n", - "SPM_chi_1\n", - "SPM_chi_2\n", - "SPM_gamma_1\n", - "SPM_gamma_2\n", - "SPM_t0_1\n", - "SPM_t0_2\n", - "SPM_tau_fall_1\n", - "SPM_tau_fall_2\n", - "SPM_tau_rise_1\n", - "SPM_tau_rise_2\n", - "Skew_1\n", - "Skew_2\n", - "SmallKurtosis_1\n", - "SmallKurtosis_2\n", - "Std_1\n", - "Std_2\n", - "StetsonK_1\n", - "StetsonK_2\n", - "delta_mag_fid_1\n", - "delta_mag_fid_2\n", - "delta_mjd_fid_1\n", - "delta_mjd_fid_2\n", - "delta_period_1\n", - "delta_period_2\n", - "dmag_first_det_fid_1\n", - "dmag_first_det_fid_2\n", - "dmag_non_det_fid_1\n", - "dmag_non_det_fid_2\n", - "first_mag_1\n", - "first_mag_2\n", - "g-r_max\n", - "g-r_max_corr\n", - "g-r_mean\n", - "g-r_mean_corr\n", - "gal_b\n", - "gal_l\n", - "iqr_1\n", - "iqr_2\n", - "last_diffmaglim_before_fid_1\n", - "last_diffmaglim_before_fid_2\n", - "last_mjd_before_fid_1\n", - "last_mjd_before_fid_2\n", - "max_diffmaglim_after_fid_1\n", - "max_diffmaglim_after_fid_2\n", - "max_diffmaglim_before_fid_1\n", - "max_diffmaglim_before_fid_2\n", - "mean_mag_1\n", - "mean_mag_2\n", - "median_diffmaglim_after_fid_1\n", - 
"median_diffmaglim_after_fid_2\n", - "median_diffmaglim_before_fid_1\n", - "median_diffmaglim_before_fid_2\n", - "min_mag_1\n", - "min_mag_2\n", - "n_det_1\n", - "n_det_2\n", - "n_neg_1\n", - "n_neg_2\n", - "n_non_det_after_fid_1\n", - "n_non_det_after_fid_2\n", - "n_non_det_before_fid_1\n", - "n_non_det_before_fid_2\n", - "n_pos_1\n", - "n_pos_2\n", - "positive_fraction_1\n", - "positive_fraction_2\n", - "rb\n", - "sgscore1\n" - ] - } - ], - "source": [ - "for feature in features.columns:\n", - " print(feature)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "74 objects out of 485 don't have features ['ZTF19abcgdkr', 'ZTF20aawagxa', 'ZTF19adcfrvn', 'ZTF18aauipis', 'ZTF20aaxcthi', 'ZTF19aamnubo', 'ZTF19adcfnyc', 'ZTF18abpyasr', 'ZTF20aavxphg', 'ZTF19abqstat', 'ZTF19abcnlpp', 'ZTF20aapcoxv', 'ZTF20abchqmp', 'ZTF20aapnacw', 'ZTF18abzpdkq', 'ZTF20aawlwfx', 'ZTF18aashcfs', 'ZTF19abztrlv', 'ZTF19acbjnei', 'ZTF19aaskjoo', 'ZTF19aaujnth', 'ZTF19aamozya', 'ZTF19aadlzzh', 'ZTF18acibeqd', 'ZTF18aapuheo', 'ZTF19aabfrlc', 'ZTF18abahdyl', 'ZTF19acnyjtb', 'ZTF19adccmdp', 'ZTF20aapndzj', 'ZTF18aajhrvr', 'ZTF18accdeht', 'ZTF19aclqxzm', 'ZTF18abwbopw', 'ZTF18abvtdzm', 'ZTF19adcfvpg', 'ZTF19aacqfol', 'ZTF18adasgea', 'ZTF19aanevrw', 'ZTF20aaeibqo', 'ZTF20aaicdhx', 'ZTF20aafdsba', 'ZTF18abotaqw', 'ZTF18abjlial', 'ZTF20aazfvln', 'ZTF19abvflof', 'ZTF19aannrmo', 'ZTF18abddohm', 'ZTF18aaapdpk', 'ZTF18aboacia', 'ZTF19abxgenk', 'ZTF18aaukrps', 'ZTF19abuxfqt', 'ZTF18aajedfg', 'ZTF20aadvasi', 'ZTF19aapkrqp', 'ZTF19adcfowp', 'ZTF18aaaagoy', 'ZTF19abfqlih', 'ZTF18aceezaa', 'ZTF18acerlpm', 'ZTF18abhxigv', 'ZTF19aamwsnq', 'ZTF19aauethb', 'ZTF19acigeac', 'ZTF20aapojsa', 'ZTF18acwyxty', 'ZTF18abefhat', 'ZTF18abmouki', 'ZTF18aabcchi', 'ZTF18acezmjz', 'ZTF19aaailrw', 'ZTF18abtrvxi', 'ZTF18aaaomtu']\n" - ] - } - ], - "source": [ - "missing_oids = set(detections.index.unique().values) - 
set(features.index.values)\n", - "missing_oids = list(missing_oids)\n", - "print(len(missing_oids), \"objects out of\", \n", - " len(detections.index.unique()), \"don't have features\", missing_oids)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ZTF19abcgdkr 12\n", - "ZTF20aawagxa 7\n", - "ZTF19adcfrvn 11\n", - "ZTF18aauipis 9\n", - "ZTF20aaxcthi 36\n", - "ZTF19aamnubo 34\n", - "ZTF19adcfnyc 56\n", - "ZTF18abpyasr 8\n", - "ZTF20aavxphg 46\n", - "ZTF19abqstat 56\n", - "ZTF19abcnlpp 10\n", - "ZTF20aapcoxv 12\n", - "ZTF20abchqmp 34\n", - "ZTF20aapnacw 10\n", - "ZTF18abzpdkq 20\n", - "ZTF20aawlwfx 38\n", - "ZTF18aashcfs 218\n", - "ZTF19abztrlv 9\n", - "ZTF19acbjnei 242\n", - "ZTF19aaskjoo 9\n", - "ZTF19aaujnth 11\n", - "ZTF19aamozya 36\n", - "ZTF19aadlzzh 112\n", - "ZTF18acibeqd 34\n", - "ZTF18aapuheo 84\n", - "ZTF19aabfrlc 32\n", - "ZTF18abahdyl 30\n", - "ZTF19acnyjtb 26\n", - "ZTF19adccmdp 9\n", - "ZTF20aapndzj 28\n", - "ZTF18aajhrvr 13\n", - "ZTF18accdeht 11\n", - "ZTF19aclqxzm 40\n", - "ZTF18abwbopw 8\n", - "ZTF18abvtdzm 36\n", - "ZTF19adcfvpg 76\n", - "ZTF19aacqfol 55\n", - "ZTF18adasgea 58\n", - "ZTF19aanevrw 10\n", - "ZTF20aaeibqo 82\n", - "ZTF20aaicdhx 8\n", - "ZTF20aafdsba 11\n", - "ZTF18abotaqw 14\n", - "ZTF18abjlial 30\n", - "ZTF20aazfvln 12\n", - "ZTF19abvflof 46\n", - "ZTF19aannrmo 42\n", - "ZTF18abddohm 106\n", - "ZTF18aaapdpk 44\n", - "ZTF18aboacia 10\n", - "ZTF19abxgenk 17\n", - "ZTF18aaukrps 204\n", - "ZTF19abuxfqt 50\n", - "ZTF18aajedfg 136\n", - "ZTF20aadvasi 7\n", - "ZTF19aapkrqp 7\n", - "ZTF19adcfowp 46\n", - "ZTF18aaaagoy 58\n", - "ZTF19abfqlih 12\n", - "ZTF18aceezaa 60\n", - "ZTF18acerlpm 60\n", - "ZTF18abhxigv 45\n", - "ZTF19aamwsnq 58\n", - "ZTF19aauethb 9\n", - "ZTF19acigeac 10\n", - "ZTF20aapojsa 10\n", - "ZTF18acwyxty 9\n", - "ZTF18abefhat 28\n", - "ZTF18abmouki 76\n", - "ZTF18aabcchi 9\n", - "ZTF18acezmjz 8\n", - "ZTF19aaailrw 
20\n", - "ZTF18abtrvxi 162\n", - "ZTF18aaaomtu 222\n" - ] - } - ], - "source": [ - "for oid in missing_oids:\n", - " detections_from_oid = detections.loc[oid]\n", - " print(oid, len(detections_from_oid))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/examples/data/detections_sample.parquet b/examples/data/detections_sample.parquet new file mode 100644 index 0000000..59abd07 Binary files /dev/null and b/examples/data/detections_sample.parquet differ diff --git a/examples/data/features_sample.parquet b/examples/data/features_sample.parquet new file mode 100644 index 0000000..33b8606 Binary files /dev/null and b/examples/data/features_sample.parquet differ diff --git a/examples/data/non_detections_sample.parquet b/examples/data/non_detections_sample.parquet new file mode 100644 index 0000000..1d4b101 Binary files /dev/null and b/examples/data/non_detections_sample.parquet differ diff --git a/examples/data/object_info_sample.parquet b/examples/data/object_info_sample.parquet new file mode 100644 index 0000000..b2d447d Binary files /dev/null and b/examples/data/object_info_sample.parquet differ diff --git a/examples/detections_sample.parquet b/examples/detections_sample.parquet deleted file mode 100644 index ab57111..0000000 Binary files a/examples/detections_sample.parquet and /dev/null differ diff --git a/examples/non_detections_sample.parquet b/examples/non_detections_sample.parquet deleted file mode 100644 index 4768aaf..0000000 Binary files a/examples/non_detections_sample.parquet and /dev/null differ diff --git a/examples/object_info_sample.parquet 
b/examples/object_info_sample.parquet deleted file mode 100644 index 2ace81d..0000000 Binary files a/examples/object_info_sample.parquet and /dev/null differ