From 591007022d698712bc3170e990a2af9cce35952b Mon Sep 17 00:00:00 2001 From: Julien Date: Wed, 24 Jul 2024 14:26:42 +0200 Subject: [PATCH] Add profiling capabilities (#398) * Add profiling capabilities * Update the image (#400) --- .github/workflows/run_test.yml | 2 +- fink_science/ad_features/processor.py | 5 ++++- fink_science/agn/classifier.py | 4 +++- fink_science/agn/feature_extraction.py | 19 +++++++++++++++++-- fink_science/agn/models.py | 2 +- fink_science/agn/processor.py | 5 ++++- fink_science/anomaly_detection/processor.py | 5 ++++- fink_science/asteroids/processor.py | 5 ++++- fink_science/cats/processor.py | 4 +++- fink_science/fast_transient_rate/processor.py | 5 ++++- fink_science/kilonova/processor.py | 6 +++++- fink_science/microlensing/processor.py | 6 +++++- fink_science/random_forest_snia/processor.py | 4 ++++ fink_science/slsn/classifier.py | 4 +++- fink_science/slsn/feature_extraction.py | 11 +++++++++-- fink_science/slsn/processor.py | 4 +++- fink_science/snn/processor.py | 7 ++++++- fink_science/t2/processor.py | 5 ++++- fink_science/xmatch/processor.py | 5 +++++ fink_science/xmatch/utils.py | 6 +++++- 20 files changed, 94 insertions(+), 20 deletions(-) diff --git a/.github/workflows/run_test.yml b/.github/workflows/run_test.yml index ef0f4ddb..c1056370 100644 --- a/.github/workflows/run_test.yml +++ b/.github/workflows/run_test.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: - container: ["julienpeloton/fink-ci:dev"] + container: ["julienpeloton/fink-ci:latest"] container: image: ${{ matrix.container }} diff --git a/fink_science/ad_features/processor.py b/fink_science/ad_features/processor.py index 41a27722..c0fc9712 100644 --- a/fink_science/ad_features/processor.py +++ b/fink_science/ad_features/processor.py @@ -1,4 +1,4 @@ -# Copyright 2020-2023 AstroLab Software +# Copyright 2020-2024 AstroLab Software # Author: Igor Beschastnov, Julien Peloton # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile + import logging import os @@ -75,6 +77,7 @@ def create_extractor(): columns_count = len(FEATURES_COLS) +@profile def extract_features_ad_raw( magpsf, jd, diff --git a/fink_science/agn/classifier.py b/fink_science/agn/classifier.py index b0632434..6a4b5112 100644 --- a/fink_science/agn/classifier.py +++ b/fink_science/agn/classifier.py @@ -1,4 +1,4 @@ -# Copyright 2022 Fink Software +# Copyright 2022-2024 Fink Software # Author: Etienne Russeil # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile import joblib import fink_science.agn.kernel as k @@ -63,6 +64,7 @@ def load_classifier(source): return clf +@profile def agn_classifier(data, source): """ Call the agn_classifier diff --git a/fink_science/agn/feature_extraction.py b/fink_science/agn/feature_extraction.py index 22b602a6..f4a155ac 100644 --- a/fink_science/agn/feature_extraction.py +++ b/fink_science/agn/feature_extraction.py @@ -1,4 +1,4 @@ -# Copyright 2022 Fink Software +# Copyright 2022-2024 Fink Software # Author: Etienne Russeil # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile import pandas as pd import fink_science.agn.models as mod @@ -24,7 +25,7 @@ import warnings import fink_science.agn.unit_examples as uex # noqa: F401 - +@profile def map_fid(ps): """Convert LSST filters to corresponding int value From u, g, r, i, z, Y to 0, 1, 2, 3, 4, 5 @@ -49,6 +50,7 @@ def map_fid(ps): return np.array(list(map(band_dict.get, ps))) +@profile def remove_nan(ps): """ funtion that remove nan values from list contains in columns @@ -76,6 +78,7 @@ def remove_nan(ps): return [np.array(_col)[mask].astype(type(_col[0])) for _col in ps] +@profile def mag2fluxcal_snana(magpsf: float, sigmapsf: float): """Conversion from magnitude to Fluxcal from SNANA manual Parameters @@ -110,6 +113,7 @@ def mag2fluxcal_snana(magpsf: float, sigmapsf: float): return fluxcal, fluxcal_err +@profile def compute_hostgal_dist(df): """Compute the distance to host galaxy column using simple Pythagoras computation. @@ -146,6 +150,7 @@ def compute_hostgal_dist(df): return hostgal_dist +@profile def convert_full_dataset(clean: pd.DataFrame): """ Convert all mag and mag err to flux and flux err @@ -179,6 +184,7 @@ def convert_full_dataset(clean: pd.DataFrame): return clean +@profile def format_data(df, source): """Transform filter names to ints and add distance to host galaxy column. @@ -242,6 +248,7 @@ def format_data(df, source): return df +@profile def keep_filter(ps, band): """ Funtion that removes points from other bands than the one specified @@ -277,6 +284,7 @@ def keep_filter(ps, band): return [np.array(_col)[mask].astype(type(_col[0])) for _col in ps] +@profile def translate(ps): """Translate a cjd list by substracting maxflux point @@ -308,6 +316,7 @@ def translate(ps): return ps["cjd"] - ps["cjd"][np.argmax(ps["cflux"])] +@profile def normalize(ps): """Normalize by dividing by a data frame of maximum @@ -418,6 +427,7 @@ def get_min(x, absolute=False): return x.min() +@profile def transform_data(formated, minimum_points, source): """Apply transformations for each filters on a flux formated dataset - Shift cjd so that the max flux point is at 0 @@ -500,6 +510,7 @@ def transform_data(formated, minimum_points, source): return all_transformed, valid +@profile def parametric_bump(ps, band): """Fit the lightcurves using the bump function. @@ -541,6 +552,7 @@ def parametric_bump(ps, band): return fit[0] +@profile def compute_color(ps, fit_func): """Compute the color of an alert by computing blue-red Proceed by virtually filling missing points of each band using the bump fit @@ -645,6 +657,7 @@ def compute_mean(x): return np.mean(x) +@profile def parametrise(all_transformed, source, target_col=""): """Extract parameters from a list of dataset outputed by the transform_data function. @@ -764,6 +777,7 @@ def parametrise(all_transformed, source, target_col=""): return all_features +@profile def merge_features(all_features, minimum_points, source, target_col=""): """Merge feature tables of all filters. Additionnaly compute color parameters : @@ -936,6 +950,7 @@ def merge_features(all_features, minimum_points, source, target_col=""): return ordered_features +@profile def get_probabilities(clf, features, valid): """Returns probabilty of being an AGN predicted by the classifier. diff --git a/fink_science/agn/models.py b/fink_science/agn/models.py index cd55ea32..c358b8c9 100644 --- a/fink_science/agn/models.py +++ b/fink_science/agn/models.py @@ -1,4 +1,4 @@ -# Copyright 2022 Fink Software +# Copyright 2022-2024 Fink Software # Author: Etienne Russeil # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/fink_science/agn/processor.py b/fink_science/agn/processor.py index ee243795..44d255fc 100644 --- a/fink_science/agn/processor.py +++ b/fink_science/agn/processor.py @@ -1,4 +1,4 @@ -# Copyright 2022 Fink Software +# Copyright 2022-2024 Fink Software # Author: Etienne Russeil # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile from fink_science.agn.classifier import agn_classifier from pyspark.sql.functions import pandas_udf @@ -23,6 +24,7 @@ @pandas_udf(DoubleType()) +@profile def agn_elasticc( diaObjectId, cmidPoinTai, cpsFlux, cpsFluxErr, cfilterName, ra, decl, hostgal_zphot, hostgal_zphot_err, hostgal_ra, hostgal_dec): @@ -84,6 +86,7 @@ def agn_elasticc( @pandas_udf(DoubleType()) +@profile def agn_ztf(objectId, jd, magpsf, sigmapsf, fid, ra, dec): """High level spark wrapper for the AGN classifier on ZTF data diff --git a/fink_science/anomaly_detection/processor.py b/fink_science/anomaly_detection/processor.py index 59cebcec..e039ff6f 100644 --- a/fink_science/anomaly_detection/processor.py +++ b/fink_science/anomaly_detection/processor.py @@ -1,4 +1,4 @@ -# Copyright 2020-2022 AstroLab Software +# Copyright 2020-2024 AstroLab Software # Author: Igor Beschastnov # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile + import logging import os import zipfile @@ -50,6 +52,7 @@ def anomaly_score(self, data_g, data_r): return (scores_g[-1] + scores_r[-1]) / 2 @pandas_udf(DoubleType()) +@profile def anomaly_score(lc_features, model_type="AADForest"): """Returns anomaly score for an observation diff --git a/fink_science/asteroids/processor.py b/fink_science/asteroids/processor.py index 6b97e4ed..44743004 100644 --- a/fink_science/asteroids/processor.py +++ b/fink_science/asteroids/processor.py @@ -1,4 +1,4 @@ -# Copyright 2020 AstroLab Software +# Copyright 2020-2024 AstroLab Software # Author: Julien Peloton # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile + from pyspark.sql.functions import pandas_udf, PandasUDFType from pyspark.sql.types import IntegerType @@ -24,6 +26,7 @@ from fink_science.tester import spark_unit_tests @pandas_udf(IntegerType(), PandasUDFType.SCALAR) +@profile def roid_catcher(jd, magpsf, ndethist, sgscore1, ssdistnr, distpsnr1): """ Determine if an alert is a potential Solar System object (SSO) using two criteria: diff --git a/fink_science/cats/processor.py b/fink_science/cats/processor.py index 50673a16..cc930b07 100644 --- a/fink_science/cats/processor.py +++ b/fink_science/cats/processor.py @@ -1,4 +1,4 @@ -# Copyright 2020-2022 AstroLab Software +# Copyright 2020-2024 AstroLab Software # Author: Andre Santos, Bernardo Fraga, Clecio de Bom # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile import os import numpy as np @@ -32,6 +33,7 @@ @pandas_udf(ArrayType(FloatType()), PandasUDFType.SCALAR) +@profile def predict_nn( midpointTai: pd.Series, psFlux: pd.Series, diff --git a/fink_science/fast_transient_rate/processor.py b/fink_science/fast_transient_rate/processor.py index 54f3aa32..bf0dac68 100644 --- a/fink_science/fast_transient_rate/processor.py +++ b/fink_science/fast_transient_rate/processor.py @@ -1,4 +1,4 @@ -# Copyright 2020-2023 AstroLab Software +# Copyright 2020-2024 AstroLab Software # Author: Roman Le Montagner # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile + import numpy as np import pandas as pd import os @@ -114,6 +116,7 @@ def return_last_alerts(*args) -> list: ] +@profile def fast_transient_rate(df: pd.DataFrame, N: int, seed: int = None) -> pd.DataFrame: """ Compute the magnitude rate for fast transient detection. diff --git a/fink_science/kilonova/processor.py b/fink_science/kilonova/processor.py index 5ae8cd6f..0c43ec12 100644 --- a/fink_science/kilonova/processor.py +++ b/fink_science/kilonova/processor.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 AstroLab Software +# Copyright 2021-2024 AstroLab Software # Author: Julien Peloton # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile + from pyspark.sql.functions import pandas_udf, PandasUDFType from pyspark.sql.types import DoubleType, StringType @@ -32,6 +34,7 @@ from fink_science.tester import spark_unit_tests @pandas_udf(DoubleType(), PandasUDFType.SCALAR) +@profile def knscore(jd, fid, magpsf, sigmapsf, jdstarthist, cdsxmatch, ndethist, model_name=None) -> pd.Series: """ Return the probability of an alert to be a Kilonova using a Random Forest Classifier. @@ -190,6 +193,7 @@ def knscore(jd, fid, magpsf, sigmapsf, jdstarthist, cdsxmatch, ndethist, model_n return pd.Series(to_return) @pandas_udf(StringType(), PandasUDFType.SCALAR) +@profile def extract_features_knscore(jd, fid, magpsf, sigmapsf) -> pd.Series: """ Extract features used by the Kilonova classifier (using a Random Forest Classifier). diff --git a/fink_science/microlensing/processor.py b/fink_science/microlensing/processor.py index 9ae279b1..17ed7471 100644 --- a/fink_science/microlensing/processor.py +++ b/fink_science/microlensing/processor.py @@ -1,4 +1,4 @@ -# Copyright 2020-2023 AstroLab Software +# Copyright 2020-2024 AstroLab Software # Author: Julien Peloton # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile + from pyspark.sql.functions import pandas_udf, PandasUDFType from pyspark.sql.types import StringType, DoubleType @@ -33,6 +35,7 @@ from fink_science.tester import spark_unit_tests @pandas_udf(DoubleType(), PandasUDFType.SCALAR) +@profile def mulens( fid, magpsf, sigmapsf, magnr, sigmagnr, isdiffpos, ndethist): @@ -172,6 +175,7 @@ def mulens( return pd.Series(to_return) @pandas_udf(StringType(), PandasUDFType.SCALAR) +@profile def extract_features_mulens( fid, magpsf, sigmapsf, magnr, sigmagnr, isdiffpos): diff --git a/fink_science/random_forest_snia/processor.py b/fink_science/random_forest_snia/processor.py index e08f72d8..2f5d3c08 100644 --- a/fink_science/random_forest_snia/processor.py +++ b/fink_science/random_forest_snia/processor.py @@ -17,6 +17,7 @@ import pandas as pd import numpy as np +from line_profiler import profile import os import pickle @@ -78,6 +79,7 @@ def apply_selection_cuts_ztf( return mask @pandas_udf(DoubleType(), PandasUDFType.SCALAR) +@profile def rfscore_sigmoid_full( jd, fid, magpsf, sigmapsf, cdsxmatch, ndethist, min_rising_points=pd.Series([2]), @@ -245,6 +247,7 @@ def rfscore_sigmoid_full( return pd.Series(to_return) @pandas_udf(StringType(), PandasUDFType.SCALAR) +@profile def extract_features_rf_snia( jd, fid, magpsf, sigmapsf, cdsxmatch, ndethist, min_rising_points=pd.Series([2]), @@ -417,6 +420,7 @@ def extract_features_rainbow( @pandas_udf(DoubleType(), PandasUDFType.SCALAR) +@profile def rfscore_rainbow_elasticc( midPointTai, filterName, cpsFlux, cpsFluxErr, snr, diff --git a/fink_science/slsn/classifier.py b/fink_science/slsn/classifier.py index 9d988b39..67b5f840 100644 --- a/fink_science/slsn/classifier.py +++ b/fink_science/slsn/classifier.py @@ -1,4 +1,4 @@ -# Copyright 2022 Fink Software +# Copyright 2022-2024 Fink Software # Author: Etienne Russeil # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile import joblib import fink_science.slsn.kernel as k @@ -42,6 +43,7 @@ def load_classifier(): return clf +@profile def slsn_classifier(data): """ Call the slsn_classifier diff --git a/fink_science/slsn/feature_extraction.py b/fink_science/slsn/feature_extraction.py index c15f7bf7..4268126f 100644 --- a/fink_science/slsn/feature_extraction.py +++ b/fink_science/slsn/feature_extraction.py @@ -1,4 +1,4 @@ -# Copyright 2022 Fink Software +# Copyright 2022-2024 Fink Software # Author: Etienne Russeil # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile + import pickle # noqa: F401 import warnings from scipy.optimize import curve_fit @@ -27,6 +29,7 @@ from pandas.testing import assert_frame_equal # noqa: F401 +@profile def transform_data(formated): """Apply transformations for each filters on a flux formated dataset - Shift cjd so that the max flux point is at 0 @@ -86,7 +89,7 @@ def transform_data(formated): return all_transformed, valid - +@profile def parametric_func(ps, band): """Fit the lightcurves using the mvsr transient function. @@ -118,6 +121,7 @@ def parametric_func(ps, band): return fit[0] +@profile def compute_color(ps): """Compute the color of an alert by computing blue-red Proceed by virtually filling missing points of each band using the mvsr transient fit @@ -147,6 +151,7 @@ def compute_color(ps): return (new_cflux_0 - new_cflux_1) * ps["peak"] +@profile def parametrise(all_transformed, target_col=""): """Extract parameters from a list of dataset outputed by the transform_data function. @@ -231,6 +236,7 @@ def parametrise(all_transformed, target_col=""): return all_features +@profile def merge_features(all_features, target_col=""): """Merge feature tables of all filters. Additionnaly fit requested bands and add fitted values as parameters: @@ -344,6 +350,7 @@ def merge_features(all_features, target_col=""): return ordered_features +@profile def compute_chi2(pdf, color): x = pdf[f'cjd_{color}'] diff --git a/fink_science/slsn/processor.py b/fink_science/slsn/processor.py index 998636ee..8dec75ed 100644 --- a/fink_science/slsn/processor.py +++ b/fink_science/slsn/processor.py @@ -1,4 +1,4 @@ -# Copyright 2022 Fink Software +# Copyright 2022-2024 Fink Software # Author: Etienne Russeil # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile from fink_science.slsn.classifier import slsn_classifier from pyspark.sql.functions import pandas_udf @@ -21,6 +22,7 @@ @pandas_udf(DoubleType()) +@profile def slsn_elasticc( diaObjectId, cmidPoinTai, cpsFlux, cpsFluxErr, cfilterName, ra, decl, hostgal_zphot, hostgal_zphot_err, hostgal_ra, hostgal_dec): diff --git a/fink_science/snn/processor.py b/fink_science/snn/processor.py index ea4076e4..3056036f 100644 --- a/fink_science/snn/processor.py +++ b/fink_science/snn/processor.py @@ -1,4 +1,4 @@ -# Copyright 2020-2022 AstroLab Software +# Copyright 2020-2024 AstroLab Software # Author: Julien Peloton # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile + from pyspark.sql.functions import pandas_udf, PandasUDFType from pyspark.sql.types import DoubleType, FloatType, ArrayType @@ -74,6 +76,7 @@ def apply_selection_cuts_ztf( return mask @pandas_udf(DoubleType(), PandasUDFType.SCALAR) +@profile def snn_ia(candid, jd, fid, magpsf, sigmapsf, roid, cdsxmatch, jdstarthist, model_name, model_ext=None) -> pd.Series: """ Compute probabilities of alerts to be SN Ia using SuperNNova @@ -203,6 +206,7 @@ def snn_ia(candid, jd, fid, magpsf, sigmapsf, roid, cdsxmatch, jdstarthist, mode return pd.Series(to_return) @pandas_udf(FloatType(), PandasUDFType.SCALAR) +@profile def snn_ia_elasticc( diaSourceId, midPointTai, filterName, psFlux, psFluxErr, roid, cdsxmatch, jdstarthist, @@ -343,6 +347,7 @@ def extract_max_prob(arr): return {'class': index, 'prob': array[index]} @pandas_udf(ArrayType(FloatType()), PandasUDFType.SCALAR) +@profile def snn_broad_elasticc( diaSourceId, midPointTai, filterName, psFlux, psFluxErr, roid, cdsxmatch, jdstarthist, diff --git a/fink_science/t2/processor.py b/fink_science/t2/processor.py index 348428e5..ef9452b0 100644 --- a/fink_science/t2/processor.py +++ b/fink_science/t2/processor.py @@ -1,4 +1,4 @@ -# Copyright 2022 AstroLab Software +# Copyright 2022-2024 AstroLab Software # Author: Tarek Allam, Julien Peloton # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile + import os from pyspark.sql.functions import pandas_udf, PandasUDFType @@ -42,6 +44,7 @@ def maxclass(dic): @pandas_udf(MapType(StringType(), FloatType()), PandasUDFType.SCALAR) +@profile def t2(candid, jd, fid, magpsf, sigmapsf, roid, cdsxmatch, jdstarthist, model_name=None) -> pd.Series: """ Return vector of probabilities from T2 diff --git a/fink_science/xmatch/processor.py b/fink_science/xmatch/processor.py index 66fb50f2..c5f8f04d 100644 --- a/fink_science/xmatch/processor.py +++ b/fink_science/xmatch/processor.py @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile + import pyspark.sql.functions as F from pyspark.sql.functions import pandas_udf, PandasUDFType from pyspark.sql.types import StringType, MapType @@ -37,6 +39,7 @@ from typing import Any @pandas_udf(StringType(), PandasUDFType.SCALAR) +@profile def cdsxmatch(objectId: Any, ra: Any, dec: Any, distmaxarcsec: float, extcatalog: str, cols: str) -> pd.Series: """ Query the CDSXmatch service to find identified objects in alerts. The catalog queried is the SIMBAD bibliographical database. @@ -274,6 +277,7 @@ def xmatch_cds( @pandas_udf(StringType(), PandasUDFType.SCALAR) +@profile def crossmatch_other_catalog(candid, ra, dec, catalog_name, radius_arcsec=None): """ Crossmatch alerts with user-defined catalogs @@ -427,6 +431,7 @@ def crossmatch_other_catalog(candid, ra, dec, catalog_name, radius_arcsec=None): return pdf_merge['Type'] @pandas_udf(MapType(StringType(), StringType()), PandasUDFType.SCALAR) +@profile def crossmatch_mangrove(candid, ra, dec, radius_arcsec=None): """ Crossmatch alerts with the Mangrove catalog diff --git a/fink_science/xmatch/utils.py b/fink_science/xmatch/utils.py index fd545434..00c7d25b 100644 --- a/fink_science/xmatch/utils.py +++ b/fink_science/xmatch/utils.py @@ -1,4 +1,4 @@ -# Copyright 2019-2021 AstroLab Software +# Copyright 2019-2024 AstroLab Software # Author: Julien Peloton # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from line_profiler import profile + import io import csv import pandas as pd @@ -30,6 +32,7 @@ 'ang_dist' ] +@profile def cross_match_astropy(pdf, catalog_ztf, catalog_other, radius_arcsec=None): """ Crossmatch two catalogs @@ -233,6 +236,7 @@ def extract_vsx(filename): pdf_vsx = pd.read_parquet(filename) return pdf_vsx['RAJ2000'], pdf_vsx['DEJ2000'], pdf_vsx['VType'] +@profile def generate_csv(s: str, lists: list) -> str: """ Make a string (CSV formatted) given lists of data and header. Parameters