Skip to content

Commit

Permalink
Add profiling capabilities (#398)
Browse files Browse the repository at this point in the history
* Add profiling capabilities

* Update the image (#400)
  • Loading branch information
JulienPeloton authored Jul 24, 2024
1 parent d48b162 commit 5910070
Show file tree
Hide file tree
Showing 20 changed files with 94 additions and 20 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:

strategy:
matrix:
container: ["julienpeloton/fink-ci:dev"]
container: ["julienpeloton/fink-ci:latest"]

container:
image: ${{ matrix.container }}
Expand Down
5 changes: 4 additions & 1 deletion fink_science/ad_features/processor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020-2023 AstroLab Software
# Copyright 2020-2024 AstroLab Software
# Author: Igor Beschastnov, Julien Peloton
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from line_profiler import profile

import logging
import os

Expand Down Expand Up @@ -75,6 +77,7 @@ def create_extractor():
columns_count = len(FEATURES_COLS)


@profile
def extract_features_ad_raw(
magpsf,
jd,
Expand Down
4 changes: 3 additions & 1 deletion fink_science/agn/classifier.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2022 Fink Software
# Copyright 2022-2024 Fink Software
# Author: Etienne Russeil
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from line_profiler import profile

import joblib
import fink_science.agn.kernel as k
Expand Down Expand Up @@ -63,6 +64,7 @@ def load_classifier(source):
return clf


@profile
def agn_classifier(data, source):
"""
Call the agn_classifier
Expand Down
19 changes: 17 additions & 2 deletions fink_science/agn/feature_extraction.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2022 Fink Software
# Copyright 2022-2024 Fink Software
# Author: Etienne Russeil
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from line_profiler import profile

import pandas as pd
import fink_science.agn.models as mod
Expand All @@ -24,7 +25,7 @@
import warnings
import fink_science.agn.unit_examples as uex # noqa: F401


@profile
def map_fid(ps):
"""Convert LSST filters to corresponding int value
From u, g, r, i, z, Y to 0, 1, 2, 3, 4, 5
Expand All @@ -49,6 +50,7 @@ def map_fid(ps):
return np.array(list(map(band_dict.get, ps)))


@profile
def remove_nan(ps):
"""
Function that removes NaN values from the lists contained in columns
Expand Down Expand Up @@ -76,6 +78,7 @@ def remove_nan(ps):
return [np.array(_col)[mask].astype(type(_col[0])) for _col in ps]


@profile
def mag2fluxcal_snana(magpsf: float, sigmapsf: float):
"""Conversion from magnitude to Fluxcal from SNANA manual
Parameters
Expand Down Expand Up @@ -110,6 +113,7 @@ def mag2fluxcal_snana(magpsf: float, sigmapsf: float):
return fluxcal, fluxcal_err


@profile
def compute_hostgal_dist(df):
"""Compute the distance to host galaxy column
using simple Pythagoras computation.
Expand Down Expand Up @@ -146,6 +150,7 @@ def compute_hostgal_dist(df):
return hostgal_dist


@profile
def convert_full_dataset(clean: pd.DataFrame):
"""
Convert all mag and mag err to flux and flux err
Expand Down Expand Up @@ -179,6 +184,7 @@ def convert_full_dataset(clean: pd.DataFrame):
return clean


@profile
def format_data(df, source):
"""Transform filter names to ints and
add distance to host galaxy column.
Expand Down Expand Up @@ -242,6 +248,7 @@ def format_data(df, source):
return df


@profile
def keep_filter(ps, band):
"""
Function that removes points from bands other than the one specified
Expand Down Expand Up @@ -277,6 +284,7 @@ def keep_filter(ps, band):
return [np.array(_col)[mask].astype(type(_col[0])) for _col in ps]


@profile
def translate(ps):
"""Translate a cjd list by subtracting the cjd of the max flux point
Expand Down Expand Up @@ -308,6 +316,7 @@ def translate(ps):
return ps["cjd"] - ps["cjd"][np.argmax(ps["cflux"])]


@profile
def normalize(ps):
"""Normalize by dividing by a data frame of maximum
Expand Down Expand Up @@ -418,6 +427,7 @@ def get_min(x, absolute=False):
return x.min()


@profile
def transform_data(formated, minimum_points, source):
"""Apply transformations for each filter on a flux-formatted dataset
- Shift cjd so that the max flux point is at 0
Expand Down Expand Up @@ -500,6 +510,7 @@ def transform_data(formated, minimum_points, source):
return all_transformed, valid


@profile
def parametric_bump(ps, band):

"""Fit the lightcurves using the bump function.
Expand Down Expand Up @@ -541,6 +552,7 @@ def parametric_bump(ps, band):
return fit[0]


@profile
def compute_color(ps, fit_func):
"""Compute the color of an alert by computing blue-red
Proceed by virtually filling missing points of each band using the bump fit
Expand Down Expand Up @@ -645,6 +657,7 @@ def compute_mean(x):
return np.mean(x)


@profile
def parametrise(all_transformed, source, target_col=""):
"""Extract parameters from a list of datasets output
by the transform_data function.
Expand Down Expand Up @@ -764,6 +777,7 @@ def parametrise(all_transformed, source, target_col=""):
return all_features


@profile
def merge_features(all_features, minimum_points, source, target_col=""):
"""Merge feature tables of all filters.
Additionally compute color parameters:
Expand Down Expand Up @@ -936,6 +950,7 @@ def merge_features(all_features, minimum_points, source, target_col=""):
return ordered_features


@profile
def get_probabilities(clf, features, valid):
"""Returns the probability of being an AGN, as predicted by the classifier.
Expand Down
2 changes: 1 addition & 1 deletion fink_science/agn/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2022 Fink Software
# Copyright 2022-2024 Fink Software
# Author: Etienne Russeil
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
5 changes: 4 additions & 1 deletion fink_science/agn/processor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2022 Fink Software
# Copyright 2022-2024 Fink Software
# Author: Etienne Russeil
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from line_profiler import profile

from fink_science.agn.classifier import agn_classifier
from pyspark.sql.functions import pandas_udf
Expand All @@ -23,6 +24,7 @@


@pandas_udf(DoubleType())
@profile
def agn_elasticc(
diaObjectId, cmidPoinTai, cpsFlux, cpsFluxErr, cfilterName,
ra, decl, hostgal_zphot, hostgal_zphot_err, hostgal_ra, hostgal_dec):
Expand Down Expand Up @@ -84,6 +86,7 @@ def agn_elasticc(


@pandas_udf(DoubleType())
@profile
def agn_ztf(objectId, jd, magpsf, sigmapsf, fid, ra, dec):
"""High level spark wrapper for the AGN classifier on ZTF data
Expand Down
5 changes: 4 additions & 1 deletion fink_science/anomaly_detection/processor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020-2022 AstroLab Software
# Copyright 2020-2024 AstroLab Software
# Author: Igor Beschastnov
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from line_profiler import profile

import logging
import os
import zipfile
Expand Down Expand Up @@ -50,6 +52,7 @@ def anomaly_score(self, data_g, data_r):
return (scores_g[-1] + scores_r[-1]) / 2

@pandas_udf(DoubleType())
@profile
def anomaly_score(lc_features, model_type="AADForest"):
"""Returns anomaly score for an observation
Expand Down
5 changes: 4 additions & 1 deletion fink_science/asteroids/processor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020 AstroLab Software
# Copyright 2020-2024 AstroLab Software
# Author: Julien Peloton
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from line_profiler import profile

from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.types import IntegerType

Expand All @@ -24,6 +26,7 @@
from fink_science.tester import spark_unit_tests

@pandas_udf(IntegerType(), PandasUDFType.SCALAR)
@profile
def roid_catcher(jd, magpsf, ndethist, sgscore1, ssdistnr, distpsnr1):
""" Determine if an alert is a potential Solar System object (SSO) using two criteria:
Expand Down
4 changes: 3 additions & 1 deletion fink_science/cats/processor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020-2022 AstroLab Software
# Copyright 2020-2024 AstroLab Software
# Author: Andre Santos, Bernardo Fraga, Clecio de Bom
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from line_profiler import profile

import os
import numpy as np
Expand All @@ -32,6 +33,7 @@


@pandas_udf(ArrayType(FloatType()), PandasUDFType.SCALAR)
@profile
def predict_nn(
midpointTai: pd.Series,
psFlux: pd.Series,
Expand Down
5 changes: 4 additions & 1 deletion fink_science/fast_transient_rate/processor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020-2023 AstroLab Software
# Copyright 2020-2024 AstroLab Software
# Author: Roman Le Montagner
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from line_profiler import profile

import numpy as np
import pandas as pd
import os
Expand Down Expand Up @@ -114,6 +116,7 @@ def return_last_alerts(*args) -> list:
]


@profile
def fast_transient_rate(df: pd.DataFrame, N: int, seed: int = None) -> pd.DataFrame:
"""
Compute the magnitude rate for fast transient detection.
Expand Down
6 changes: 5 additions & 1 deletion fink_science/kilonova/processor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2021-2022 AstroLab Software
# Copyright 2021-2024 AstroLab Software
# Author: Julien Peloton
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from line_profiler import profile

from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.types import DoubleType, StringType

Expand All @@ -32,6 +34,7 @@
from fink_science.tester import spark_unit_tests

@pandas_udf(DoubleType(), PandasUDFType.SCALAR)
@profile
def knscore(jd, fid, magpsf, sigmapsf, jdstarthist, cdsxmatch, ndethist, model_name=None) -> pd.Series:
""" Return the probability of an alert to be a Kilonova using a Random
Forest Classifier.
Expand Down Expand Up @@ -190,6 +193,7 @@ def knscore(jd, fid, magpsf, sigmapsf, jdstarthist, cdsxmatch, ndethist, model_n
return pd.Series(to_return)

@pandas_udf(StringType(), PandasUDFType.SCALAR)
@profile
def extract_features_knscore(jd, fid, magpsf, sigmapsf) -> pd.Series:
""" Extract features used by the Kilonova classifier (using a Random
Forest Classifier).
Expand Down
6 changes: 5 additions & 1 deletion fink_science/microlensing/processor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020-2023 AstroLab Software
# Copyright 2020-2024 AstroLab Software
# Author: Julien Peloton
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from line_profiler import profile

from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.types import StringType, DoubleType

Expand All @@ -33,6 +35,7 @@
from fink_science.tester import spark_unit_tests

@pandas_udf(DoubleType(), PandasUDFType.SCALAR)
@profile
def mulens(
fid, magpsf, sigmapsf, magnr, sigmagnr,
isdiffpos, ndethist):
Expand Down Expand Up @@ -172,6 +175,7 @@ def mulens(
return pd.Series(to_return)

@pandas_udf(StringType(), PandasUDFType.SCALAR)
@profile
def extract_features_mulens(
fid, magpsf, sigmapsf, magnr, sigmagnr,
isdiffpos):
Expand Down
Loading

0 comments on commit 5910070

Please sign in to comment.