From 848e8dd8bbb7d2a0cfa704778c4d722f7b088cd9 Mon Sep 17 00:00:00 2001 From: Ignacio Reyes Date: Wed, 16 Jun 2021 00:12:42 -0400 Subject: [PATCH 01/12] features adapted to data from forced photometry service --- .../features/custom/ztf_feature_extractor.py | 30 +++--- .../sn_parametric_model_computer.py | 4 +- lc_classifier/features/preprocess/base.py | 1 - .../features/preprocess/preprocess_ztf.py | 100 ++++++++++++++++++ 4 files changed, 117 insertions(+), 18 deletions(-) diff --git a/lc_classifier/features/custom/ztf_feature_extractor.py b/lc_classifier/features/custom/ztf_feature_extractor.py index 55ab7ab..fd7a0e4 100644 --- a/lc_classifier/features/custom/ztf_feature_extractor.py +++ b/lc_classifier/features/custom/ztf_feature_extractor.py @@ -134,12 +134,12 @@ def filter_out_short_lightcurves(self, detections): class ZTFForcedPhotometryFeatureExtractor(FeatureExtractor): def __init__(self, bands=(1, 2), stream=False): self.bands = list(bands) - self.stream = stream + # self.stream = stream extractors = [ GalacticCoordinatesExtractor(), ZTFColorFeatureExtractor(), - RealBogusExtractor(), + # RealBogusExtractor(), MHPSExtractor(bands), IQRExtractor(bands), TurboFatsFeatureExtractor(bands), @@ -151,16 +151,16 @@ def __init__(self, bands=(1, 2), stream=False): HarmonicsExtractor(bands), GPDRWExtractor(bands) ] - if self.stream: - extractors += [ - StreamSGScoreExtractor(), - WiseStreamExtractor() - ] - else: - extractors += [ - SGScoreExtractor(), - WiseStaticExtractor() - ] + # if self.stream: + # extractors += [ + # StreamSGScoreExtractor(), + # WiseStreamExtractor() + # ] + # else: + # extractors += [ + # SGScoreExtractor(), + # WiseStaticExtractor() + # ] self.composed_feature_extractor = FeatureExtractorComposer(extractors) @lru_cache(1) @@ -201,8 +201,8 @@ def _compute_features(self, detections, **kwargs): """ required = [] - if self.stream: - required += ['metadata', 'xmatches'] + # if self.stream: + # required += ['metadata', 'xmatches'] for key in required: if key not in kwargs: raise Exception(f"HierarchicalFeaturesComputer requires {key} argument") @@ -226,4 +226,4 @@ def filter_out_short_lightcurves(self, detections): has_enough_alerts = self.get_enough_alerts_mask(detections) too_short_oids = has_enough_alerts[~has_enough_alerts] detections = detections.loc[has_enough_alerts] - return detections, too_short_oids.index.values \ No newline at end of file + return detections, too_short_oids.index.values diff --git a/lc_classifier/features/extractors/sn_parametric_model_computer.py b/lc_classifier/features/extractors/sn_parametric_model_computer.py index baf7d8c..0515550 100644 --- a/lc_classifier/features/extractors/sn_parametric_model_computer.py +++ b/lc_classifier/features/extractors/sn_parametric_model_computer.py @@ -122,8 +122,8 @@ def fit(self, times, fluxpsf, obs_errors): argmax_fluxpsf = np.argmax(fluxpsf) max_fluxpsf = fluxpsf[argmax_fluxpsf] A_bounds = [max_fluxpsf / 3.0, max_fluxpsf * 3.0] - t0_bounds = [-50.0, 70.0] - gamma_bounds = [1.0, 100.0] + t0_bounds = [-50.0, 90.0] + gamma_bounds = [1.0, 120.0] beta_bounds = [0.0, 1.0] trise_bounds = [1.0, 100.0] tfall_bounds = [1.0, 180.0] diff --git a/lc_classifier/features/preprocess/base.py b/lc_classifier/features/preprocess/base.py index c9635b6..8eb96bc 100644 --- a/lc_classifier/features/preprocess/base.py +++ b/lc_classifier/features/preprocess/base.py @@ -13,7 +13,6 @@ def verify_dataframe(self, dataframe): raise ValueError("Input isn't a Pandas DataFrame") return - @abstractmethod def preprocess(self, dataframe): 
""" diff --git a/lc_classifier/features/preprocess/preprocess_ztf.py b/lc_classifier/features/preprocess/preprocess_ztf.py index ddfd007..87152c8 100644 --- a/lc_classifier/features/preprocess/preprocess_ztf.py +++ b/lc_classifier/features/preprocess/preprocess_ztf.py @@ -148,3 +148,103 @@ def rename_columns_non_detections(self, non_detections): def rename_columns_detections(self, detections): return detections.rename( columns=self.column_translation, errors='ignore') + + +class ZTFForcedPhotometryLightcurvePreprocessor(GenericPreprocessor): + def __init__(self): + super().__init__() + + self.required_columns = [ + 'time', + 'band', + 'magnitude', + 'error', + 'magpsf', # TODO: rename to diff_magnitude + 'sigmapsf', # diff_error + 'diff_flux', + 'diff_err', + 'ra', + 'dec', + 'infobitssci' + ] + + self.column_translation = { + 'mjd': 'time', + 'fid': 'band', + } + self.max_sigma = 1.0 + + def has_necessary_columns(self, dataframe): + """ + :param dataframe: + :return: + """ + booleans = list(map(lambda x: x in dataframe.columns, self.required_columns)) + return reduce(lambda x, y: x & y, booleans) + + def discard_invalid_value_detections(self, detections): + """ + :param detections: + :return: + """ + detections = detections.replace([np.inf, -np.inf], np.nan) + valid_alerts = detections[self.required_columns].notna().all(axis=1) + detections = detections[valid_alerts.values] + detections[self.required_columns] = detections[self.required_columns].apply( + lambda x: pd.to_numeric(x, errors='coerce')) + return detections + + def drop_duplicates(self, detections): + """ + :param detections: + :return: + """ + assert detections.index.name == 'oid' + detections = detections.copy() + detections['oid'] = detections.index + detections = detections.drop_duplicates(['oid', 'time']) + detections = detections[[col for col in detections.columns if col != 'oid']] + return detections + + def discard_noisy_detections(self, detections): + """ + :param detections: + :return: + """ + detections = detections[((detections['error'] > 0.0) & + (detections['error'] < self.max_sigma)) + ] + return detections + + def discard_defectuous_detections(self, detections): + detections = detections[detections['infobitssci'] == 0.0] + return detections + + def enough_alerts(self, detections, min_dets=5): + objects = detections.groupby("oid") + indexes = [] + for oid, group in objects: + if len(group.band == 1) > min_dets or len(group.band == 2) > min_dets: + indexes.append(oid) + return detections.loc[indexes] + + def preprocess(self, dataframe, objects=None): + """ + :param dataframe: + :param objects: + :return: + """ + self.verify_dataframe(dataframe) + dataframe = self.rename_columns_detections(dataframe) + if not self.has_necessary_columns(dataframe): + raise Exception('dataframe does not have all the necessary columns') + dataframe = self.drop_duplicates(dataframe) + dataframe = self.discard_invalid_value_detections(dataframe) + dataframe = self.discard_noisy_detections(dataframe) + dataframe = self.discard_defectuous_detections(dataframe) + dataframe = self.enough_alerts(dataframe) + return dataframe + + def rename_columns_detections(self, detections): + return detections.rename( + columns=self.column_translation, errors='ignore') From b8c56a21d5d7f94be50c8c5cb057f4fa3635cf0a Mon Sep 17 00:00:00 2001 From: Ignacio Reyes Date: Wed, 16 Jun 2021 02:40:10 -0400 Subject: [PATCH 02/12] fix warnings in folded kim extractor --- .../extractors/folded_kim_extractor.py | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 
12 deletions(-) diff --git a/lc_classifier/features/extractors/folded_kim_extractor.py b/lc_classifier/features/extractors/folded_kim_extractor.py index 80b146a..859de2c 100644 --- a/lc_classifier/features/extractors/folded_kim_extractor.py +++ b/lc_classifier/features/extractors/folded_kim_extractor.py @@ -55,20 +55,26 @@ def aux_function(oid_detections, band, **kwargs): return self.nan_series_in_band(band) oid_band_detections = oid_detections[oid_detections['band'] == band] + lc_len = len(oid_band_detections) + if lc_len <= 2: + psi_cumsum = psi_eta = np.nan + else: + time = oid_band_detections['time'].values + magnitude = oid_band_detections['magnitude'].values - time = oid_band_detections['time'].values - magnitude = oid_band_detections['magnitude'].values + folded_time = np.mod(time, 2 * oid_period) / (2 * oid_period) + sorted_mags = magnitude[np.argsort(folded_time)] + sigma = np.std(sorted_mags) + if sigma != 0.0: + m = np.mean(sorted_mags) + s = np.cumsum(sorted_mags - m) * 1.0 / (lc_len * sigma) + psi_cumsum = np.max(s) - np.min(s) + sigma_squared = sigma ** 2 + psi_eta = (1.0 / ((lc_len - 1) * sigma_squared) * + np.sum(np.power(sorted_mags[1:] - sorted_mags[:-1], 2))) + else: + psi_cumsum = psi_eta = np.nan - folded_time = np.mod(time, 2 * oid_period) / (2 * oid_period) - sorted_mags = magnitude[np.argsort(folded_time)] - sigma = np.std(sorted_mags) - m = np.mean(sorted_mags) - lc_len = len(sorted_mags) - s = np.cumsum(sorted_mags - m) * 1.0 / (lc_len * sigma) - psi_cumsum = np.max(s) - np.min(s) - sigma_squared = sigma ** 2 - psi_eta = (1.0 / ((lc_len - 1) * sigma_squared) * - np.sum(np.power(sorted_mags[1:] - sorted_mags[:-1], 2))) out = pd.Series( data=[psi_cumsum, psi_eta], index=columns) From d8770d99b0e8b9e34d005871d0329f542a571e1f Mon Sep 17 00:00:00 2001 From: Javier Date: Wed, 7 Jul 2021 10:34:02 -0400 Subject: [PATCH 03/12] fix bugs in preprocess --- .../features/custom/custom_hierarchical.py | 27 ++-- .../features/preprocess/preprocess_ztf.py | 125 ++---------------- 2 files changed, 22 insertions(+), 130 deletions(-) diff --git a/lc_classifier/features/custom/custom_hierarchical.py b/lc_classifier/features/custom/custom_hierarchical.py index df74a62..5bd1a22 100644 --- a/lc_classifier/features/custom/custom_hierarchical.py +++ b/lc_classifier/features/custom/custom_hierarchical.py @@ -9,7 +9,6 @@ from lc_classifier.features import MHPSExtractor from lc_classifier.features import IQRExtractor from lc_classifier.features import SNParametricModelExtractor -from lc_classifier.features import WiseStaticExtractor from lc_classifier.features import WiseStreamExtractor from lc_classifier.features import PeriodExtractor from lc_classifier.features import PowerRateExtractor @@ -18,7 +17,7 @@ from lc_classifier.features import GPDRWExtractor from ..core.base import FeatureExtractor, FeatureExtractorSingleBand -from ..preprocess import DetectionsPreprocessorZTF, StreamDetectionsPreprocessorZTF +from ..preprocess import DetectionsPreprocessorZTF import pandas as pd import logging @@ -78,8 +77,9 @@ def get_enough_alerts_mask(self, detections): ------- """ - n_detections = detections[["mjd"]].groupby(level=0).count() - has_enough_alerts = n_detections.mjd > 5 + n_detections_by_fid = detections[["mjd", "fid"]].groupby(["oid", "fid"]).count() + has_enough_alerts = n_detections_by_fid.mjd > 5 + has_enough_alerts = has_enough_alerts.groupby(level=0).sum() > 0 return has_enough_alerts def _compute_features(self, detections, **kwargs): @@ -105,7 +105,6 @@ def _compute_features(self, 
detections, **kwargs): too_short_oids = has_enough_alerts[~has_enough_alerts] too_short_features = pd.DataFrame(index=too_short_oids.index) detections = detections.loc[has_enough_alerts] - detections = detections.sort_values("mjd") non_detections = kwargs["non_detections"] if len(non_detections) == 0: @@ -146,7 +145,7 @@ def __init__(self, bands=None): HarmonicsExtractor(), GPDRWExtractor() ] - self.preprocessor = StreamDetectionsPreprocessorZTF() + self.preprocessor = DetectionsPreprocessorZTF() @lru_cache(1) def get_features_keys(self) -> List[str]: @@ -179,8 +178,9 @@ def get_enough_alerts_mask(self, detections): ------- """ - n_detections = detections[["mjd"]].groupby(level=0).count() - has_enough_alerts = n_detections.mjd > 5 + n_detections_by_fid = detections[["mjd", "fid"]].groupby(["oid", "fid"]).count() + has_enough_alerts = n_detections_by_fid.mjd > 5 + has_enough_alerts = has_enough_alerts.groupby(level=0).sum() > 0 return has_enough_alerts def _compute_features(self, detections, **kwargs): @@ -199,14 +199,15 @@ def _compute_features(self, detections, **kwargs): if not isinstance(detections, pd.core.frame.DataFrame): raise TypeError('detections has to be a DataFrame') - required = ["non_detections", "xmatches", "metadata"] + required = ["non_detections", "xmatches", "metadata", "objects"] for key in required: if key not in kwargs: raise Exception(f"HierarchicalFeaturesComputer requires {key} argument") - detections = self.preprocessor.preprocess(detections) + objects = kwargs["objects"] + detections = self.preprocessor.preprocess(detections, objects=objects) has_enough_alerts = self.get_enough_alerts_mask(detections) - too_short_oids = has_enough_alerts[~has_enough_alerts] - too_short_features = pd.DataFrame(index=too_short_oids.index) + # too_short_oids = has_enough_alerts[~has_enough_alerts] + # too_short_features = pd.DataFrame(index=too_short_oids.index) detections = detections.loc[has_enough_alerts] detections = detections.sort_values("mjd") if len(detections) == 0: @@ -231,5 +232,5 @@ def _compute_features(self, detections, **kwargs): logging.info(f"EXTRACTOR={ex}, FEATURE_SHAPE={df.shape}") features.append(df) df = pd.concat(features, axis=1, join="inner") - df = pd.concat([df, too_short_features], axis=0, join="outer", sort=True) + # df = pd.concat([df, too_short_features], axis=0, join="outer", sort=True) return df diff --git a/lc_classifier/features/preprocess/preprocess_ztf.py b/lc_classifier/features/preprocess/preprocess_ztf.py index 2aa7c7d..3994137 100644 --- a/lc_classifier/features/preprocess/preprocess_ztf.py +++ b/lc_classifier/features/preprocess/preprocess_ztf.py @@ -1,5 +1,4 @@ from .base import GenericPreprocessor -from functools import reduce import numpy as np import pandas as pd @@ -16,8 +15,7 @@ def __init__(self): 'sigmapsf_ml', 'ra', 'dec', - 'rb', - 'sgscore1' + 'rb' ] self.max_sigma = 1.0 self.rb_threshold = 0.55 @@ -27,8 +25,10 @@ def has_necessary_columns(self, dataframe): :param dataframe: :return: """ - booleans = list(map(lambda x: x in dataframe.columns, self.not_null_columns)) - return reduce(lambda x, y: x & y, booleans) + input_columns = set(dataframe.columns) + constraint = set(self.not_null_columns) + difference = constraint.difference(input_columns) + return len(difference) == 0 def discard_invalid_value_detections(self, detections): """ @@ -49,6 +49,7 @@ def drop_duplicates(self, detections): """ assert detections.index.name == 'oid' detections = detections.copy() + detections = detections.sort_values("mjd", ascending=True) 
detections['oid'] = detections.index detections = detections.drop_duplicates(['oid', 'mjd']) detections = detections[[col for col in detections.columns if col != 'oid']] @@ -110,120 +111,10 @@ def preprocess(self, dataframe, objects=None): dataframe = self.get_magpsf_ml(dataframe, objects) if not self.has_necessary_columns(dataframe): raise Exception('dataframe does not have all the necessary columns') - dataframe = self.drop_duplicates(dataframe) - dataframe = self.discard_invalid_value_detections(dataframe) - dataframe = self.discard_noisy_detections(dataframe) + dataframe.sort_values("mjd", inplace=True) dataframe = self.discard_bogus(dataframe) - dataframe = self.enough_alerts(dataframe) - return dataframe - - -class StreamDetectionsPreprocessorZTF(GenericPreprocessor): - def __init__(self): - super().__init__() - self.not_null_columns = [ - 'mjd', - 'fid', - 'magpsf', - 'sigmapsf', - 'magpsf_ml', - 'sigmapsf_ml', - 'ra', - 'dec', - 'rb', - ] - self.max_sigma = 1.0 - self.rb_threshold = 0.55 - - def has_necessary_columns(self, dataframe): - """ - :param dataframe: - :return: - """ - missing = set(self.not_null_columns).difference(set(dataframe.columns)) - return missing - - def discard_invalid_value_detections(self, detections): - """ - :param detections: - :return: - """ - detections = detections.replace([np.inf, -np.inf], np.nan) - valid_alerts = detections[self.not_null_columns].notna().all(axis=1) - detections = detections[valid_alerts.values] - detections[self.not_null_columns] = detections[self.not_null_columns].apply( - lambda x: pd.to_numeric(x, errors='coerce')) - return detections - - def drop_duplicates(self, detections): - """ - :param detections: - :return: - """ - assert detections.index.name == 'oid' - detections = detections.copy() - detections['oid'] = detections.index - detections = detections.drop_duplicates(['oid', 'mjd']) - detections = detections[[col for col in detections.columns if col != 'oid']] - return detections - - def discard_noisy_detections(self, detections): - """ - :param detections: - :return: - """ - detections = detections[((detections['sigmapsf_ml'] > 0.0) & - (detections['sigmapsf_ml'] < self.max_sigma)) - ] - return detections - - def discard_bogus(self, detections): - """ - - :param detections: - :return: - """ - detections = detections[detections['rb'] >= self.rb_threshold] - return detections - - def enough_alerts(self, detections, min_dets=5): - objects = detections.groupby("oid") - indexes = [] - for oid, group in objects: - if len(group.fid == 1) > min_dets or len(group.fid == 2) > min_dets: - indexes.append(oid) - return detections.loc[indexes] - - def get_magpsf_ml(self, detections): - def magpsf_ml(detections): - detections = detections.copy() - is_corrected = detections.corrected.all() - if is_corrected: - detections["magpsf_ml"] = detections["magpsf_corr"] - detections["sigmapsf_ml"] = detections["sigmapsf_corr_ext"] - else: - detections["magpsf_ml"] = detections["magpsf"] - detections["sigmapsf_ml"] = detections["sigmapsf"] - return detections - - detections = detections.groupby(level=0, sort=False)\ - .apply(magpsf_ml).droplevel(level=1) - return detections - - def preprocess(self, dataframe): - """ - :param dataframe: - :param objects: - :return: - """ - self.verify_dataframe(dataframe) - dataframe = self.get_magpsf_ml(dataframe) - missing = self.has_necessary_columns(dataframe) - if len(missing) > 0: - raise Exception(f'dataframe does not have all the necessary columns. 
Missing {missing}') - dataframe = self.drop_duplicates(dataframe) dataframe = self.discard_invalid_value_detections(dataframe) dataframe = self.discard_noisy_detections(dataframe) - dataframe = self.discard_bogus(dataframe) + dataframe = self.drop_duplicates(dataframe) dataframe = self.enough_alerts(dataframe) return dataframe From e0d09a4eda1e7274fab397c6eb5a977aa90e2437 Mon Sep 17 00:00:00 2001 From: Diego Rodriguez Date: Wed, 14 Jul 2021 12:18:10 -0400 Subject: [PATCH 04/12] update model to 1.1.0 --- lc_classifier/classifier/models.py | 200 ++++++++++++----------------- 1 file changed, 85 insertions(+), 115 deletions(-) diff --git a/lc_classifier/classifier/models.py b/lc_classifier/classifier/models.py index 0b50f72..78494dc 100644 --- a/lc_classifier/classifier/models.py +++ b/lc_classifier/classifier/models.py @@ -22,9 +22,8 @@ def predict(self, samples: pd.DataFrame) -> pd.DataFrame: probs = self.predict_proba(samples) predicted_class = probs.idxmax(axis=1) predicted_class_df = pd.DataFrame( - predicted_class, - columns=['classALeRCE'], - index=samples.index) + predicted_class, columns=["classALeRCE"], index=samples.index + ) predicted_class_df.index.name = samples.index.name return predicted_class_df @@ -57,16 +56,17 @@ class BaselineRandomForest(BaseClassifier): def __init__(self): self.random_forest_classifier = RandomForestClassifier( n_estimators=500, - max_features='auto', + max_features="auto", max_depth=None, n_jobs=1, class_weight=None, - criterion='entropy', + criterion="entropy", min_samples_split=2, - min_samples_leaf=1) + min_samples_leaf=1, + ) self.feature_preprocessor = FeaturePreprocessor() self.feature_list = None - self.model_filename = 'baseline_rf.pkl' + self.model_filename = "baseline_rf.pkl" def fit(self, samples: pd.DataFrame, labels: pd.DataFrame): samples = self.feature_preprocessor.preprocess_features(samples) @@ -77,7 +77,7 @@ def fit(self, samples: pd.DataFrame, labels: pd.DataFrame): self.feature_list = samples.columns samples_np_array = samples.values - labels_np_array = labels['classALeRCE'].loc[samples.index].values + labels_np_array = labels["classALeRCE"].loc[samples.index].values self.random_forest_classifier.fit(samples_np_array, labels_np_array) def predict_proba(self, samples: pd.DataFrame) -> pd.DataFrame: @@ -87,76 +87,63 @@ def predict_proba(self, samples: pd.DataFrame) -> pd.DataFrame: predicted_probs_df = pd.DataFrame( predicted_probs, columns=self.get_list_of_classes(), - index=samples.index.values + index=samples.index.values, ) - predicted_probs_df.index.name = 'oid' + predicted_probs_df.index.name = "oid" return predicted_probs_df def get_list_of_classes(self) -> list: return self.random_forest_classifier.classes_ def save_model(self, directory: str) -> None: - with open(os.path.join(directory, self.model_filename), 'wb') as f: - pickle.dump( - self.random_forest_classifier, - f, - pickle.HIGHEST_PROTOCOL) - with open(os.path.join(directory, 'feature_list.pkl'), 'wb') as f: - pickle.dump( - self.feature_list, - f, - pickle.HIGHEST_PROTOCOL) + with open(os.path.join(directory, self.model_filename), "wb") as f: + pickle.dump(self.random_forest_classifier, f, pickle.HIGHEST_PROTOCOL) + with open(os.path.join(directory, "feature_list.pkl"), "wb") as f: + pickle.dump(self.feature_list, f, pickle.HIGHEST_PROTOCOL) def load_model(self, directory: str) -> None: rf = pd.read_pickle(os.path.join(directory, self.model_filename)) self.random_forest_classifier = rf - self.feature_list = pd.read_pickle( - os.path.join(directory, 'feature_list.pkl')) 
+ self.feature_list = pd.read_pickle(os.path.join(directory, "feature_list.pkl")) class HierarchicalRandomForest(BaseClassifier): MODEL_NAME = "hierarchical_random_forest" - MODEL_VERSION = "1.0.0" + MODEL_VERSION = "1.1.0" MODEL_VERSION_NAME = f"{MODEL_NAME}_{MODEL_VERSION}" MODEL_PICKLE_PATH = os.path.join(PICKLE_PATH, f"{MODEL_VERSION_NAME}") def __init__(self, taxonomy_dictionary, non_used_features=None): n_trees = 500 self.top_classifier = RandomForestClassifier( - n_estimators=n_trees, - max_depth=None, - max_features='auto' + n_estimators=n_trees, max_depth=None, max_features="auto" ) self.stochastic_classifier = RandomForestClassifier( - n_estimators=n_trees, - max_depth=None, - max_features=0.2 + n_estimators=n_trees, max_depth=None, max_features=0.2 ) self.periodic_classifier = RandomForestClassifier( - n_estimators=n_trees, - max_depth=None, - max_features='auto' + n_estimators=n_trees, max_depth=None, max_features="auto" ) self.transient_classifier = RandomForestClassifier( - n_estimators=n_trees, - max_depth=None, - max_features='auto' + n_estimators=n_trees, max_depth=None, max_features="auto" ) - self.feature_preprocessor = FeaturePreprocessor(non_used_features=non_used_features) + self.feature_preprocessor = FeaturePreprocessor( + non_used_features=non_used_features + ) self.taxonomy_dictionary = taxonomy_dictionary self.feature_list = None self.inverted_dictionary = invert_dictionary(self.taxonomy_dictionary) self.pickles = { - "features_list":"features_RF_model.pkl", - "top_rf":"hierarchical_level_RF_model.pkl", - "periodic_rf":"periodic_level_RF_model.pkl", - "stochastic_rf":"stochastic_level_RF_model.pkl", - "transient_rf":"transient_level_RF_model.pkl" + "features_list": "features_RF_model.pkl", + "top_rf": "top_level_BRF_model.pkl", + "periodic_rf": "periodic_level_BRF_model.pkl", + "stochastic_rf": "stochastic_level_BRF_model.pkl", + "transient_rf": "transient_level_BRF_model.pkl", } self.url_model = f"https://assets.alerce.online/pipeline/hierarchical_rf_{self.MODEL_VERSION}/" @@ -169,10 +156,10 @@ def fit(self, samples: pd.DataFrame, labels: pd.DataFrame) -> None: for label in feeded_labels: if label not in expected_labels: - raise Exception(f'{label} is not in the taxonomy dictionary') + raise Exception(f"{label} is not in the taxonomy dictionary") # Create top class - labels['top_class'] = labels['classALeRCE'].map(self.inverted_dictionary) + labels["top_class"] = labels["classALeRCE"].map(self.inverted_dictionary) # Preprocessing samples = self.feature_preprocessor.preprocess_features(samples) @@ -183,28 +170,25 @@ def fit(self, samples: pd.DataFrame, labels: pd.DataFrame) -> None: self.feature_list = samples.columns # Train top classifier - self.top_classifier.fit(samples.values, labels['top_class'].values) + self.top_classifier.fit(samples.values, labels["top_class"].values) # Train specialized classifiers - is_stochastic = labels['top_class'] == 'Stochastic' + is_stochastic = labels["top_class"] == "Stochastic" self.stochastic_classifier.fit( - samples[is_stochastic].values, - labels[is_stochastic]['classALeRCE'].values + samples[is_stochastic].values, labels[is_stochastic]["classALeRCE"].values ) - is_periodic = labels['top_class'] == 'Periodic' + is_periodic = labels["top_class"] == "Periodic" self.periodic_classifier.fit( - samples[is_periodic].values, - labels[is_periodic]['classALeRCE'].values + samples[is_periodic].values, labels[is_periodic]["classALeRCE"].values ) - is_transient = labels['top_class'] == 'Transient' + is_transient = labels["top_class"] 
== "Transient" self.transient_classifier.fit( - samples[is_transient].values, - labels[is_transient]['classALeRCE'].values + samples[is_transient].values, labels[is_transient]["classALeRCE"].values ) - def check_missing_features(self,columns, feature_list): + def check_missing_features(self, columns, feature_list): missing = set(feature_list).difference(set(columns)) return missing @@ -222,76 +206,68 @@ def predict_proba(self, samples: pd.DataFrame) -> pd.DataFrame: periodic_probs = self.periodic_classifier.predict_proba(samples.values) transient_probs = self.transient_classifier.predict_proba(samples.values) - stochastic_index = self.top_classifier.classes_.tolist().index('Stochastic') - periodic_index = self.top_classifier.classes_.tolist().index('Periodic') - transient_index = self.top_classifier.classes_.tolist().index('Transient') + stochastic_index = self.top_classifier.classes_.tolist().index("Stochastic") + periodic_index = self.top_classifier.classes_.tolist().index("Periodic") + transient_index = self.top_classifier.classes_.tolist().index("Transient") - stochastic_probs = stochastic_probs * top_probs[:, stochastic_index].reshape([-1, 1]) + stochastic_probs = stochastic_probs * top_probs[:, stochastic_index].reshape( + [-1, 1] + ) periodic_probs = periodic_probs * top_probs[:, periodic_index].reshape([-1, 1]) - transient_probs = transient_probs * top_probs[:, transient_index].reshape([-1, 1]) + transient_probs = transient_probs * top_probs[:, transient_index].reshape( + [-1, 1] + ) final_probs = np.concatenate( - [stochastic_probs, periodic_probs, transient_probs], - axis=1 + [stochastic_probs, periodic_probs, transient_probs], axis=1 ) df = pd.DataFrame( - data=final_probs, - index=samples.index, - columns=self.get_list_of_classes() + data=final_probs, index=samples.index, columns=self.get_list_of_classes() ) df.index.name = samples.index.name return df def get_list_of_classes(self) -> list: final_columns = ( - self.stochastic_classifier.classes_.tolist() - + self.periodic_classifier.classes_.tolist() - + self.transient_classifier.classes_.tolist()) + self.stochastic_classifier.classes_.tolist() + + self.periodic_classifier.classes_.tolist() + + self.transient_classifier.classes_.tolist() + ) return final_columns def save_model(self, directory: str) -> None: - with open(os.path.join(directory, self.pickles['top_rf']), 'wb') as f: - pickle.dump( - self.top_classifier, - f, - pickle.HIGHEST_PROTOCOL) - - with open(os.path.join(directory, self.pickles['stochastic_rf']), 'wb') as f: - pickle.dump( - self.stochastic_classifier, - f, - pickle.HIGHEST_PROTOCOL) - - with open(os.path.join(directory, self.pickles['periodic_rf']), 'wb') as f: - pickle.dump( - self.periodic_classifier, - f, - pickle.HIGHEST_PROTOCOL) - - with open(os.path.join(directory, self.pickles['transient_rf']), 'wb') as f: - pickle.dump( - self.transient_classifier, - f, - pickle.HIGHEST_PROTOCOL) - - with open(os.path.join(directory, self.pickles['features_list']), 'wb') as f: - pickle.dump( - self.feature_list, - f, - pickle.HIGHEST_PROTOCOL) + with open(os.path.join(directory, self.pickles["top_rf"]), "wb") as f: + pickle.dump(self.top_classifier, f, pickle.HIGHEST_PROTOCOL) + + with open(os.path.join(directory, self.pickles["stochastic_rf"]), "wb") as f: + pickle.dump(self.stochastic_classifier, f, pickle.HIGHEST_PROTOCOL) + + with open(os.path.join(directory, self.pickles["periodic_rf"]), "wb") as f: + pickle.dump(self.periodic_classifier, f, pickle.HIGHEST_PROTOCOL) + + with open(os.path.join(directory, 
self.pickles["transient_rf"]), "wb") as f: + pickle.dump(self.transient_classifier, f, pickle.HIGHEST_PROTOCOL) + + with open(os.path.join(directory, self.pickles["features_list"]), "wb") as f: + pickle.dump(self.feature_list, f, pickle.HIGHEST_PROTOCOL) def load_model(self, directory: str) -> None: self.top_classifier = pd.read_pickle( - os.path.join(directory, self.pickles['top_rf'] )) + os.path.join(directory, self.pickles["top_rf"]) + ) self.stochastic_classifier = pd.read_pickle( - os.path.join(directory, self.pickles['stochastic_rf'])) + os.path.join(directory, self.pickles["stochastic_rf"]) + ) self.periodic_classifier = pd.read_pickle( - os.path.join(directory, self.pickles['periodic_rf'])) + os.path.join(directory, self.pickles["periodic_rf"]) + ) self.transient_classifier = pd.read_pickle( - os.path.join(directory, self.pickles['transient_rf'])) + os.path.join(directory, self.pickles["transient_rf"]) + ) self.feature_list = pd.read_pickle( - os.path.join(directory, self.pickles['features_list'])) + os.path.join(directory, self.pickles["features_list"]) + ) def download_model(self): if not os.path.exists(self.MODEL_PICKLE_PATH): @@ -304,8 +280,8 @@ def download_model(self): def predict_in_pipeline(self, input_features: pd.DataFrame) -> dict: if not isinstance(input_features, pd.core.frame.DataFrame): - raise TypeError('predict_in_pipeline expects a DataFrame.') - + raise TypeError("predict_in_pipeline expects a DataFrame.") + missing = self.check_missing_features(input_features.columns, self.feature_list) if len(missing) > 0: raise Exception(f"Missing features: {missing}") @@ -316,7 +292,7 @@ def predict_in_pipeline(self, input_features: pd.DataFrame) -> dict: prob_root = pd.DataFrame( self.top_classifier.predict_proba(input_features), columns=self.top_classifier.classes_, - index=input_features.index + index=input_features.index, ) prob_children = [] @@ -325,18 +301,14 @@ def predict_in_pipeline(self, input_features: pd.DataFrame) -> dict: child_models = [ self.stochastic_classifier, self.periodic_classifier, - self.transient_classifier - ] - child_names = [ - 'Stochastic', - 'Periodic', - 'Transient' + self.transient_classifier, ] + child_names = ["Stochastic", "Periodic", "Transient"] for name, model in zip(child_names, child_models): prob_child = pd.DataFrame( model.predict_proba(input_features), columns=model.classes_, - index=input_features.index + index=input_features.index, ) resp_children[name] = prob_child @@ -345,9 +317,7 @@ def predict_in_pipeline(self, input_features: pd.DataFrame) -> dict: prob_all = pd.concat(prob_children, axis=1, sort=False) return { - "hierarchical": { - "top": prob_root, - "children": resp_children}, + "hierarchical": {"top": prob_root, "children": resp_children}, "probabilities": prob_all, - "class": prob_all.idxmax(axis=1) + "class": prob_all.idxmax(axis=1), } From 67c1f14a67d3f454bf3a5ed4175efc9a1b2d9e16 Mon Sep 17 00:00:00 2001 From: Diego Rodriguez Date: Wed, 14 Jul 2021 13:07:24 -0400 Subject: [PATCH 05/12] fix bug when trying to use missing preprocessor --- .../custom/forced_photometry_extractor.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/lc_classifier/features/custom/forced_photometry_extractor.py b/lc_classifier/features/custom/forced_photometry_extractor.py index 72d0d06..6ae442d 100644 --- a/lc_classifier/features/custom/forced_photometry_extractor.py +++ b/lc_classifier/features/custom/forced_photometry_extractor.py @@ -18,7 +18,7 @@ from lc_classifier.features import GPDRWExtractor from 
..core.base import FeatureExtractor, FeatureExtractorSingleBand
-from ..preprocess import DetectionsPreprocessorZTF, StreamDetectionsPreprocessorZTF
+from ..preprocess import DetectionsPreprocessorZTF
 
 import pandas as pd
 import logging
@@ -42,7 +42,7 @@ def __init__(self, bands=None):
             PowerRateExtractor(),
             FoldedKimExtractor(),
             HarmonicsExtractor(),
-            GPDRWExtractor()
+            GPDRWExtractor(),
         ]
         self.preprocessor = DetectionsPreprocessorZTF()
 
@@ -107,9 +107,7 @@ def _compute_features(self, detections, **kwargs):
         shared_data = dict()
         grouped_detections = detections.groupby(level=0)
         for ex in self.extractors:
-            df = ex.compute_features(
-                grouped_detections,
-                shared_data=shared_data)
+            df = ex.compute_features(grouped_detections, shared_data=shared_data)
             logging.info(f"FLAG={ex}")
             features.append(df)
         df = pd.concat(features, axis=1, join="inner")
@@ -135,9 +133,9 @@ def __init__(self, bands=None):
             PowerRateExtractor(),
             FoldedKimExtractor(),
             HarmonicsExtractor(),
-            GPDRWExtractor()
+            GPDRWExtractor(),
         ]
-        self.preprocessor = StreamDetectionsPreprocessorZTF()
+        self.preprocessor = DetectionsPreprocessorZTF()
 
     def get_features_keys(self) -> List[str]:
         features_keys = []
@@ -185,13 +183,16 @@ def _compute_features(self, detections, **kwargs):
         """
         if not isinstance(detections, pd.core.frame.DataFrame):
-            raise TypeError('detections has to be a DataFrame')
-
-        required = ["xmatches", "metadata"]
+            raise TypeError("detections has to be a DataFrame")
+
+        required = ["xmatches", "metadata", "objects"]
         for key in required:
             if key not in kwargs:
-                raise Exception(f"StreamedForcedPhotometryExtractor requires {key} argument")
-        detections = self.preprocessor.preprocess(detections)
+                raise Exception(
+                    f"StreamedForcedPhotometryExtractor requires {key} argument"
+                )
+        objects = kwargs["objects"]
+        detections = self.preprocessor.preprocess(detections, objects=objects)
         has_enough_alerts = self.get_enough_alerts_mask(detections)
         too_short_oids = has_enough_alerts[~has_enough_alerts]
         too_short_features = pd.DataFrame(index=too_short_oids.index)

From bac74c0da05cf7bbbb26377c7fdcf5b8e517aa40 Mon Sep 17 00:00:00 2001
From: Javier
Date: Thu, 22 Jul 2021 14:59:31 -0400
Subject: [PATCH 06/12] the script doesn't use sgscore in batch

---
 lc_classifier/features/custom/custom_hierarchical.py       | 8 ++++----
 .../features/extractors/sn_non_detections_extractor.py     | 1 +
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/lc_classifier/features/custom/custom_hierarchical.py b/lc_classifier/features/custom/custom_hierarchical.py
index 5bd1a22..195789b 100644
--- a/lc_classifier/features/custom/custom_hierarchical.py
+++ b/lc_classifier/features/custom/custom_hierarchical.py
@@ -29,7 +29,7 @@ def __init__(self, bands=None):
         self.bands = bands if bands is not None else [1, 2]
         self.extractors = [
             GalacticCoordinatesExtractor(),
-            SGScoreExtractor(),
+            # SGScoreExtractor(),
             ColorFeatureExtractor(),
             RealBogusExtractor(),
             MHPSExtractor(),
@@ -102,8 +102,8 @@ def _compute_features(self, detections, **kwargs):
         objects = kwargs["objects"]
         detections = self.preprocessor.preprocess(detections, objects=objects)
         has_enough_alerts = self.get_enough_alerts_mask(detections)
-        too_short_oids = has_enough_alerts[~has_enough_alerts]
-        too_short_features = pd.DataFrame(index=too_short_oids.index)
+        # too_short_oids = has_enough_alerts[~has_enough_alerts]
+        # too_short_features = pd.DataFrame(index=too_short_oids.index)
         detections = detections.loc[has_enough_alerts]
 
         non_detections = kwargs["non_detections"]
@@ -121,7 +121,7 @@ def _compute_features(self, detections, **kwargs):
             logging.info(f"FLAG={ex}")
             features.append(df)
         df = pd.concat(features, axis=1, join="inner")
-        df = pd.concat([df, too_short_features], axis=0, join="outer", sort=True)
+        # df = pd.concat([df, too_short_features], axis=0, join="outer", sort=True)
         return df
 
 
diff --git a/lc_classifier/features/extractors/sn_non_detections_extractor.py b/lc_classifier/features/extractors/sn_non_detections_extractor.py
index 395c30f..6b4c016 100644
--- a/lc_classifier/features/extractors/sn_non_detections_extractor.py
+++ b/lc_classifier/features/extractors/sn_non_detections_extractor.py
@@ -217,4 +217,5 @@ def aux_function(oid_detections, **kwargs):
 
         sn_features = detections.apply(aux_function)
         sn_features.index.name = 'oid'
+        sn_features = sn_features.astype(float)
         return sn_features
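
Note on the next patch: it corrects how each child classifier's conditional probabilities are combined with the top-level branch probabilities, and pairs each child model with its matching branch name. A minimal sketch of the weighting step, using toy object IDs and hypothetical class names (not taken from this repository):

    import pandas as pd

    # Toy numbers; in the pipeline these frames come from predict_proba calls.
    prob_root = pd.DataFrame(
        {"Stochastic": [0.2, 0.3], "Transient": [0.7, 0.1], "Periodic": [0.1, 0.6]},
        index=["oid1", "oid2"])
    prob_child = pd.DataFrame(  # e.g. the transient branch; class names are made up
        {"SNIa": [0.8, 0.5], "SNII": [0.2, 0.5]},
        index=["oid1", "oid2"])

    # Scale each row by the parent's probability for that branch; this is the
    # prob_child.mul(prob_root[name].values, axis=0) step in predict_in_pipeline.
    weighted = prob_child.mul(prob_root["Transient"].values, axis=0)
    #        SNIa  SNII
    # oid1   0.56  0.14
    # oid2   0.05  0.05

Concatenating the three weighted child frames column-wise then yields rows that sum to 1, which is a quick sanity check for the combined `probabilities` output.
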
From 4cca0eed0fbebe58086f0992064413ba8cc1d314 Mon Sep 17 00:00:00 2001
From: Javier
Date: Fri, 23 Jul 2021 12:07:44 -0400
Subject: [PATCH 07/12] fix probability combination between classifier levels

---
 lc_classifier/classifier/models.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lc_classifier/classifier/models.py b/lc_classifier/classifier/models.py
index 78494dc..5f73e68 100644
--- a/lc_classifier/classifier/models.py
+++ b/lc_classifier/classifier/models.py
@@ -297,13 +297,13 @@ def predict_in_pipeline(self, input_features: pd.DataFrame) -> dict:
 
         prob_children = []
         resp_children = {}
-
         child_models = [
             self.stochastic_classifier,
-            self.periodic_classifier,
             self.transient_classifier,
+            self.periodic_classifier,
         ]
-        child_names = ["Stochastic", "Periodic", "Transient"]
+        child_names = ["Stochastic", "Transient", "Periodic"]
+
         for name, model in zip(child_names, child_models):
             prob_child = pd.DataFrame(
                 model.predict_proba(input_features),
@@ -312,7 +312,7 @@ def predict_in_pipeline(self, input_features: pd.DataFrame) -> dict:
             )
             resp_children[name] = prob_child
 
-            prob_child = prob_child.mul(prob_root[name].values, axis="rows")
+            prob_child = prob_child.mul(prob_root[name].values, axis=0)
             prob_children.append(prob_child)
 
         prob_all = pd.concat(prob_children, axis=1, sort=False)

From 2b3f5fefb99182cd46be8536a7ba393863dc5747 Mon Sep 17 00:00:00 2001
From: Ignacio Reyes
Date: Thu, 5 Aug 2021 22:20:32 -0400
Subject: [PATCH 08/12] fix tests

---
 tests/features/test_forced_photometry_extractor.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/features/test_forced_photometry_extractor.py b/tests/features/test_forced_photometry_extractor.py
index 655276b..5d028dc 100644
--- a/tests/features/test_forced_photometry_extractor.py
+++ b/tests/features/test_forced_photometry_extractor.py
@@ -34,7 +34,9 @@ def test_forced_photometry_extractor(self):
             detections=detections)
 
         self.assertEqual(98, features_df.shape[0])
-        self.assertEqual(162, features_df.shape[1])
+        self.assertEqual(
+            len(self.forced_photometry_extractor.get_features_keys()),
+            features_df.shape[1])
         self.assertFalse(np.all(np.isnan(features_df.values), axis=(0, 1)))
 
     def test_forced_photometry_extractor_stream(self):
@@ -61,8 +63,8 @@ def test_forced_photometry_extractor_stream(self):
     def test_get_features_keys(self):
         keys = self.forced_photometry_extractor.get_features_keys()
         keys_stream = self.streamed_forced_photometry_extractor.get_features_keys()
-        self.assertEqual(162, len(keys))
-        self.assertEqual(162, len(keys_stream))
+        self.assertEqual(154, len(keys))
+        self.assertEqual(154, len(keys_stream))
 
 
 if __name__ == '__main__':

From 
13a2040fad04d9f1c048f63e2b42d34ed2f597ca Mon Sep 17 00:00:00 2001 From: Ignacio Reyes Date: Mon, 9 Aug 2021 11:11:29 -0400 Subject: [PATCH 09/12] external resources should be available now --- lc_classifier/classifier/models.py | 2 +- lc_classifier/features/extractors/wise_static_extractor.py | 2 +- tests/classifier/test_hierarchical_random_forest.py | 2 +- tests/classifier/test_metrics.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lc_classifier/classifier/models.py b/lc_classifier/classifier/models.py index a55bfcb..4f82009 100644 --- a/lc_classifier/classifier/models.py +++ b/lc_classifier/classifier/models.py @@ -109,7 +109,7 @@ def load_model(self, directory: str) -> None: class HierarchicalRandomForest(BaseClassifier): MODEL_NAME = "hierarchical_random_forest" - MODEL_VERSION = "1.1.0" + MODEL_VERSION = "1.1.1" MODEL_VERSION_NAME = f"{MODEL_NAME}_{MODEL_VERSION}" MODEL_PICKLE_PATH = os.path.join(PICKLE_PATH, f"{MODEL_VERSION_NAME}") taxonomy_dictionary = { diff --git a/lc_classifier/features/extractors/wise_static_extractor.py b/lc_classifier/features/extractors/wise_static_extractor.py index e7ea695..1df3074 100644 --- a/lc_classifier/features/extractors/wise_static_extractor.py +++ b/lc_classifier/features/extractors/wise_static_extractor.py @@ -10,7 +10,7 @@ FILE_PATH = os.path.dirname(os.path.abspath(__file__)) WISE_CSV = os.path.abspath(os.path.join(FILE_PATH, "data/wise_bands.csv")) -WISE_CSV_URL = "https://droppy.alerce.online/$/5tZUY" +WISE_CSV_URL = "https://alerce-static.s3.amazonaws.com/datasets/wise_bands.csv" def compute_colors_from_bands(bands: pd.DataFrame) -> pd.DataFrame: diff --git a/tests/classifier/test_hierarchical_random_forest.py b/tests/classifier/test_hierarchical_random_forest.py index 4ca572c..9dd28e4 100644 --- a/tests/classifier/test_hierarchical_random_forest.py +++ b/tests/classifier/test_hierarchical_random_forest.py @@ -15,7 +15,7 @@ def setUp(self) -> None: del self.test_labels["objectId"] self.model_trained = HierarchicalRandomForest() - # self.model_trained.download_model() + self.model_trained.download_model() self.model_trained.load_model(self.model_trained.MODEL_PICKLE_PATH) self.taxonomy = { diff --git a/tests/classifier/test_metrics.py b/tests/classifier/test_metrics.py index 085be1d..4972478 100644 --- a/tests/classifier/test_metrics.py +++ b/tests/classifier/test_metrics.py @@ -26,7 +26,7 @@ def test_all_label_classes_in_prediction(self): class HRFMetricsTest(unittest.TestCase): def setUp(self): self.model_trained = HierarchicalRandomForest() - # self.model_trained.download_model() + self.model_trained.download_model() self.model_trained.load_model(self.model_trained.MODEL_PICKLE_PATH) self.train_features = pd.read_csv('data_examples/2000_features.csv') From 3198432ba0d170e5f4bd5dd2ca3182c117965be4 Mon Sep 17 00:00:00 2001 From: Ignacio Reyes Date: Mon, 9 Aug 2021 11:31:56 -0400 Subject: [PATCH 10/12] pickle5 for python 3.6 and 3.7 compatibility --- lc_classifier/classifier/models.py | 32 +++++++++++++++--------------- requirements.txt | 1 + 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/lc_classifier/classifier/models.py b/lc_classifier/classifier/models.py index 4f82009..3516f29 100644 --- a/lc_classifier/classifier/models.py +++ b/lc_classifier/classifier/models.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd import os -import pickle +import pickle5 as pickle import wget from imblearn.ensemble import BalancedRandomForestClassifier as RandomForestClassifier from 
lc_classifier.classifier.preprocessing import FeaturePreprocessor @@ -266,21 +266,21 @@ def save_model(self, directory: str) -> None: pickle.dump(self.feature_list, f, pickle.HIGHEST_PROTOCOL) def load_model(self, directory: str) -> None: - self.top_classifier = pd.read_pickle( - os.path.join(directory, self.pickles["top_rf"]) - ) - self.stochastic_classifier = pd.read_pickle( - os.path.join(directory, self.pickles["stochastic_rf"]) - ) - self.periodic_classifier = pd.read_pickle( - os.path.join(directory, self.pickles["periodic_rf"]) - ) - self.transient_classifier = pd.read_pickle( - os.path.join(directory, self.pickles["transient_rf"]) - ) - self.feature_list = pd.read_pickle( - os.path.join(directory, self.pickles["features_list"]) - ) + with open(os.path.join(directory, self.pickles["top_rf"]), 'rb') as f: + self.top_classifier = pickle.load(f) + + with open(os.path.join(directory, self.pickles["stochastic_rf"]), 'rb') as f: + self.stochastic_classifier = pickle.load(f) + + with open(os.path.join(directory, self.pickles["periodic_rf"]), 'rb') as f: + self.periodic_classifier = pickle.load(f) + + with open(os.path.join(directory, self.pickles["transient_rf"]), 'rb') as f: + self.transient_classifier = pickle.load(f) + + with open(os.path.join(directory, self.pickles["features_list"]), 'rb') as f: + self.feature_list = pickle.load(f) + self.check_loaded_models() def check_loaded_models(self): diff --git a/requirements.txt b/requirements.txt index ee9e3d7..6f13c9f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,7 @@ tensorflow >=2.3 pandas >=1.1 wget >=3.2 celerite2 >=0.1 +pickle5 -e git+https://git@github.com/alercebroker/turbo-fats#egg=turbofats -e git+https://git@github.com/alercebroker/mhps#egg=mhps -e git+https://git@github.com/alercebroker/P4J#egg=P4J From cccaf18adec302ef4296f675fb6ee1b39801df09 Mon Sep 17 00:00:00 2001 From: Ignacio Reyes Date: Mon, 9 Aug 2021 11:44:57 -0400 Subject: [PATCH 11/12] [2nd attempt] pickle5 for python 3.6 and 3.7 compatibility --- lc_classifier/classifier/models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lc_classifier/classifier/models.py b/lc_classifier/classifier/models.py index 3516f29..a0a7009 100644 --- a/lc_classifier/classifier/models.py +++ b/lc_classifier/classifier/models.py @@ -102,7 +102,9 @@ def save_model(self, directory: str) -> None: pickle.dump(self.feature_list, f, pickle.HIGHEST_PROTOCOL) def load_model(self, directory: str) -> None: - rf = pd.read_pickle(os.path.join(directory, self.model_filename)) + with open(os.path.join(directory, self.model_filename), 'rb') as f: + rf = pickle.load(f) + self.random_forest_classifier = rf self.feature_list = pd.read_pickle(os.path.join(directory, "feature_list.pkl")) From 014d9dc7e71d128bf0cb74448844a6cded61f24f Mon Sep 17 00:00:00 2001 From: Ignacio Reyes Date: Mon, 9 Aug 2021 11:56:01 -0400 Subject: [PATCH 12/12] [3rd attempt] pickle5 for python 3.6 and 3.7 compatibility --- lc_classifier/classifier/models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lc_classifier/classifier/models.py b/lc_classifier/classifier/models.py index a0a7009..93f8271 100644 --- a/lc_classifier/classifier/models.py +++ b/lc_classifier/classifier/models.py @@ -106,7 +106,9 @@ def load_model(self, directory: str) -> None: rf = pickle.load(f) self.random_forest_classifier = rf - self.feature_list = pd.read_pickle(os.path.join(directory, "feature_list.pkl")) + + with open(os.path.join(directory, "feature_list.pkl"), 'rb') as f: + self.feature_list 
= pickle.load(f) class HierarchicalRandomForest(BaseClassifier):
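
Note on patches 10-12: model (de)serialization settles on a single pattern. A minimal standalone sketch of that round-trip, with a hypothetical file name (`pickle5` backports protocol-5 pickling to Python 3.6 and 3.7):

    import os
    import pickle5 as pickle  # on Python >= 3.8 the standard pickle module suffices

    def save_model(obj, directory: str, filename: str = "model.pkl") -> None:
        # Serialize with the highest protocol the backport supports (protocol 5).
        with open(os.path.join(directory, filename), "wb") as f:
            pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

    def load_model(directory: str, filename: str = "model.pkl"):
        # pickle5.load replaces pd.read_pickle so that protocol-5 pickles written
        # on newer Python versions can still be read on 3.6/3.7.
        with open(os.path.join(directory, filename), "rb") as f:
            return pickle.load(f)
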