From 6e62ff8f665ed5ef8de55e9dd266bff03188cff3 Mon Sep 17 00:00:00 2001
From: Vishal <vishalmhjn3@gmail.com>
Date: Thu, 30 May 2024 15:19:51 +0200
Subject: [PATCH] rearrange: delete old scripts

---
 src/prepare_data.py | 255 --------------------------------------------
 1 file changed, 255 deletions(-)
 delete mode 100644 src/prepare_data.py

diff --git a/src/prepare_data.py b/src/prepare_data.py
deleted file mode 100644
index 5611472..0000000
--- a/src/prepare_data.py
+++ /dev/null
@@ -1,255 +0,0 @@
-from itertools import groupby
-from operator import itemgetter
-from typing import List, Tuple
-import numpy as np
-import pandas as pd
-
-
-# def preprocess_data(
-#     X_list: List[np.ndarray],
-#     W_list: List[np.ndarray],
-#     lookback_timesteps: int,
-#     dyn_to_static: List[str],
-#     y_list: List[np.ndarray] = None,
-# ) -> Tuple[np.array, np.array, np.array]:
-#     """
-#     Preprocesses data for input to a model.
-#     Can be used for both training (with y) and inference (without y).
-
-#     Args:
-#         X_list (List[np.ndarray]): List of numpy arrays containing dynamic features.
-#         W_list (List[np.ndarray]): List of numpy arrays containing static features.
-#         lookback_timesteps (int): Number of timesteps to look back in the sequence.
-#         dyn_to_static (List[str]): List of strings representing dynamic features to be treated as static.
-#         y_list (List[np.ndarray], optional): List of numpy arrays containing target values. Defaults to None.
-
-#     Returns:
-#         Tuple[np.array, np.array, np.array]: Preprocessed data as numpy arrays (X, W, and optionally y).
-#     """
-#     X = np.array(X_list, dtype="float64")
-#     W = np.array(W_list, dtype="float64")
-
-#     W = np.concatenate(
-#         [
-#             W,
-#             X[:, int(np.floor(lookback_timesteps / 2)), : len(dyn_to_static)].reshape(
-#                 X.shape[0], -1
-#             ),
-#         ],
-#         axis=1,
-#     )
-#     X = X[:, :, len(dyn_to_static) :]
-
-#     W[:, -1] = np.round(W[:, -1])
-#     W[:, -2] = np.round(W[:, -2])
-
-#     if y_list is not None:
-#         y = np.array(y_list, dtype="float64")
-#         return X, y, W
-#     else:
-#         return X, W
-
-
-def split_sequences(
-    sequences: np.ndarray,
-    static: np.ndarray,
-    id_det: int,
-    n_steps: int,
-    p_horizon: int = None,
-    auto_regressive: bool = False,
-    inference: bool = False,
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-    """
-    Split a multivariate sequence into samples for training or inference.
-
-    Args:
-        sequences (np.ndarray): Multivariate sequence to split.
-        static (np.ndarray): Static values for the sequence.
-        id_det (int): Identifier for the sequence.
-        n_steps (int): Number of time steps for each sample.
-        p_horizon (int, optional): Prediction horizon for each sample. Not required for inference.
-        auto_regressive (bool, optional): Whether the sequence is auto-regressive. Defaults to False.
-        inference (bool, optional): Whether the function is used for inference. Defaults to False.
-
-    Returns:
-        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Tuple containing arrays (W, X, y, z) where:
-            - W is the static values,
-            - X is the input sequences,
-            - y is the output sequences (only for training),
-            - z is the identifier.
-    """
-    W, X, y, z = list(), list(), list(), list()
-
-    if inference:
-        indices = [0]
-    else:
-        indices = range(len(sequences))
-
-    for i in indices:
-        end_ix = i + n_steps
-
-        if not inference and (end_ix + p_horizon > len(sequences)):
-            break
-
-        if not auto_regressive:
-            seq_x = sequences[i:end_ix, :-1]
-        else:
-            seq_x = sequences[i:end_ix, :]
-
-        X.append(seq_x)
-        W.append(static)
-        z.append(id_det)
-
-        if not inference:
-            seq_y = sequences[end_ix : end_ix + p_horizon, -1]
-            y.append(seq_y)
-
-    if inference:
-        return np.array(W), np.array(X), np.array(z)
-    else:
-        return np.array(W), np.array(X), np.array(y), np.array(z)
-
-
-def format_data(
-    df: pd.DataFrame,
-    lookback_timesteps: int,
-    features_static: List[str],
-    features_dynamic: List[str],
-    prediction_horizon: int = None,
-    auto_regressive: bool = False,
-    inference: bool = False,
-) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[int]]:
-    """
-    Format the data for training or inference.
-
-    Args:
-        df (pd.DataFrame): DataFrame containing the data.
-        lookback_timesteps (int): Number of lookback timesteps.
-        features_static (List[str]): List of static features.
-        features_dynamic (List[str]): List of dynamic features.
-        prediction_horizon (int, optional): Prediction horizon. Required for training.
-        auto_regressive (bool, optional): Whether the sequence is auto-regressive. Defaults to False.
-        inference (bool, optional): Whether the function is used for inference. Defaults to False.
-
-    Returns:
-        Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[int]]:
-        Tuple containing lists of arrays (W_list, X_list, y_list, z_list) where:
-            - W_list contains static values,
-            - X_list contains input sequences,
-            - y_list contains output sequences (only for training),
-            - z_list contains identifiers.
-    """
-    W_list, X_list, y_list, z_list = list(), list(), list(), list()
-    for i in df.paris_id.unique():
-        temp = df[df.paris_id == i]
-        temp = temp.sort_values(by="time_idx")
-        temp.index = temp.time_idx
-
-        w = np.array(temp[features_static].drop_duplicates())[0]
-        for k, g in groupby(enumerate(list(temp.index)), lambda ix: ix[0] - ix[1]):
-            temp_list = list(map(itemgetter(1), g))
-
-            if len(temp_list) >= lookback_timesteps:
-                temp_df = temp.loc[temp_list, features_dynamic]
-                if inference:
-                    W, X, z = split_sequences(
-                        np.array(temp_df),
-                        w,
-                        i,
-                        lookback_timesteps,
-                        auto_regressive=auto_regressive,
-                        inference=True,
-                    )
-                    W_list.extend(W)
-                    X_list.extend(X)
-                    z_list.extend(z)
-                else:
-                    W, X, y, z = split_sequences(
-                        np.array(temp_df),
-                        w,
-                        i,
-                        lookback_timesteps,
-                        prediction_horizon,
-                        auto_regressive=auto_regressive,
-                        inference=False,
-                    )
-                    W_list.extend(W)
-                    X_list.extend(X)
-                    y_list.extend(y)
-                    z_list.extend(z)
-    if inference:
-        return W_list, X_list, z_list
-    else:
-        return W_list, X_list, y_list, z_list
-
-
-# def split_data(
-#     X: np.ndarray, W: np.ndarray, y: np.ndarray = None
-# ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-#     """
-#     Splits the input data into categorical and continuous parts.
-#     Can be used for both training (with y) and inference (without y).
-
-#     Args:
-#         X (np.ndarray): Array containing input features.
-#         W (np.ndarray): Array containing static features.
-#         y (np.ndarray, optional): Array containing target values. Defaults to None.
-
-#     Returns:
-#         Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: A tuple containing:
-#             - cat_stat (np.ndarray): Array of categorical static features.
-#             - cont_stat (np.ndarray): Array of continuous static features.
-#             - cat_dyn (np.ndarray): Array of categorical dynamic features.
-#             - cont_dyn (np.ndarray): Array of continuous dynamic features.
-#     """
-#     static_cols = 3
-#     cat_stat = W[:, static_cols:]
-#     cont_stat = W[:, :static_cols]
-#     cat_dyn = X[:, :, :0]
-#     cont_dyn = X[:, :, 0:]
-
-#     return cat_stat, cont_stat, cat_dyn, cont_dyn
-
-
-def scale_and_create_df(
-    X_formatted,
-    scaler,
-    continous_feature_columns,
-    categorical_feature_columns,
-    other_columns,
-    target_as_autoregressive_feature_name,
-    target_column_name,
-):
-    """
-    Scales the continuous feature columns, creates a DataFrame with scaled features,
-    and assigns categorical features and other columns to the scaled DataFrame.
-
-    Args:
-            X_formatted (pd.DataFrame): Original DataFrame with features.
-            scaler: Scaler object to transform the continuous feature columns.
-            continous_feature_columns (List[str]): List of column names for continuous features.
-            categorical_feature_columns (List[str]): List of column names for categorical features.
-            other_columns (List[str]): List of column names for other features.
-
-    Returns:
-            pd.DataFrame: DataFrame containing scaled features.
-    """
-    scaled_features = scaler.transform(X_formatted[continous_feature_columns].values)
-
-    # Create DataFrame with scaled features
-    scaled_features_df = pd.DataFrame(
-        scaled_features, index=X_formatted.index, columns=continous_feature_columns
-    )
-
-    # Assign categorical features and other columns to scaled DataFrame
-    scaled_features_df[categorical_feature_columns] = X_formatted[
-        categorical_feature_columns
-    ]
-    scaled_features_df[other_columns] = X_formatted[other_columns]
-
-    # Assign 'q' column to scaled DataFrame
-    scaled_features_df[target_column_name] = X_formatted[
-        target_as_autoregressive_feature_name
-    ]
-
-    return scaled_features_df