Skip to content

Commit

Permalink
Added try catch to generate metrics and new error log to submission results
Browse files Browse the repository at this point in the history
  • Loading branch information
MitchellAV committed Aug 20, 2024
1 parent a534c70 commit 846b0ce
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 90 deletions.
4 changes: 2 additions & 2 deletions ec2/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
FROM python:3.10-slim as base
FROM python:3.10-slim

# docker build --progress=plain -t "hmm:Dockerfile" -f valhub/Dockerfile .
WORKDIR /root
RUN mkdir admin
WORKDIR /root/admin
COPY . .

ENV PORT 7000
ENV PORT=7000
EXPOSE 7000

RUN apt-get update -qq
Expand Down
2 changes: 1 addition & 1 deletion ec2/routes.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"s3": "s3://pv-validation-hub-bucket",
"api": "https://api.pv-validation-hub.org"
},
"config_file_path": "./config.json",
"config_file_path": "./time-shift-validation-hub/config.json",
"file_data_folder_path": "./time-shift-validation-hub/data/file_data",
"sys_metadata_file_path": "./time-shift-validation-hub/data/system_metadata.csv",
"file_metadata_file_path": "./time-shift-validation-hub/data/file_metadata.csv",
Expand Down
2 changes: 0 additions & 2 deletions valhub/valhub/settings.py
Original file line number	Diff line number	Diff line change
Expand Up @@ -137,7 +137,6 @@ def get_secret(secret_name: str):
raise Exception("DJANGO_SECRET_KEY not in secret")
SECRET_KEY = secret["DJANGO_SECRET_KEY"]


except:
SECRET_KEY = os.environ["DJANGO_SECRET_KEY"]

Expand Down Expand Up @@ -267,7 +266,6 @@ def get_secret(secret_name: str):
logger.error(
"Error retrieving secrets from AWS Secrets Manager, using default values"
)
logger.exception(e)

DATABASES = {
"default": {
Expand Down
9 changes: 8 additions & 1 deletion workers/src/logging_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@
"level": "DEBUG",
"formatter": "json",
"filename": "logs/submission.log.jsonl"
},
"error_file": {
"class": "logging.handlers.RotatingFileHandler",
"level": "ERROR",
"formatter": "detailed",
"filename": "logs/submission.error.log"
}
},
"loggers": {
Expand All @@ -50,7 +56,8 @@
"handlers": [
"stdout",
"file",
"json_file"
"json_file",
"error_file"
],
"propagate": false
}
Expand Down
97 changes: 34 additions & 63 deletions workers/src/pvinsight-validation-runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,18 +539,23 @@ def run( # noqa: C901

# raise Exception("Finished Successfully")

number_of_errors = loop_over_files_and_generate_results(
number_of_submission_errors = loop_over_files_and_generate_results(
func_arguments_list
)
logger.info(f"number_of_errors: {number_of_errors}")
logger.info(f"number_of_submission_errors: {number_of_submission_errors}")

# raise Exception("Finished Successfully")

results_list = loop_over_results_and_generate_metrics(
data_dir=data_dir,
results_dir=results_dir,
current_evaluation_dir=current_evaluation_dir,
results_list, number_of_metrics_errors = (
loop_over_results_and_generate_metrics(
data_dir=data_dir,
results_dir=results_dir,
current_evaluation_dir=current_evaluation_dir,
)
)
logger.info(f"number_of_metrics_errors: {number_of_metrics_errors}")

number_of_errors = number_of_submission_errors + number_of_metrics_errors

# raise Exception("Finished Successfully")

Expand Down Expand Up @@ -714,49 +719,6 @@ def m_median(df: pd.DataFrame, column: str):
logger.error("Error generating private report for submission.")
logger.exception(e)

# Loop through all of the plot dictionaries and generate plots and
# associated tables for reporting
# for plot in config_data["plots"]:
# if plot["type"] == "histogram":
# if "color_code" in plot:
# color_code = plot["color_code"]
# else:
# color_code = None
# gen_plot = generate_histogram(
# results_df_private, plot["x_val"], plot["title"], color_code
# )
# # Save the plot
# gen_plot.savefig(os.path.join(results_dir, plot["save_file_path"]))
# plt.close()
# plt.clf()
# # Write the stratified results to a table for private reporting
# # (if color_code param is not None)
# if color_code:
# stratified_results_tbl = pd.DataFrame(
# results_df_private.groupby(color_code)[
# plot["x_val"]
# ].mean()
# )
# stratified_results_tbl.to_csv(
# os.path.join(
# results_dir,
# module_name
# + "_"
# + str(color_code)
# + "_"
# + plot["x_val"]
# + ".csv",
# )
# )
# if plot["type"] == "scatter_plot":
# gen_plot = generate_scatter_plot(
# results_df_private, plot["x_val"], plot["y_val"], plot["title"]
# )
# # Save the plot
# gen_plot.savefig(os.path.join(results_dir, plot["save_file_path"]))
# plt.close()
# plt.clf()

logger.info(f"number_of_errors: {number_of_errors}")

success_rate = (
Expand All @@ -767,7 +729,7 @@ def m_median(df: pd.DataFrame, column: str):
f"{total_number_of_files - number_of_errors} out of {total_number_of_files} files processed successfully"
)

# public_metrics_dict["success_rate"] = success_rate
public_metrics_dict["success_rate"] = success_rate
return public_metrics_dict


Expand Down Expand Up @@ -1083,8 +1045,9 @@ def loop_over_results_and_generate_metrics(
data_dir: str,
results_dir: str,
current_evaluation_dir: str,
) -> list[dict[str, Any]]:
) -> tuple[list[dict[str, Any]], int]:
all_results: list[dict[str, Any]] = []
number_of_errors = 0

file_metadata_df: pd.DataFrame = pd.read_csv(
os.path.join(data_dir, "metadata", "file_metadata.csv")
Expand Down Expand Up @@ -1128,20 +1091,28 @@ def loop_over_results_and_generate_metrics(

function_parameters = ["time_series", *config_data["allowable_kwargs"]]

result = generate_performance_metrics_for_submission(
file_name,
config_data,
system_metadata_dict,
results_dir,
data_dir,
submission_runtime,
function_parameters,
)
try:
result = generate_performance_metrics_for_submission(
file_name,
config_data,
system_metadata_dict,
results_dir,
data_dir,
submission_runtime,
function_parameters,
)

logger.info(f"{file_name}: {result}")
all_results.append(result)
logger.info(f"{file_name}: {result}")
all_results.append(result)
except Exception as e:
number_of_errors += 1
# TODO: add error code
logger.error(
f"Error generating performance metrics for {file_name}"
)
logger.exception(e)

return all_results
return all_results, number_of_errors


def generate_performance_metrics_for_submission(
Expand Down
53 changes: 32 additions & 21 deletions workers/src/submission_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -821,34 +821,45 @@ def main():
stop_event.set()
t.join()

log_file = os.path.join(LOG_FILE_DIR, "submission.log")
json_log_file = os.path.join(LOG_FILE_DIR, "submission.log.jsonl")

# push log files to s3

push_to_s3(
log_file,
f"submission_files/submission_user_{user_id}/submission_{submission_id}/logs/submission.log",
analysis_id,
submission_id,
)
push_to_s3(
json_log_file,
f"submission_files/submission_user_{user_id}/submission_{submission_id}/logs/submission.log.jsonl",
analysis_id,
submission_id,
)

# Remove all log files
os.remove(log_file)
os.remove(json_log_file)
upload_logs_to_s3(user_id, analysis_id, submission_id)

is_finished = True
break

time.sleep(1)


def upload_logs_to_s3(user_id: str, analysis_id: str, submission_id: str) -> None:
    """Push the three submission log files to S3, then delete them locally.

    Uploads the plain-text log, the error log, and the JSON-lines log for the
    given submission under
    ``submission_files/submission_user_{user_id}/submission_{submission_id}/logs/``,
    then removes the local copies so the next submission starts with fresh logs.

    Args:
        user_id: ID of the user who owns the submission (used in the S3 key).
        analysis_id: ID of the analysis the submission belongs to.
        submission_id: ID of the submission whose logs are uploaded.
    """
    log_file = os.path.join(LOG_FILE_DIR, "submission.log")
    error_log_file = os.path.join(LOG_FILE_DIR, "submission.error.log")
    json_log_file = os.path.join(LOG_FILE_DIR, "submission.log.jsonl")

    s3_prefix = (
        f"submission_files/submission_user_{user_id}"
        f"/submission_{submission_id}/logs"
    )

    # push log files to s3
    push_to_s3(
        log_file,
        f"{s3_prefix}/submission.log",
        analysis_id,
        submission_id,
    )
    push_to_s3(
        error_log_file,
        f"{s3_prefix}/submission.error.log",
        analysis_id,
        submission_id,
    )
    push_to_s3(
        json_log_file,
        f"{s3_prefix}/submission.log.jsonl",
        analysis_id,
        submission_id,
    )

    # Remove all log files so a stale error log from this submission is not
    # re-uploaded for the next one. (Original code forgot error_log_file.)
    # FileNotFoundError is suppressed in case a handler never created the file.
    for path in (log_file, error_log_file, json_log_file):
        with contextlib.suppress(FileNotFoundError):
            os.remove(path)


if __name__ == "__main__":
logger.info(f"Starting Submission Worker.")

Expand Down

0 comments on commit 846b0ce

Please sign in to comment.