From 818019303245da364ed0eb9918ee315c32f1df88 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Fri, 17 Nov 2023 23:35:34 +0000 Subject: [PATCH 01/11] adding adam categories for weekly app categories --- README.md | 16 ++++- bin/dump_weekly_ADAM_app_categories.sh | 3 + source/custom_scripts/dump_query_results.py | 5 ++ .../dump_weekly_ADAM_app_categories.py | 56 +++++++++++++++++++ 4 files changed, 76 insertions(+), 4 deletions(-) create mode 100755 bin/dump_weekly_ADAM_app_categories.sh create mode 100644 source/custom_scripts/dump_weekly_ADAM_app_categories.py diff --git a/README.md b/README.md index 283a7d5..a612fcd 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,15 @@ source/daily/upload_public_narratives_count.py source/daily/make_reporting_tables.py +------------------- +Within the logstash Dockerfile there is: +https://github.com/kbase/logstash/blob/41778da1238129a65296bdddcb6ff26e9c694779/Dockerfile#L24-L29 +The rm at the end is, I believe, just cleaning up after itself. This was set up by Steve for Cheyenne's work. +This is used by this code: +https://github.com/kbase/metrics/blob/master/source/daily_cron_jobs/methods_upload_elasticsearch_sumrydicts.py + + + ------------------- CRON Jobs are run from mysql-metrics @@ -53,10 +62,10 @@ There are nightly CRON jobs that get run are located in bin/master_cron_shell.sh which runs scripts from the source/daily directory Then there are also monthly CRON jobs that get run are located in bin/upload_workspace_stats.sh -It used to be workspaces (user info needed first for FK potential issues), but now it also conatins scripts for -DOI metrics.) +It used to be just workspaces (user info needed first because of potential FK issues); it now runs scripts from the source/monthly directory. +There is also a doi_monthly CRON job for Credit Engine, located in bin/upload_doi_metrics.sh These create Logs to keep track of (note nightly metrics is calling master_cron_shell 01 17 * * * /root/metrics/nightly_metrics.sh >>/mnt/metrics_logs/crontab_nightly 2>&1 @@ -64,12 +73,11 @@ These create Logs to keep track of (note nightly metrics is calling master_cron_ 01 0 15 * * /root/metrics/monthly_metrics.sh >>/mnt/metrics_logs/crontab_doi_monthly 2>&1 01 07 * * * /root/metrics/nightly_errorlogs.sh >>/mnt/metrics_logs/crontab_errorlogs 2>&1 -From Docker03 the logs can be checked by going doing the following. (Note no y at end of monthly) +From Docker03 the logs can be checked by doing the following. cat /mnt/nfs3/data1/metrics/crontab_logs/crontab_nightly cat /mnt/nfs3/data1/metrics/crontab_logs/crontab_monthly cat /mnt/nfs3/data1/metrics/crontab_logs/crontab_doi_monthly - Can also confirm things ran by looking in the database (if not need to do backfills).
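+For the nightly jobs a quick sanity check is to look at the most recent load date in one of the daily tables
+(a sketch; point it at whichever table the job you care about loads, metrics.session_info is just one example):
+select max(record_date) from metrics.session_info;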
Example: (should be first of each month) select DATE_FORMAT(`record_date`,'%Y-%m') as narrative_cron_month, count(*) as narrative_count from metrics.workspaces ws group by narrative_cron_month; diff --git a/bin/dump_weekly_ADAM_app_categories.sh b/bin/dump_weekly_ADAM_app_categories.sh new file mode 100755 index 0000000..70bb542 --- /dev/null +++ b/bin/dump_weekly_ADAM_app_categories.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python custom_scripts/dump_weekly_ADAM_app_categories.py diff --git a/source/custom_scripts/dump_query_results.py b/source/custom_scripts/dump_query_results.py index 1aa32ea..ab8486c 100644 --- a/source/custom_scripts/dump_query_results.py +++ b/source/custom_scripts/dump_query_results.py @@ -88,6 +88,11 @@ def dump_query_results(): # order by avg_hours_active desc, session_count, total_hours_active") #print("username\tsession_count\ttotal_hours_active\tavg_hours_active\tstd_hours_active\tfirst_seen\tlast_seen") + # Custom apps updates for RSV +# query = ("select app_name, git_commit_hash, min(finish_date) as first_run_date from user_app_usage \ +# group by app_name, git_commit_hash having first_run_date > '2021-01-01'") +# print("appname\tgit_commit_hash\tfirst_run_date") + #Blobstore cumulative sizes over users # query = ("select sum(total_size) as blobstore_size, bs.username from blobstore_stats bs \ # group by username order by blobstore_size") diff --git a/source/custom_scripts/dump_weekly_ADAM_app_categories.py b/source/custom_scripts/dump_weekly_ADAM_app_categories.py new file mode 100644 index 0000000..3fc67ae --- /dev/null +++ b/source/custom_scripts/dump_weekly_ADAM_app_categories.py @@ -0,0 +1,56 @@ +#!/usr/local/bin/python + +import os +import mysql.connector as mysql + +metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"] +sql_host = os.environ["SQL_HOST"] +metrics = os.environ["QUERY_ON"] + + +def dump_weekly_app_categories(): + # Dumps the weekly app catagory users report used in the quarterly report + + # connect to mysql + db_connection = mysql.connect( + host=sql_host, # "mysql1", #"localhost", + user="metrics", # "root", + passwd=metrics_mysql_password, + database="metrics", # "datacamp" + ) + + cursor = db_connection.cursor() + query = "use " + metrics + cursor.execute(query) + + # CHANGE QUERY HERE + query = ("select * from metrics_reporting.app_category_unique_users_weekly") + query = ("select in_query.week_run, in_query.master_category, count(*) as unique_users " + "from (select distinct DATE_FORMAT(`finish_date`,'%Y-%u') as week_run, " + "IFNULL(master_category,'None') as master_category, uau.username " + "from metrics.user_app_usage uau inner join " + "metrics.user_info ui on uau.username = ui.username " + "left outer join " + "metrics.adams_app_name_category_map anc on uau.app_name = anc.app_name " + "where ui.kb_internal_user = 0 " + "and func_name != 'kb_gtdbtk/run_kb_gtdbtk') as in_query " + "group by in_query.week_run, in_query.master_category;") + # CHANGE COLUMN HEADERS HERE TO MATCH QUERY HEADERS + print("week_run\tmaster_category\tunique_users") + + cursor.execute(query) + row_values = list() + + for row_values in cursor: + temp_string = "" + for i in range(len(row_values) - 1): + if row_values[i] is not None: + temp_string += str(row_values[i]) + temp_string += "\t" + if row_values[-1] is not None: + temp_string += str(row_values[-1]) + print(temp_string) + return 1 + + +dump_weekly_app_categories() From e036d3801a28379f1444cc1ad088093632c1c864 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Fri, 27 Sep 2024 02:31:46 +0000 Subject: 
[PATCH 02/11] blobstore_reports --- .../daily_cron_jobs/make_reporting_tables.py | 63 ++++++++++++++- .../sql_create_statements.txt | 21 ++++- .../sql_reporting_views_and_tables.sql | 77 ++++++++++++++++++- 3 files changed, 158 insertions(+), 3 deletions(-) diff --git a/source/daily_cron_jobs/make_reporting_tables.py b/source/daily_cron_jobs/make_reporting_tables.py index 49f112d..69f8024 100644 --- a/source/daily_cron_jobs/make_reporting_tables.py +++ b/source/daily_cron_jobs/make_reporting_tables.py @@ -340,10 +340,71 @@ def make_reporting_tables(): "where uip.exclude != 1 ") cursor.execute(user_super_summary_create_statement) print("user_super_summary_create_statement created") + + + # Blobstroe detial related tables + blobstore_detail_by_ws_create_statement = ( + "create or replace table blobstore_detail_by_ws as " + "(select in_q.ws_id, sum(in_q.orig_saver_count) as orig_saver_count, " + "sum(in_q.non_orig_saver_count) as non_orig_saver_count, " + "sum(in_q.orig_saver_size_GB) as orig_saver_size_GB, " + "sum(in_q.non_orig_saver_size_GB) as non_orig_saver_size_GB, " + "sum(in_q.total_blobstore_size_GB) as total_blobstore_size_GB " + "from (" + "select ws_id, DATE_FORMAT(`save_date`,'%Y-%m') as month, " + "sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, " + "sum(orig_saver * size)/1000000000 as orig_saver_size_GB, " + "0 - sum((orig_saver - 1) * size)/1000000000 as non_orig_saver_size_GB, " + "sum(size)/1000000000 as total_blobstore_size_GB " + "from blobstore_detail bd " + "group by ws_id, month) in_q " + "group by ws_id ) ") + cursor.execute(blobstore_detail_by_ws_create_statement) + print("blobstore_detail_by_ws_create_statement created") + + blobstore_detail_by_user_monthly_create_statement = ( + "create or replace table blobstore_detail_by_user_monthly as " + "(select saver_username, DATE_FORMAT(`save_date`,'%Y-%m') as month, " + "sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, " + "sum(orig_saver * size)/1000000000 as orig_saver_size_GB, " + "0 - sum((orig_saver - 1) * size)/1000000000 as non_orig_saver_size_GB, " + "sum(size)/1000000000 as total_blobstore_size_GB " + "from blobstore_detail bd " + "group by saver_username, month) ") + cursor.execute(blobstore_detail_by_user_monthly_create_statement) + print("blobstore_detail_by_user_monthly_create_statement created") - return + blobstore_detail_by_user_create_statement = ( + "create or replace table blobstore_detail_by_user as " + "(select saver_username, " + "sum(orig_saver_count) as orig_saver_count, sum(non_orig_saver_count) as non_orig_saver_count, " + "sum(orig_saver_size_GB) as orig_saver_size_GB, " + "sum(non_orig_saver_size_GB) as non_orig_saver_size_GB, " + "sum(total_blobstore_size_GB) as total_blobstore_size_GB " + "from blobstore_detail_by_user_monthly " + "group by saver_username) ") + cursor.execute(blobstore_detail_by_user_create_statement) + print("blobstore_detail_by_user_create_statement created") + + blobstore_detail_by_object_type_monthly_create_statement = ( + "create or replace table blobstore_detail_by_object_type_monthly as " + "(select LEFT(object_type,LOCATE('-',object_type) - 1) as object_type, " + "DATE_FORMAT(`save_date`,'%Y-%m') as month, " + "sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, " + "sum(orig_saver * size)/1000000000 as orig_saver_size_GB, " + "0 - sum((orig_saver - 1) * size)/1000000000 as non_orig_saver_size_GB, " + "sum(size)/1000000000 as total_blobstore_size_GB " + 
"from blobstore_detail bd " + "group by object_type, month) ") + cursor.execute(blobstore_detail_by_object_type_monthly_create_statement) + print("blobstore_detail_by_object_type_monthly_create_statement created") + + + + return + import time import datetime diff --git a/sql_create_statements/sql_create_statements.txt b/sql_create_statements/sql_create_statements.txt index d48d982..48af83c 100644 --- a/sql_create_statements/sql_create_statements.txt +++ b/sql_create_statements/sql_create_statements.txt @@ -1,3 +1,4 @@ + --###################### --# user_info table create and indices. @@ -311,7 +312,7 @@ ON public_narrative_count(public_narrative_count,record_date); CREATE or replace TABLE metrics.session_info ( username VARCHAR(255) NOT NULL, record_date DATE NOT NULL, - ip_address VARCHAR(15) NOT NULL, + ip_address VARCHAR(40) NOT NULL, country_name VARCHAR(255) NOT NULL, country_code VARCHAR(3) NOT NULL, city VARCHAR(255) NOT NULL, @@ -827,3 +828,21 @@ CREATE TABLE `blobstore_detail` ( KEY `idx_bsd_objecttype` (`object_type`), CONSTRAINT `fk_bsd_username` FOREIGN KEY (`saver_username`) REFERENCES `user_info` (`username`) ON UPDATE CASCADE ) ENGINE=InnoDB DEFAULT CHARSET=utf8; + + +--################## +-- past narrative copies. Need to store, because other's do not want to touch the WS and correct the data. +-- stores the past narrative copy information that was able to be determined, before narrative copy/tracking was fixed/implemented. +--################## +CREATE TABLE `past_narrative_copies` ( + `source_narrative_id` int(15) NOT NULL, + `source_narrative_upa` varchar(255) NOT NULL, + `destination_narrative_id` int(15) NOT NULL, + `destination_narrative_upa` varchar(255) NOT NULL, + `destination_narrative_save_date` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', + UNIQUE KEY `uk_destination_narrative_id_pnc` (`destination_narrative_id`), + KEY `idx_source_narrative_id_pnc` (`source_narrative_id`), + KEY `idx_source_narrative_upa_pnc` (`source_narrative_upa`), + KEY `idx_destination_narrative_save_date_pnc` (`destination_narrative_save_date`) +) ENGINE=InnoDB; + diff --git a/sql_create_statements/sql_reporting_views_and_tables.sql b/sql_create_statements/sql_reporting_views_and_tables.sql index cf72a3a..1d5c016 100644 --- a/sql_create_statements/sql_reporting_views_and_tables.sql +++ b/sql_create_statements/sql_reporting_views_and_tables.sql @@ -1450,7 +1450,7 @@ and is_temporary = 0 group by wc.username, ui.kb_internal_user; #------------------------------ -Final user_super_summary table +# Final user_super_summary table #------------------------------ # NEEDS A CRON JOB @@ -1606,3 +1606,78 @@ from metrics.doi_ws_map dwm inner join metrics_reporting.doi_metrics_current dmc on dwm.ws_id = dmc.ws_id inner join metrics_reporting.workspaces_current wc on dmc.ws_id = wc.ws_id order by dwm.doi_url,dwm.is_parent_ws desc); + + +#------------------------------ +# Blobstore_detail reports +# Note massive table. Some of these are done in CRON job as tables, other are views. 
+#------------------------------ + +create or replace table blobstore_detail_by_ws as +( +select in_q.ws_id, sum(in_q.orig_saver_count) as orig_saver_count, +sum(in_q.non_orig_saver_count) as non_orig_saver_count, +sum(in_q.orig_saver_size_GB) as orig_saver_size_GB, +sum(in_q.non_orig_saver_size_GB) as non_orig_saver_size_GB, +sum(in_q.total_blobstore_size_GB) as total_blobstore_size_GB +from +(select ws_id, DATE_FORMAT(`save_date`,'%Y-%m') as month, +sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, +sum(orig_saver * size)/1000000000 as orig_saver_size_GB, +0 - sum((orig_saver - 1) * size)/1000000000 as non_orig_saver_size_GB, +sum(size)/1000000000 as total_blobstore_size_GB +from blobstore_detail bd +group by ws_id, month) in_q +group by ws_id ); +Query OK, 108871 rows affected (6 min 52.38 sec) +Records: 108871 Duplicates: 0 Warnings: 0 + +create or replace view blobstore_detail_by_ws_monthly as +(select ws_id, DATE_FORMAT(`save_date`,'%Y-%m') as month, +sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, +sum(orig_saver * size)/1000000000 as orig_saver_size_GB, +0 - sum((orig_saver - 1) * size)/1000000000 as non_orig_saver_size_GB, +sum(size)/1000000000 as total_blobstore_size_GB +from blobstore_detail bd +group by ws_id, month); + + +create or replace table blobstore_detail_by_user_monthly as +(select saver_username, DATE_FORMAT(`save_date`,'%Y-%m') as month, +sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, +sum(orig_saver * size)/1000000000 as orig_saver_size_GB, +0 - sum((orig_saver - 1) * size)/1000000000 as non_orig_saver_size_GB, +sum(size)/1000000000 as total_blobstore_size_GB +from blobstore_detail bd +group by saver_username, month); + +create or replace table blobstore_detail_by_user as +(select saver_username, +sum(orig_saver_count) as orig_saver_count, sum(non_orig_saver_count) as non_orig_saver_count, +sum(orig_saver_size_GB) as orig_saver_size_GB, +sum(non_orig_saver_size_GB) as non_orig_saver_size_GB, +sum(total_blobstore_size_GB) as total_blobstore_size_GB +from blobstore_detail_by_user_monthly +group by saver_username); + + +create or replace table blobstore_detail_by_object_type_monthly as +(select LEFT(object_type,LOCATE('-',object_type) - 1) as object_type, +DATE_FORMAT(`save_date`,'%Y-%m') as month, +sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, +sum(orig_saver * size)/1000000000 as orig_saver_size_GB, +0 - sum((orig_saver - 1) * size)/1000000000 as non_orig_saver_size_GB, +sum(size)/1000000000 as total_blobstore_size_GB +from blobstore_detail bd +group by object_type, month); + +create or replace view blobstore_detail_by_object_type as +(select object_type, +sum(orig_saver_count) as orig_saver_count, +sum(non_orig_saver_count) as non_orig_saver_count, +sum(orig_saver_size_GB) as orig_saver_size_GB, +sum(non_orig_saver_size_GB) as non_orig_saver_size_GB, +sum(total_blobstore_size_GB) as total_blobstore_size_GB +from blobstore_detail_by_object_type_monthly +group by object_type); + From 163b1a251c22e287cc0431f3633b527e914f1d90 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Tue, 1 Oct 2024 23:56:16 +0000 Subject: [PATCH 03/11] more code for blobstore details and narrative copy/duplicate info --- .../get_copy_info_for_narratives.py | 194 ++++++ .../get_duplicate_narrative_object_ids.py | 187 ++++++ ...pulate_orphaned_blobstore_nodes_handles.py | 293 ++++++++ .../upload_get_copy_info_for_narratives.py | 367 
+++++++++++ .../methods_upload_blobstore_details.py | 623 ++++++++++++++++++ .../upload_blobstore_details.py | 28 + 6 files changed, 1692 insertions(+) create mode 100644 source/custom_scripts/get_copy_info_for_narratives.py create mode 100644 source/custom_scripts/get_duplicate_narrative_object_ids.py create mode 100644 source/custom_scripts/populate_orphaned_blobstore_nodes_handles.py create mode 100644 source/custom_scripts/upload_get_copy_info_for_narratives.py create mode 100644 source/daily_cron_jobs/methods_upload_blobstore_details.py create mode 100644 source/daily_cron_jobs/upload_blobstore_details.py diff --git a/source/custom_scripts/get_copy_info_for_narratives.py b/source/custom_scripts/get_copy_info_for_narratives.py new file mode 100644 index 0000000..225df03 --- /dev/null +++ b/source/custom_scripts/get_copy_info_for_narratives.py @@ -0,0 +1,194 @@ +from pymongo import MongoClient +from pymongo import ReadPreference +#from biokbase.workspace.client import Workspace +#from installed_clients.AbstractHandleClient import AbstractHandle as HandleService +#from biokbase.service.Client import Client as ServiceClient +#import json as _json +import os +import mysql.connector as mysql +import requests +import time +#from splitting import split_sequence +from datetime import date +from datetime import datetime + +debug_mode = 1 + +if debug_mode == 1: + print("############################################") + print("############################################") + print("############################################") + print("START TIME (UTC): " + str(datetime.utcnow())) + +start_time = time.time() + +requests.packages.urllib3.disable_warnings() + +mongoDB_metrics_connection = os.environ["MONGO_PATH"] + +ws_url = os.environ["WS_URL"] +ws_user_token = os.environ["METRICS_WS_USER_TOKEN"] +to_workspace = os.environ["WRK_SUFFIX"] + +metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"] +sql_host = os.environ["SQL_HOST"] +metrics = os.environ["QUERY_ON"] + +# connect to mysql +db_connection = mysql.connect( + host=sql_host, # "mysql1", #"localhost", + user="metrics", # "root", + passwd=metrics_mysql_password, + database="metrics", # "datacamp" +) + +cursor = db_connection.cursor() +query = "use " + metrics +cursor.execute(query) + +workspaces_with_copied_reports_and_no_narratives = list() + +client = MongoClient(mongoDB_metrics_connection + to_workspace) +db = client.workspace + +# dict soucce_ws => {destination_ws => min_savedate (MIGHT NEED NARRATIVE OBJECT NUMBER) +source_ws_to_destination_ws_dict = dict() +destination_ws_set = set() + +# Key destination ws_id , key = object id of the narrative +destination_narrative_obj_id_lookup = dict() + +# Final results object; +# Key = narrative_obj_id , value = ws_obj_version of the source ws object: +destination_results_dict = dict() + +# get unique list of Report types: +query = ('select object_type, object_type_full from metrics_reporting.workspace_object_counts_current where object_type like "KBaseReport.Report%"') + +cursor.execute(query) +row_values = list() + +report_list = list() +for row_values in cursor: + report_list.append(row_values[1]) + +# GET THE INITIAL INFORMATION ABOUT COPIED REPORTS TO EXTRAPOLATE COPIED NARRATIVES: +ws_objVersions_copied_reports_cursor = db.workspaceObjVersions.find({"type":{"$in":report_list}, + "copied":{"$ne": None} + #, "ws":{"$in":[145373, 43266, 116952, 154109]} + }, + {"ws": 1, "_id": 0, "savedate": 1, "copied" : 1 }) + +for ws_objVersions_copied_report in ws_objVersions_copied_reports_cursor: + 
destination_ws = ws_objVersions_copied_report["ws"] + savedate = ws_objVersions_copied_report["savedate"] + copied_from = ws_objVersions_copied_report["copied"] + source_ws = int(copied_from.split("/")[0]) + destination_ws_set.add(destination_ws) + if source_ws not in source_ws_to_destination_ws_dict: + source_ws_to_destination_ws_dict[source_ws] = dict() + if destination_ws not in source_ws_to_destination_ws_dict[source_ws]: + source_ws_to_destination_ws_dict[source_ws][destination_ws] = dict() + if "creation_date" not in source_ws_to_destination_ws_dict[source_ws][destination_ws]: + source_ws_to_destination_ws_dict[source_ws][destination_ws]["creation_date"] = savedate + else: + if savedate < source_ws_to_destination_ws_dict[source_ws][destination_ws]["creation_date"]: + source_ws_to_destination_ws_dict[source_ws][destination_ws]["creation_date"] = savedate + +if debug_mode == 1: + print("source_ws_to_destination_ws_dict: " + str(source_ws_to_destination_ws_dict)) +# +#split the copy get source WS, fill in Datastructure, replace the min_date accordingly. + # + # + +# GET THE DESTINATION WS NARRATIVE OBJECT ID +# Has the obj id (middlw part of UPA) of the narrative obj in the new WS. Copied narratives are not object 1, but rather +# the max object id in source ws (at time of the copy) + 1 +destination_narratives_ids_lookup = dict() + +#get narrative typed objects +query = ('select object_type, object_type_full from metrics_reporting.workspace_object_counts_current where object_type like "KBaseNarrative.Narrative%"') + +cursor.execute(query) +row_values = list() + +narrative_type_list = list() +for row_values in cursor: + narrative_type_list.append(row_values[1]) + +destination_narrative_ids_cursor = db.workspaceObjVersions.find({"type":{"$in":narrative_type_list}, + "ws":{"$in":list(destination_ws_set)}, + "ver":1}, + {"ws":1, "id":1, "_id":0}) + +for dest_narrative_ws_id in destination_narrative_ids_cursor: + destination_narrative_obj_id_lookup[dest_narrative_ws_id["ws"]] = dest_narrative_ws_id["id"] + +if debug_mode == 1: + print("destination_narrative_obj_id_lookup : " + str(destination_narrative_obj_id_lookup)) + + +# GET THE COPIED FROM NARRATIVES TIMESTAMPS OF THEIR VERSIONS TO HAVE A LOOKUP FOR THE +for source_ws_id in source_ws_to_destination_ws_dict: + ordered_save_points = list() + source_version_save_points_cursor = db.workspaceObjVersions.find({"type":"KBaseNarrative.Narrative-4.0", + "ws":source_ws_id}, + {"id":1, "ver":1, "savedate":1, "_id":0}).sort("savedate") + for source_version_save_point in source_version_save_points_cursor: + source_obj_id = str(source_ws_id) + "/" + str(source_version_save_point["id"]) + "/" + str(source_version_save_point["ver"]) + savedate = source_version_save_point["savedate"] + ordered_save_points.append([savedate,source_obj_id]) + if debug_mode == 1: + print("ordered_save_points : " + str(ordered_save_points)) + + for destination_ws_id in source_ws_to_destination_ws_dict[source_ws_id]: + destination_ws_savedate = source_ws_to_destination_ws_dict[source_ws_id][destination_ws_id]["creation_date"] + source_obj_id_used = None + for ordered_save_point in ordered_save_points: + if ordered_save_point[0] <= destination_ws_savedate: + source_obj_id_used = ordered_save_point[1] + else: + break + if source_obj_id_used == None: + if debug_mode == 1: + print("ERROR: " + str(destination_ws_id) + " does not a source ws_obj that it found, could be due to saved REPORT indipendently") + if destination_ws_id not in destination_narrative_obj_id_lookup: + if 
debug_mode == 1: + print("It is a WS without a narrative object") + workspaces_with_copied_reports_and_no_narratives.append(destination_ws_id) + continue + destination_narrative_obj_id = str(destination_ws_id) + "/" + str(destination_narrative_obj_id_lookup[destination_ws_id]) + "/1" + destination_results_dict[destination_narrative_obj_id] = source_obj_id_used + +if debug_mode == 1: + print("destination_results_dict : " + str(destination_results_dict)) + print("===============================") + print("===============================") + print("===============================") + +destination_obj_id_is_none = list() + +narrative_copy_count = 0 +print("Destination_WS\tSource_WS") +for destination_obj_id in destination_results_dict: + if destination_results_dict[destination_obj_id] == None: + destination_obj_id_is_none.append(destination_obj_id) + continue + print(destination_obj_id + "\t" + destination_results_dict[destination_obj_id]) + narrative_copy_count += 1 + +if debug_mode == 1: + print("DESTINATION WORKSPACES HAVE NO NARRATIVE workspaces_with_copied_reports_and_no_narratives : " + str(workspaces_with_copied_reports_and_no_narratives)) + print("workspaces_with_copied_reports_and_no_narratives length " + str(len(workspaces_with_copied_reports_and_no_narratives))) + + print("SOURCE WS DOES NOT HAVE A NARRATIVE::::::::destination_obj_id_is_none : " + str(destination_obj_id_is_none)) + print("destination_obj_id_is_none length : " + str(len(destination_obj_id_is_none))) + + + print("destination_narrative_obj_id_lookup length: " + str(len(destination_narrative_obj_id_lookup))) + print("destination_results_dict length: " + str(len(destination_results_dict))) + + print("total narrative_copy_count : " + str(narrative_copy_count)) + print("--- total seconds %s seconds ---" % (time.time() - start_time)) +exit() diff --git a/source/custom_scripts/get_duplicate_narrative_object_ids.py b/source/custom_scripts/get_duplicate_narrative_object_ids.py new file mode 100644 index 0000000..b1ce05c --- /dev/null +++ b/source/custom_scripts/get_duplicate_narrative_object_ids.py @@ -0,0 +1,187 @@ +#import pymongo +from pymongo import MongoClient +from pymongo import ReadPreference +#from biokbase.workspace.client import Workspace +#from installed_clients.AbstractHandleClient import AbstractHandle as HandleService +#from biokbase.service.Client import Client as ServiceClient +#import json as _json +import os +import mysql.connector as mysql +import requests +import time +#from splitting import split_sequence +from datetime import date +from datetime import datetime + +debug_mode = 1 + +if debug_mode == 1: + print("############################################") + print("############################################") + print("############################################") + print("START TIME (UTC): " + str(datetime.utcnow())) + +start_time = time.time() + +requests.packages.urllib3.disable_warnings() + +mongoDB_metrics_connection = os.environ["MONGO_PATH"] + +ws_url = os.environ["WS_URL"] +ws_user_token = os.environ["METRICS_WS_USER_TOKEN"] +to_workspace = os.environ["WRK_SUFFIX"] + +metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"] +sql_host = os.environ["SQL_HOST"] +metrics = os.environ["QUERY_ON"] + +# connect to mysql +db_connection = mysql.connect( + host=sql_host, # "mysql1", #"localhost", + user="metrics", # "root", + passwd=metrics_mysql_password, + database="metrics", # "datacamp" +) + +cursor = db_connection.cursor() +query = "use " + metrics +cursor.execute(query) + +client = 
MongoClient(mongoDB_metrics_connection + to_workspace) +db = client.workspace + +workspaces_without_corresponding_versions_data = list() + +#workspaces_with_multiple_narrative_obj_ids = dict() + +############################## +# +# get the list of narrative_typed objects +# +############################ +def get_narrative_typed_objects_list(cursor): + # get list of narrative typed objects on the system + query = ('select object_type, object_type_full from metrics_reporting.workspace_object_counts_current where object_type like "KBaseNarrative.Narrative%"') + cursor.execute(query) + narrative_type_list = list() + for row_values in cursor: + narrative_type_list.append(row_values[1]) + return narrative_type_list + +########################3 +# +# get_ws_narratives with duplicate narrative_ids +# +##################### +def get_multiple_narratives_count_dict(cursor): + # get list of narrative typed objects on the system + query = ('select ws_id, num_nar_obj_ids from metrics_reporting.workspaces_current where num_nar_obj_ids > 1') + cursor.execute(query) + multiple_narrative_count_dict = dict() + for row_values in cursor: + multiple_narrative_count_dict[row_values[0]] = row_values[1] +# print(" multiple_narrative_count_dict : " + str(multiple_narrative_count_dict)) +# print(" multiple_narrative_count_dict length : " + str(len(multiple_narrative_count_dict))) + return multiple_narrative_count_dict + +#################### +# +# get active narrative for all of these workspaces (note may have by name) +# Then get the list of all non_active obj_ids for these narratives +# Confirm the length of each list is n-1 relative to the count list +# +################## +def get_non_active_narrative_object_ids(narrative_type_list, multiple_narrative_count_dict, db): + narrative_active_id_dict = dict() + list_of_workspace_to_check = list(multiple_narrative_count_dict.keys()) +# print("list_of_workspace_to_check len : " + str(len(list_of_workspace_to_check))) + + ws_narratives_dict = dict() +# + narrative_obj_ids_not_int_dict = dict() #key ws -> value the narrative value +# + narrative_obj_ids_not_int_never_resolved = dict() #key ws -> value the narrative value +# + meta_without_narrative_count = 0 +# + meta_with_multiple_narratives_count = 0 + + workspaces_with_meta_cursor = db.workspaces.find({"meta" : {"$exists": True}, "ws" : {"$in":list_of_workspace_to_check}},{"ws":1,"meta":1}) + workspaces_with_meta_cursor_count = 0 + for workspace_with_meta in workspaces_with_meta_cursor: + workspaces_with_meta_cursor_count += 1 + narrative_ws_id = workspace_with_meta["ws"] + meta_narrative = None + for meta_element in workspace_with_meta["meta"]: +# print(" meta_element : " + str(meta_element)) + if "narrative" == meta_element["k"]: +# print("narrative in meta element") + if meta_narrative is None: + meta_narrative = meta_element["v"] + else: + if meta_narrative != meta_element["v"]: + meta_with_multiple_narratives_count += 1 +# print(" workspace_with_meta multiple narratives : " + str( workspace_with_meta["meta"])) + if meta_narrative is None: + meta_without_narrative_count += 1 + else: + try: + narrative_active_id_dict[narrative_ws_id] = int(meta_narrative) + except ValueError: +# del(narrative_active_id_dict[narrative_ws_id]) + narrative_obj_ids_not_int_dict[narrative_ws_id] = meta_narrative + #NOW NEED TO RESOLVE THE narrative id indicator that is not an integer: + for narrative_obj_id_not_int in narrative_obj_ids_not_int_dict: +# print("narrative_obj_id_not_int : " + str(narrative_obj_id_not_int)) +# 
print("narrative_obj_ids_not_int_dict[narrative_obj_id_not_int] : " + str(narrative_obj_ids_not_int_dict[narrative_obj_id_not_int])) + workspaceObjectsName_cursor = db.workspaceObjects.find({"ws": narrative_obj_id_not_int, + "name": narrative_obj_ids_not_int_dict[narrative_obj_id_not_int]}, + {"ws":1,"id":1}) + record_found = 0 + for workspaceObjectsName in workspaceObjectsName_cursor: + record_found = 1 + narrative_active_id_dict[narrative_obj_id_not_int] = workspaceObjectsName["id"] + if record_found == 0: + narrative_obj_ids_not_int_never_resolved[narrative_obj_id_not_int] = narrative_obj_ids_not_int_dict[narrative_obj_id_not_int] + +# print("workspaces_with_meta_cursor count : " + str(workspaces_with_meta_cursor_count)) +# print("meta_without_narrative_count : " + str(meta_without_narrative_count)) +# print("meta_with_multiple_narratives_count : " + str(meta_with_multiple_narratives_count)) +# print("narrative_obj_ids_not_int_never_resolved : " + str(narrative_obj_ids_not_int_never_resolved)) +# print("narrative_obj_ids_not_int_never_resolved length : " + str(len(narrative_obj_ids_not_int_never_resolved))) +# print("narrative_active_id_dict length : " + str(len(narrative_active_id_dict))) +# print("narrative_active_id_dict : " + str(narrative_active_id_dict)) +# print("narrative_type_list : " + str(narrative_type_list)) + +# exit() + + # key narrative id -> value comma delimited string of non_active_ids + return_non_active_ids_dict = dict() + + for narrative_with_active_id in narrative_active_id_dict: + # now determine which obj_ids are non-active narrative objects. + # confirm the number gotten back metches the count in (multiple_narrative_count_dict - 1) + non_active_narrative_ids_set = set() + narrative_obj_ids_cursor = db.workspaceObjVersions.find({ "ws": narrative_with_active_id, "type" : {"$in":narrative_type_list}},{"id":1, "ws":1, "_id":0}) + for narrative_obj_ids_row in narrative_obj_ids_cursor: + narrative_obj_id = narrative_obj_ids_row["id"] +# print("narrative_obj_id : " + str(narrative_obj_id)) + if narrative_obj_id != narrative_active_id_dict[narrative_with_active_id] : + non_active_narrative_ids_set.add(narrative_obj_id) + if len(non_active_narrative_ids_set) != (multiple_narrative_count_dict[narrative_with_active_id] - 1): + print("narrative_with_active_id : " + str(narrative_with_active_id) + " has a length of non_actives of " + str(len(non_active_narrative_ids_set)) + + " but the multiple_narrative_count_dict has a value of : " + str(multiple_narrative_count_dict[narrative_with_active_id]) + + " here are the non actives : " + str(non_active_narrative_ids_set)) + else: + return_non_active_ids_dict[narrative_with_active_id] = ",".join(str(x) for x in list(non_active_narrative_ids_set)) + + for return_non_active_id in return_non_active_ids_dict: + print(str(return_non_active_id) + "\t" + return_non_active_ids_dict[return_non_active_id]) +# print("return_non_active_ids_dict : " + str(return_non_active_ids_dict)) + + +narrative_type_list = get_narrative_typed_objects_list(cursor) +multiple_narrative_count_dict = get_multiple_narratives_count_dict(cursor) +get_non_active_narrative_object_ids(narrative_type_list, multiple_narrative_count_dict, db) + + diff --git a/source/custom_scripts/populate_orphaned_blobstore_nodes_handles.py b/source/custom_scripts/populate_orphaned_blobstore_nodes_handles.py new file mode 100644 index 0000000..1776779 --- /dev/null +++ b/source/custom_scripts/populate_orphaned_blobstore_nodes_handles.py @@ -0,0 +1,293 @@ +from pymongo import 
MongoClient +from pymongo import ReadPreference +from biokbase.workspace.client import Workspace +#from installed_clients.AbstractHandleClient import AbstractHandle as HandleService +from biokbase.service.Client import Client as ServiceClient +import json as _json +import os +import mysql.connector as mysql +import requests +import time +#from splitting import split_sequence +from datetime import date +from datetime import datetime + +print("############################################") +print("############################################") +print("############################################") +print("START TIME (UTC): " + str(datetime.utcnow())) +start_time = time.time() + +requests.packages.urllib3.disable_warnings() + +mongoDB_metrics_connection = os.environ["MONGO_PATH"] + +ws_url = os.environ["WS_URL"] +ws_user_token = os.environ["METRICS_WS_USER_TOKEN"] +to_workspace = os.environ["WRK_SUFFIX"] + +to_blobstore = os.environ["BLOBSTORE_SUFFIX"] +to_handle_db = os.environ["HANDLE_DB_SUFFIX"] + + +client = MongoClient(mongoDB_metrics_connection + to_workspace) +db = client.workspace +handle_service_url = "https://kbase.us/services/handle_service" + +metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"] +sql_host = os.environ["SQL_HOST"] +query_on = os.environ["QUERY_ON"] +# connect to mysql +db_connection = mysql.connect( + host=sql_host, user="metrics", passwd=metrics_mysql_password, database="metrics" +) +cursor = db_connection.cursor() +query = "use " + query_on +cursor.execute(query) + +#wsadmin = Workspace(ws_url, token=ws_user_token) +#hs = HandleService(handle_service_url, token=ws_user_token) + +def get_blobstore_nodes (): + client_blobstore = MongoClient(mongoDB_metrics_connection + to_blobstore) + db_blobstore = client_blobstore.blobstore + + blobstore_nodes_set = set() + blobstore_dict = dict() + + nodes_query = db_blobstore.nodes.find({},{"_id": 0, "id": 1, "own.user": 1, "time": 1}) + for record in nodes_query: + blobstore_node_id = record["id"] + user = "empty" + if "own" in record and "user" in record["own"]: + user = record["own"]["user"] + save_date = record["time"] + blobstore_nodes_set.add(blobstore_node_id) + blobstore_dict[blobstore_node_id] = {"user": user, + "date": save_date, + } + return (blobstore_nodes_set, blobstore_dict) + +def get_handles_and_blobstore_ids (): + client_handle_db = MongoClient(mongoDB_metrics_connection + to_handle_db) + db_handle = client_handle_db.handle_db + + handles_set = set() + handles_blobstore_ids_set = set() + handles_by_hid_dict = dict() + handles_by_bsid_dict = dict() + + handles_query = db_handle.handle.find({},{"_id": 0, "id": 1, "hid": 1, "created_by":1, "creation_date":1}) + for record in handles_query: + blobstore_id = record["id"] + handle = record["hid"] + user = record["created_by"] + save_date = record["creation_date"] + handles_set.add(handle) + handles_blobstore_ids_set.add(blobstore_id) + handles_by_hid_dict[handle] = {"bsid": blobstore_id, + "user": user, + "date": save_date, + } + handles_by_bsid_dict[blobstore_id] = {"handle" : handle, + "user": user, + "date": save_date, + } + + return (handles_set, handles_blobstore_ids_set, handles_by_hid_dict, handles_by_bsid_dict) + +def get_workspace_handles (): + workspace_handles_set = set() + workspace_dict = dict() + ws_obj_vers_cursor = db.workspaceObjVersions.find( + {#"ws":312, + "extids.handle" : { "$exists": True }}, + { + "type": 1, + "ws": 1, + "id": 1, + "ver": 1, + "extids": 1, + "savedate": 1, + "savedby": 1, + "_id": 0, + }, + no_cursor_timeout=True + ) + 
for ws_obj_ver in ws_obj_vers_cursor: + obj_type = ws_obj_ver["type"] + ws = ws_obj_ver["ws"] + obj_id = ws_obj_ver["id"] + ver = ws_obj_ver["ver"] + savedate = ws_obj_ver["savedate"] + savedby = ws_obj_ver["savedby"] + extids = ws_obj_ver["extids"] + handles = extids["handle"] + full_obj_id = str(ws) + "/" + str(obj_id) + "/" + str(ver) + for handle in handles: + (kbh_prefix, str_handle_id) = handle.split("_") + int_handle = int(str_handle_id) + workspace_handles_set.add(int_handle) + if int_handle not in workspace_dict : + workspace_dict[int_handle] = dict() + workspace_dict[int_handle][full_obj_id] = { "ws" : ws, + "date" : savedate, + "user" : savedby, + "type" : obj_type + } + return (workspace_handles_set, workspace_dict) + +(blobstore_nodes_set, blobstore_dict) = get_blobstore_nodes() +print("blobstore_nodes_set length : " + str(len(blobstore_nodes_set))) +(handles_set, handles_blobstore_ids_set, handles_by_hid_dict, handles_by_bsid_dict) = get_handles_and_blobstore_ids() +print("handles_set length : " + str(len(handles_set))) +print("handle_blobstore_ids_set length : " + str(len(handles_blobstore_ids_set))) +(workspace_handles_set, workspaces_dict) = get_workspace_handles() +print("workspace_handles_set length : " + str(len(workspace_handles_set))) + +blobstore_nodes_not_in_handles_set = blobstore_nodes_set.difference(handles_blobstore_ids_set) +handles_blobstores_not_in_blobstore_nodes = handles_blobstore_ids_set.difference(blobstore_nodes_set) + +handles_not_in_worspace_handles_set = handles_set.difference(workspace_handles_set) +workspace_handles_not_in_handles_set = workspace_handles_set.difference(handles_set) + + +wsov_handle_ids_not_in_handle_insert_cursor = db_connection.cursor(prepared=True) +wsov_handle_ids_not_in_handle_insert_statement = ( + "insert into metrics.wsov_handle_ids_not_in_handle " + "(ws_obj_ver_id, save_date, ws_id, handle_id, username, type) " + "values(%s, %s, %s, %s, %s, %s)" +) + + +for handle_id in workspace_handles_not_in_handles_set: + for full_obj_id in workspaces_dict[handle_id]: + ws_id = workspaces_dict[handle_id][full_obj_id]["ws"] + save_date = workspaces_dict[handle_id][full_obj_id]["date"] + user = workspaces_dict[handle_id][full_obj_id]["user"] + obj_type = workspaces_dict[handle_id][full_obj_id]["type"] + + input_vals = ( + full_obj_id, + save_date, + ws_id, + handle_id, + user, + obj_type, + ) + wsov_handle_ids_not_in_handle_insert_cursor.execute(wsov_handle_ids_not_in_handle_insert_statement, input_vals) + +handle_ids_not_in_ws_obj_ver_insert_cursor = db_connection.cursor(prepared=True) +handle_ids_not_in_ws_obj_ver_insert_statement = ( + "insert into metrics.handle_ids_not_in_ws_obj_ver " + "(blobstore_id, handle_id, username, save_date) " + "values(%s, %s, %s, %s) " +) + +for handle_id in handles_not_in_worspace_handles_set: + bsid = handles_by_hid_dict[handle_id]["bsid"] + user = handles_by_hid_dict[handle_id]["user"] + if user is None: + print("Entry for handle_id " + str(handle_id) + " :: " + str(handles_by_hid_dict[handle_id])) + user = "No User Found" + save_date = handles_by_hid_dict[handle_id]["date"] + input_vals = ( + bsid, + handle_id, + user, + save_date, + ) + handle_ids_not_in_ws_obj_ver_insert_cursor.execute(handle_ids_not_in_ws_obj_ver_insert_statement, input_vals) + +handles_blobstore_ids_not_in_nodes_insert_cursor = db_connection.cursor(prepared=True) +handles_blobstore_ids_not_in_nodes_insert_statement = ( + "insert into metrics.handles_blobstore_ids_not_in_nodes " + "(blobstore_id, handle_id, username, save_date) " + 
"values(%s, %s, %s, %s) " +) + +for bsid in handles_blobstores_not_in_blobstore_nodes: + handle_id = handles_by_bsid_dict[bsid]["handle"] + user = handles_by_bsid_dict[bsid]["user"] + if user is None: + print("Entry for bsid " + str(bsid) + " :: " + str(handles_by_bsid_dict[bsid])) + user = "No User Found" + save_date = handles_by_bsid_dict[bsid]["date"] + input_vals = ( + bsid, + handle_id, + user, + save_date, + ) + handles_blobstore_ids_not_in_nodes_insert_cursor.execute(handles_blobstore_ids_not_in_nodes_insert_statement, input_vals) + + +blobstore_ids_not_in_handle_insert_cursor = db_connection.cursor(prepared=True) +blobstore_ids_not_in_handle_insert_statement = ( + "insert into metrics.blobstore_ids_not_in_handle " + "(blobstore_id, username, save_date) " + "values(%s, %s, %s) " + ) + +for blobstore_id in blobstore_nodes_not_in_handles_set: + user = blobstore_dict[blobstore_id]["user"] + if user is None: + print("Entry for bsid " + str(blobstore_id) + " :: " + str(blobstore_dict[blobstore_id])) + user = "No User Found" + save_date = blobstore_dict[blobstore_id]["date"] + input_vals = ( + blobstore_id, + user, + save_date, + ) + blobstore_ids_not_in_handle_insert_cursor.execute(blobstore_ids_not_in_handle_insert_statement, input_vals) + + +i = 0 +print("Blobstore_dict :") +for bs_id in blobstore_dict: + i += 1 + if i > 4: + break + print("Blobstore : " + bs_id + " ::: " + str(blobstore_dict[bs_id])) + +i = 0 +print("handle_by_hid_dict :") +for hid in handles_by_hid_dict: + i += 1 + if i > 4: + break + print("Handle : " + str(hid) + " ::: " + str(handles_by_hid_dict[hid])) + +i = 0 +print("handle_by_bsid_dict :") +for bsid in handles_by_bsid_dict: + i += 1 + if i > 4: + break + print("BSID : " + str(bsid) + " ::: " + str(handles_by_bsid_dict[bsid])) + +i = 0 +print("workspaces_dict :") +for hid in workspaces_dict: + i += 1 + if i > 4: + break + print("Handle : " + str(hid) + " ::: " + str(workspaces_dict[hid])) + +print("blobstore_nodes_set length : " + str(len(blobstore_nodes_set))) +print("handle_blobstore_ids_set length : " + str(len(handles_blobstore_ids_set))) +print("handles_set length : " + str(len(handles_set))) +print("workspace_handles_set length : " + str(len(workspace_handles_set))) + +print("blobstore_nodes_not_in_handles_set length : " + str(len(blobstore_nodes_not_in_handles_set))) +print("handles_blobstores_not_in_blobstore_nodes length : " + str(len(handles_blobstores_not_in_blobstore_nodes))) +print("handles_not_in_worspace_handles_set length : " + str(len(handles_not_in_worspace_handles_set))) +print("workspace_handles_not_in_handles_set : " + str(len(workspace_handles_not_in_handles_set))) + +print("--- total seconds %s seconds ---" % (time.time() - start_time)) + +db_connection.commit() +db_connection.close() + +exit() diff --git a/source/custom_scripts/upload_get_copy_info_for_narratives.py b/source/custom_scripts/upload_get_copy_info_for_narratives.py new file mode 100644 index 0000000..251a830 --- /dev/null +++ b/source/custom_scripts/upload_get_copy_info_for_narratives.py @@ -0,0 +1,367 @@ +#import pymongo +from pymongo import MongoClient +from pymongo import ReadPreference +#from biokbase.workspace.client import Workspace +#from installed_clients.AbstractHandleClient import AbstractHandle as HandleService +#from biokbase.service.Client import Client as ServiceClient +#import json as _json +import os +import mysql.connector as mysql +import requests +import time +#from splitting import split_sequence +from datetime import date +from datetime import datetime + 
+debug_mode = 1 + +if debug_mode == 1: + print("############################################") + print("############################################") + print("############################################") + print("START TIME (UTC): " + str(datetime.utcnow())) + +start_time = time.time() + +requests.packages.urllib3.disable_warnings() + +mongoDB_metrics_connection = os.environ["MONGO_PATH"] + +ws_url = os.environ["WS_URL"] +ws_user_token = os.environ["METRICS_WS_USER_TOKEN"] +to_workspace = os.environ["WRK_SUFFIX"] + +metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"] +sql_host = os.environ["SQL_HOST"] +metrics = os.environ["QUERY_ON"] + +# connect to mysql +db_connection = mysql.connect( + host=sql_host, # "mysql1", #"localhost", + user="metrics", # "root", + passwd=metrics_mysql_password, + database="metrics", # "datacamp" +) + +cursor = db_connection.cursor() +query = "use " + metrics +cursor.execute(query) + +client = MongoClient(mongoDB_metrics_connection + to_workspace) +db = client.workspace + +workspaces_without_corresponding_versions_data = list() + +#workspaces_with_multiple_narrative_obj_ids = dict() + +############################## +# +# get the list of narrative_typed objects +# +############################ +def get_narrative_typed_objects_list(cursor): + # get list of narrative typed objects on the system + query = ('select object_type, object_type_full from metrics_reporting.workspace_object_counts_current where object_type like "KBaseNarrative.Narrative%"') + cursor.execute(query) + narrative_type_list = list() + for row_values in cursor: + narrative_type_list.append(row_values[1]) + return narrative_type_list + +############################### +# +# Get a dict of Workspaces that contain a narrative - with its corresponding info +# {key: ws_id => {"id" => Object ID of the version 1 of the narratove, +# "savedate" => date that version 1 of the narrative was created. 
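+#                     e.g. {12345: {"id": 1, "savedate": <datetime of the first narrative version>}}  (hypothetical ws_id and values)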
+# +############################### +def get_ws_narratives(db): +#def get_ws_narratives(db, narrative_type_list): + ws_narratives_dict = dict() +# workspaces_with_multiple_narrative_obj_ids = dict() + narrative_obj_ids_not_int_dict = dict() #key ws -> value the narrative value + narrative_obj_ids_not_int_never_resolved = dict() #key ws -> value the narrative value + + meta_without_narrative_count = 0 + meta_with_multiple_narratives_count = 0 + + workspaces_with_meta_cursor = db.workspaces.find({"meta" : {"$exists": True}},{"ws":1,"meta":1}) + for workspace_with_meta in workspaces_with_meta_cursor: + narrative_ws_id = workspace_with_meta["ws"] +# if narrative_ws_id != 100417: +# continue + meta_narrative = None +# print("narrative_ws_id : " + str(narrative_ws_id)) +# print(" workspace_with_meta meta : " + str( workspace_with_meta["meta"])) + for meta_element in workspace_with_meta["meta"]: +# print(" meta_element : " + str(meta_element)) + if "narrative" == meta_element["k"]: +# print("narrative in meta element") + if meta_narrative is None: + meta_narrative = meta_element["v"] + else: + if meta_narrative != meta_element["v"]: + meta_with_multiple_narratives_count += 1 +# print(" workspace_with_meta multiple narratives : " + str( workspace_with_meta["meta"])) + if meta_narrative is None: + meta_without_narrative_count += 1 + else: + ws_narratives_dict[narrative_ws_id] = dict() + try: + ws_narratives_dict[narrative_ws_id]["id"] = int(meta_narrative) + except ValueError: + del(ws_narratives_dict[narrative_ws_id]) + narrative_obj_ids_not_int_dict[narrative_ws_id] = meta_narrative + #NOW NEED TO RESOLVE THE narrative id indicator that is not an integer: + for narrative_obj_id_not_int in narrative_obj_ids_not_int_dict: +# print("narrative_obj_id_not_int : " + str(narrative_obj_id_not_int)) +# print("narrative_obj_ids_not_int_dict[narrative_obj_id_not_int] : " + str(narrative_obj_ids_not_int_dict[narrative_obj_id_not_int])) + workspaceObjectsName_cursor = db.workspaceObjects.find({"ws": narrative_obj_id_not_int, + "name": narrative_obj_ids_not_int_dict[narrative_obj_id_not_int]}, + {"ws":1,"id":1}) + record_found = 0 + for workspaceObjectsName in workspaceObjectsName_cursor: + record_found = 1 + ws_narratives_dict[narrative_obj_id_not_int] = dict() + ws_narratives_dict[narrative_obj_id_not_int]["id"] = workspaceObjectsName["id"] + if record_found == 0: + narrative_obj_ids_not_int_never_resolved[narrative_obj_id_not_int] = narrative_obj_ids_not_int_dict[narrative_obj_id_not_int] + narrative_obj_id_not_int + + print("meta_without_narrative_count : " + str(meta_without_narrative_count)) + print("meta_with_multiple_narratives_count : " + str(meta_with_multiple_narratives_count)) + print("ws_narratives_dict_length : " + str(len(ws_narratives_dict))) + print("narrative_obj_ids_not_int_never_resolved : " + str(narrative_obj_ids_not_int_never_resolved)) + print("narrative_obj_ids_not_int_never_resolved length : " + str(len(narrative_obj_ids_not_int_never_resolved))) + print("ws_narratives_dict length 1 : " + str(len(ws_narratives_dict))) + +# exit() + + processed_narratives_count = 0 + test_ws_narratives_dict = dict() + + # NOW DETERMINE THE SAVEDATE + for narrative_ws_id in ws_narratives_dict: + processed_narratives_count += 1 +# if processed_narratives_count < 140000: +# continue +# test_list = [ 13129,59769,56261,100417 ] +# if narrative_ws_id in test_list: + # NOW GET THE SAVE DATE FOR THE FIRST NARRATIVE VERSION +# print("Narrative ws id : " + str(narrative_ws_id)) +# obj_id = 
ws_narratives_dict[narrative_ws_id]["id"] +# print("id : " + str(ws_narratives_dict[narrative_ws_id]["id"])) + get_narrative_savedate_cursor = db.workspaceObjVersions.find({"ws": narrative_ws_id, "id":ws_narratives_dict[narrative_ws_id]["id"], "ver":1},{"ws":1, "id":1, "savedate":1, "_id":0}) + found_object_ver = 0 + for narrative_savedate_record in get_narrative_savedate_cursor: + ws_narratives_dict[narrative_ws_id]["savedate"] = narrative_savedate_record["savedate"] + found_object_ver = 1 +# test_ws_narratives_dict[narrative_ws_id] = ws_narratives_dict[narrative_ws_id] + if found_object_ver == 0: + workspaces_without_corresponding_versions_data.append(narrative_ws_id) + if processed_narratives_count % 1000 == 0: + print("Processed savedate for : " + str(processed_narratives_count) + " narratives") +# print("test_ws_narratives_dict : " + str(test_ws_narratives_dict)) +# print("test_ws_narratives_dict length 2: " + str(len(test_ws_narratives_dict))) +# print("test_ws_narratives_dict : " + str(test_ws_narratives_dict)) + + for ws_id_to_delete in workspaces_without_corresponding_versions_data: + del(ws_narratives_dict[ws_id_to_delete]) + print("ws_narratives_dict length 2: " + str(len(ws_narratives_dict))) + + return ws_narratives_dict + + +############################# +# +# Determine if the narrative was created from a copied operation +# Grab all ws_obj_versions that have a savedate <= the savedate of the first version of the narratoive object +# If all those objects have copied and from the same source WS, all are version 1, and all have a lower object id than the narrative object. +# Then it was copied from that WS. Now determine which version of that narrative was it copied from. +# Then look at versions of source narrative and take correct one with max date that is less than destination narrative savedate +# +############################# +def determine_if_narratives_are_copies(db, ws_narratives_dict, narrative_type_list): + ws_that_were_narrative_copy_list = list() + copied_ws_narratives_dict = dict() + source_ws_id_to_copied_ws_ids = dict() + + multiple_workspace_source_count = 0 + multiple_workspace_source_set = set() + fresh_narrative_count = 0 + not_all_pre_objects_copied_count = 0 + not_all_pre_objects_copied_set = set() + final_else_count = 0 + +# temp_ws_narratives_dict = dict() +# temp_ws_narratives_dict[103334] = ws_narratives_dict[103334] +# ws_narratives_dict = temp_ws_narratives_dict + + print("ws_narratives_dict length : " + str(len(ws_narratives_dict))) + + for potential_narrative_ws in sorted(ws_narratives_dict): + objects_to_check_count = 0 + objects_copied_count = 0 + workspace_ids_copied_from_set = set() + print("potential_narrative_ws : " + str(potential_narrative_ws) + " Dict: " + str(ws_narratives_dict[potential_narrative_ws])) + object_to_check_cursor = db.workspaceObjVersions.find({"savedate":{"$lt":ws_narratives_dict[potential_narrative_ws]["savedate"]}, + "ws":potential_narrative_ws}, + {"ws":1, "id":1, "copied":1,"savedate":1, "ver":1, "type":1, "_id":0}); + + for object_to_check in object_to_check_cursor: + object_type = object_to_check["type"] + if object_type in narrative_type_list: + # skip narrative objects + continue + copied_from = object_to_check["copied"] +# print("copied_from : " + str(copied_from)) + if copied_from is not None: + source_ws = int(copied_from.split("/")[0]) +# if objects_copied_count == 2: +# source_ws = 111 + workspace_ids_copied_from_set.add(source_ws) + objects_copied_count += 1 + objects_to_check_count += 1 + if 
objects_copied_count > 0 and (objects_to_check_count == objects_copied_count) and (len(workspace_ids_copied_from_set) == 1): + copied_ws_narratives_dict[potential_narrative_ws] = ws_narratives_dict[potential_narrative_ws] + source_ws_id = list(workspace_ids_copied_from_set)[0] + copied_ws_narratives_dict[potential_narrative_ws]["copied_from"] = source_ws_id + if source_ws_id not in source_ws_id_to_copied_ws_ids: + source_ws_id_to_copied_ws_ids[source_ws_id] = list() + source_ws_id_to_copied_ws_ids[source_ws_id].append(potential_narrative_ws) +# print("IT WAS COPIED : WS : " + str(potential_narrative_ws) + " copied from : " + str(workspace_ids_copied_from_set)) + elif len(workspace_ids_copied_from_set) > 1: +# print("NOT COPIED FROM ONE WS : " + str(workspace_ids_copied_from_set)) + multiple_workspace_source_count += 1 + multiple_workspace_source_set.add(potential_narrative_ws) + elif objects_copied_count == 0: +# print("This was a fresh narrative") + fresh_narrative_count += 1 + elif objects_copied_count != objects_to_check_count: +# print("Not all objectswere copied") + not_all_pre_objects_copied_count += 1 + not_all_pre_objects_copied_set.add(potential_narrative_ws) + else: +# print("Should hopefully never get here") + final_else_count += 1 + print("Processed ws : " + str(potential_narrative_ws)) +# print("copied_ws_narratives_dict : " + str(copied_ws_narratives_dict)) + + + + print("multiple_workspace_source_count : " + str(multiple_workspace_source_count)) + print("multiple_workspace_source_set : " + str(sorted(multiple_workspace_source_set))) + print("fresh_narrative_count : " + str(fresh_narrative_count)) + print("not_all_pre_objects_copied_count : " + str(not_all_pre_objects_copied_count)) + print("not_all_pre_objects_copied_set : " + str(sorted(not_all_pre_objects_copied_set))) + print("final_else_count : " + str(final_else_count)) + +# multiple_workspace_source_in_multi_narrative_count = 0 +# for temp_ws_id in multiple_workspace_source_set: +# if temp_ws_id in workspaces_with_multiple_narrative_obj_ids: +# multiple_workspace_source_in_multi_narrative_count += 1 +# print("multiple_workspace_source_in_multi_narrative_count : " + str(multiple_workspace_source_in_multi_narrative_count)) + +# not_all_pre_objects_copied_in_multi_narrative_count = 0 +# for temp_ws_id in not_all_pre_objects_copied_set: +# if temp_ws_id in workspaces_with_multiple_narrative_obj_ids: +# not_all_pre_objects_copied_in_multi_narrative_count += 1 +# print("not_all_pre_objects_copied_in_multi_narrative_count : " + str(not_all_pre_objects_copied_in_multi_narrative_count)) + + return (copied_ws_narratives_dict,source_ws_id_to_copied_ws_ids) + +def determine_source_narrative_version(db, copied_ws_narratives_dict, source_ws_id_to_copied_ws_ids, narrative_type_list): + destination_upa_from_source_upa_dict = dict() + returned_copied_ws_narratives_dict = dict() + unable_to_find_source_upa = list() + for source_ws_id in source_ws_id_to_copied_ws_ids: + ordered_save_points = list() + source_version_save_points_cursor = db.workspaceObjVersions.find({"type":{"$in":narrative_type_list}, + "ws":source_ws_id}, + {"id":1, "ver":1, "savedate":1, "_id":0}).sort("savedate") + for source_version_save_point in source_version_save_points_cursor: + source_obj_id = str(source_ws_id) + "/" + str(source_version_save_point["id"]) + "/" + str(source_version_save_point["ver"]) + savedate = source_version_save_point["savedate"] + ordered_save_points.append([savedate,source_obj_id]) + + for destination_ws_id in 
source_ws_id_to_copied_ws_ids[source_ws_id]:
+            destination_ws_savedate = copied_ws_narratives_dict[destination_ws_id]["savedate"]
+            source_obj_id_used = None
+            for ordered_save_point in ordered_save_points:
+                if ordered_save_point[0] <= destination_ws_savedate:
+                    source_obj_id_used = ordered_save_point[1]
+                else:
+                    break
+            if source_obj_id_used is None:
+                unable_to_find_source_upa.append(destination_ws_id)
+            else:
+                destination_upa = str(destination_ws_id) + "/" + str(copied_ws_narratives_dict[destination_ws_id]["id"]) + "/1"
+                destination_upa_from_source_upa_dict[destination_upa] = source_obj_id_used
+                returned_copied_ws_narratives_dict[destination_ws_id] = copied_ws_narratives_dict[destination_ws_id]
+                returned_copied_ws_narratives_dict[destination_ws_id]["destination_narrative_upa"] = destination_upa
+                returned_copied_ws_narratives_dict[destination_ws_id]["source_narrative_upa"] = source_obj_id_used
+    return (destination_upa_from_source_upa_dict,returned_copied_ws_narratives_dict)
+
+def upload_past_narrative_copies(returned_copied_ws_narratives_dict):
+    prep_cursor = db_connection.cursor(prepared=True)
+    past_narrative_copies_insert_statement = (
+        "insert into past_narrative_copies "
+        "(source_narrative_id, source_narrative_upa, destination_narrative_id, destination_narrative_upa, destination_narrative_save_date) "
+        "values(%s, %s, %s, %s, %s);")
+    for copied_narrative_ws_id in returned_copied_ws_narratives_dict:
+        input = (returned_copied_ws_narratives_dict[copied_narrative_ws_id]['copied_from'],
+                 returned_copied_ws_narratives_dict[copied_narrative_ws_id]['source_narrative_upa'],
+                 copied_narrative_ws_id,
+                 returned_copied_ws_narratives_dict[copied_narrative_ws_id]['destination_narrative_upa'],
+                 returned_copied_ws_narratives_dict[copied_narrative_ws_id]['savedate'])
+        prep_cursor.execute(past_narrative_copies_insert_statement, input)
+    db_connection.commit()
+
+narrative_type_list = get_narrative_typed_objects_list(cursor)
+#ws_narratives_dict = get_ws_narratives(db, narrative_type_list)
+ws_narratives_dict = get_ws_narratives(db)
+print("ws_narratives_dict length : " + str(len(ws_narratives_dict)))
+
+# NEED TO CODE UP A WS ADMINISTER CALL (TO DO) AND THEN REPOPULATE WS_NARRATIVES_DICT WITH THE PROPER NARRATIVE
+# SEE methods_upload_workspace_stats line 337 to 339. 
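+# Added explanatory sketch (comments only, not executed; the helper name is illustrative): per workspace,
+# determine_if_narratives_are_copies() boils down to this rule -- every non-narrative object saved
+# before the narrative's first version must carry a "copied" reference, and all of those references
+# must point at the same source workspace:
+#
+#     def looks_like_copied_narrative(pre_existing_objects):
+#         # pre_existing_objects: non-narrative objects with savedate < the narrative's first savedate,
+#         # each a dict whose "copied" field is "source_ws/obj/ver" or None
+#         copied = [o for o in pre_existing_objects if o.get("copied")]
+#         sources = {int(o["copied"].split("/")[0]) for o in copied}
+#         return len(copied) > 0 and len(copied) == len(pre_existing_objects) and len(sources) == 1
+#
+# determine_source_narrative_version() then picks the latest version of the source narrative whose
+# savedate is <= the destination narrative's savedate.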
+#ws_narratives_dict = cleanup_multiple_narrative_object_ids(db, ws_narratives_dict, workspaces_with_multiple_narrative_obj_ids) +(copied_ws_narratives_dict,source_ws_id_to_copied_ws_ids) = determine_if_narratives_are_copies(db, ws_narratives_dict, narrative_type_list) +(destination_upa_from_source_upa_dict,returned_copied_ws_narratives_dict) = determine_source_narrative_version(db, copied_ws_narratives_dict, source_ws_id_to_copied_ws_ids, narrative_type_list) +upload_past_narrative_copies(returned_copied_ws_narratives_dict) + +print("copied_ws_narratives_dict length : " + str(len(copied_ws_narratives_dict))) +print("source_ws_id_to_copied_ws_ids length : " + str(len(source_ws_id_to_copied_ws_ids))) +print("destination_upa_from_source_upa_dict length : " + str(len(destination_upa_from_source_upa_dict))) +print("workspaces_without_corresponding_versions_data : " + str(workspaces_without_corresponding_versions_data)) +print("workspaces_without_corresponding_versions_data length : " + str(len(workspaces_without_corresponding_versions_data))) + +i = 0 +for destination_upa in destination_upa_from_source_upa_dict : + if i < 5: + print(destination_upa + "\t" + destination_upa_from_source_upa_dict[destination_upa]) + else: + break + i += 1 + +print("returned_copied_ws_narratives_dict examples:") +i = 0 +for copied_narrative_ws_id in returned_copied_ws_narratives_dict: + if i < 5: + print(str(copied_narrative_ws_id) + "\t" + str(returned_copied_ws_narratives_dict[copied_narrative_ws_id])) + else: + break + i += 1 + +# loop through each of the sources, get all versions timestamps +# THen determine which version of the source for each distination copy event + + + + +################## +# +# Still need to do determination of which source narrative version. +# +# Need to do a reverse lookup object source_narrative_id -> [list of destination narratives] +# +##################### diff --git a/source/daily_cron_jobs/methods_upload_blobstore_details.py b/source/daily_cron_jobs/methods_upload_blobstore_details.py new file mode 100644 index 0000000..0ea3f5a --- /dev/null +++ b/source/daily_cron_jobs/methods_upload_blobstore_details.py @@ -0,0 +1,623 @@ +from pymongo import MongoClient +from pymongo import ReadPreference +from biokbase.workspace.client import Workspace +#from installed_clients.AbstractHandleClient import AbstractHandle as HandleService +from biokbase.service.Client import Client as ServiceClient +import json as _json +import os +import mysql.connector as mysql +import methods_upload_user_stats +import requests +#import time +#from splitting import split_sequence +#from datetime import date +#from datetime +import datetime, time + +requests.packages.urllib3.disable_warnings() + +mongoDB_metrics_connection = os.environ["MONGO_PATH"] + +ws_url = os.environ["WS_URL"] +ws_user_token = os.environ["METRICS_WS_USER_TOKEN"] +to_workspace = os.environ["WRK_SUFFIX"] + +to_blobstore = os.environ["BLOBSTORE_SUFFIX"] +to_handle_db = os.environ["HANDLE_DB_SUFFIX"] + +client = MongoClient(mongoDB_metrics_connection + to_workspace) +db = client.workspace +handle_service_url = "https://kbase.us/services/handle_service" + +metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"] +sql_host = os.environ["SQL_HOST"] +query_on = os.environ["QUERY_ON"] + +yesterday = datetime.date.today() - datetime.timedelta(days=1) +start_time = time.time() + +################# +# +# Creates lookup for size by blobstore_id +# +################ +def make_blobstore_lookup (): + client_blobstore = 
MongoClient(mongoDB_metrics_connection + to_blobstore) + db_blobstore = client_blobstore.blobstore + + blobstore_nodes_size_lookup = dict() + + nodes_query = db_blobstore.nodes.find({},{"_id": 0, "id": 1, "size": 1}) + for record in nodes_query: + blobstore_node_id = record["id"] + size = record["size"] + blobstore_nodes_size_lookup[blobstore_node_id] = size + return blobstore_nodes_size_lookup + +################### +# +# Create a lookup for blobstore_id by handle_id +# +################### +def make_handle_id_lookup (): + client_handle_db = MongoClient(mongoDB_metrics_connection + to_handle_db) + db_handle = client_handle_db.handle_db + + handle_id_lookup = dict() + + handles_query = db_handle.handle.find({},{"_id": 0, "id": 1, "hid": 1}) + for record in handles_query: + blobstore_node_id = record["id"] + handle = record["hid"] + handle_id_lookup[handle] = blobstore_node_id + return handle_id_lookup + +#################### +# +# GETS EXISTING BLOBSTORE RECORDS to see if new insert needs to be done +# +################### +def get_existing_blobstore_details_records (db_connection): + existing_bs_details_cursor = db_connection.cursor(buffered=True) + existing_bs_details_statement = ("select blobstore_id, ws_obj_id, core_ws_obj_id, is_deleted from blobstore_detail") + existing_bs_details_cursor.execute(existing_bs_details_statement) + existing_records_set = set() + existing_deleted_blobstore_details_set = set() + for (blobstore_id, ws_obj_id, core_ws_obj_id, is_deleted) in existing_bs_details_cursor: + lookup_key = blobstore_id + "::" + ws_obj_id + existing_records_set.add(lookup_key) + if is_deleted == 1: + existing_deleted_blobstore_details_set.add(core_ws_obj_id) + return (existing_records_set, existing_deleted_blobstore_details_set) + +################# +# +# Lookup for the first save date for each blobstore id +# +################ +def get_existing_bsid_first_save_date (db_connection): + bsid_first_save_date_cursor = db_connection.cursor(buffered=True) + bsid_first_save_date_statement = ("select blobstore_id, min(save_date) as first_save_date from blobstore_detail group by blobstore_id") + bsid_first_save_date_cursor.execute(bsid_first_save_date_statement) + bsid_first_save_date_dict = {} + for (blobstore_id, first_save_date) in bsid_first_save_date_cursor: + bsid_first_save_date_dict[blobstore_id] = first_save_date + return bsid_first_save_date_dict + +################ +# +# Populates user_info table, this gets triggered when an user is not in the user_info table. +# This insures the foreign key does not fail. +# +################ +def populate_user_info_table(): + print("Blobstore refreshing of User Stats Upload (UTC)") + user_stats_dict = methods_upload_user_stats.get_user_info_from_auth2() + user_stats_dict = methods_upload_user_stats.get_internal_users(user_stats_dict) + user_stats_dict = methods_upload_user_stats.get_user_orgs_count(user_stats_dict) + user_stats_dict = methods_upload_user_stats.get_user_narrative_stats(user_stats_dict) + #user_stats_dict = methods_upload_user_stats.get_institution_and_country(user_stats_dict) + user_stats_dict = methods_upload_user_stats.get_profile_info(user_stats_dict) + print("--- gather data %s seconds ---" % (time.time() - start_time)) + methods_upload_user_stats.upload_user_data(user_stats_dict) + print("Refresh of Upload user stats completed") + +############## +# +# Creates set of usernames in user_info. 
this is used to make sure the username that is seen in the
+# wsObjVersion records is already in the user_info table
+#
+#############
+def get_usernames (db_connection):
+    usernames_cursor = db_connection.cursor(buffered=True)
+    usernames_statement = ("select username, user_id from metrics.user_info")
+    usernames_cursor.execute(usernames_statement)
+    temp_usernames_set = set()
+    for (username, user_id) in usernames_cursor:
+        temp_usernames_set.add(username)
+    print("Usernames length : " + str(len(temp_usernames_set)))
+    return temp_usernames_set
+
+#############
+#
+# creates set of deleted objects in the workspace collection
+#
+############
+#def get_deleted_workspace_objects_set():
+#    deleted_objects = set()
+#    ws_obj_deleted_cursor = db.workspaceObjects.find({"del":True},{"_id":0, "ws": 1,"id":1})
+#    for ws_obj_deleted in ws_obj_deleted_cursor:
+#        deleted_temp_ws_id = ws_obj_deleted["ws"]
+#        deleted_obj_id = ws_obj_deleted["id"]
+#        deleted_ws_obj_id = str(deleted_temp_ws_id) + "/" + str(deleted_obj_id)
+#        deleted_objects.add(deleted_ws_obj_id)
+#    return deleted_objects
+
+##############
+#
+# creates sets of deleted ws_obj_ids, and of the deleted objects that also have a handle
+#
+#############
+def get_deleted_workspace_objects_set():
+    deleted_workspace_objects_set = set()
+    ws_obj_deleted_cursor = db.workspaceObjects.find({"del":True},{"_id":0, "ws": 1,"id":1})
+    for ws_obj_deleted in ws_obj_deleted_cursor:
+        deleted_temp_ws_id = ws_obj_deleted["ws"]
+        deleted_obj_id = ws_obj_deleted["id"]
+        deleted_ws_obj_id = str(deleted_temp_ws_id) + "/" + str(deleted_obj_id)
+        deleted_workspace_objects_set.add(deleted_ws_obj_id)
+
+    deleted_objects_with_handles_set = set()
+    ws_obj_vers_cursor = db.workspaceObjVersions.find(
+        {
+            "extids.handle" : { "$exists": True },
+        },
+        {
+            "ws": 1,
+            "id": 1,
+            "_id": 0,
+        },
+        no_cursor_timeout=True
+    )
+    for ws_obj_ver in ws_obj_vers_cursor:
+        object_id = str(ws_obj_ver["ws"]) + "/" + str(ws_obj_ver["id"])
+        if object_id in deleted_workspace_objects_set:
+            deleted_objects_with_handles_set.add(object_id)
+    return (deleted_workspace_objects_set,deleted_objects_with_handles_set)
+
+#############
+#
+# creates set of deleted objects already in the blobstore_detail MySQL table
+#
+############
+def get_existing_blobstore_detail_ws_objects (db_connection):
+    deleted_ws_obj_cursor = db_connection.cursor(buffered=True)
+    deleted_ws_obj_statement = ("select core_ws_obj_id, is_deleted from metrics.blobstore_detail where is_deleted = 1")
+    deleted_ws_obj_cursor.execute(deleted_ws_obj_statement)
+    existing_bs_deleted_objects_set = set()
+    for (core_ws_obj_id, is_deleted) in deleted_ws_obj_cursor:
+        existing_bs_deleted_objects_set.add(core_ws_obj_id)
+    print("Existing Blobstore deleted ws_obj set length : " + str(len(existing_bs_deleted_objects_set)))
+    return existing_bs_deleted_objects_set
+
+
+############
+#
+# Gets all the blobstore information and uploads it into the blobstore_detail table
+# Defaults to the previous full day if no start_date and end_date are passed
+# Allows for backfilling records if specific dates are chosen
+# Note this contains logic to ensure all saving users are present in user_info
+# It will not duplicate existing records (so it is safe to use a date range that was previously processed)
+# It will always figure out what was the original saver object for a blobstore based on the records present
+# in the upload and existing records in the blobstore details table
+############
+def upload_blobstore_details_data(start_date, end_date):
+    """
+    Upload blobstore detail data
+    """
+    # object_id -> {handle=>handle, node=node, 
type=object_type, savedate=> sd} + objects_with_problem_nodes_with_no_size = dict() + objects_with_problem_handles_with_no_nodes = dict() + + running_size_total = 0 + + deleted_object_with_data_found_count = 0 + deleted_object_without_data_found_count = 0 + deleted_object_without_data_found_set = set() + + #exit() + + # blobstore_id => {ws_obj_id => (save_date, saver)} + blobstore_object_results = dict() + + # blobstore_id =>{first_saver_ws_obj_id => blah, + # first_save_date = date} + #blobstore_id_first_saver = dict() + + #ws_ids = [146324] # small + #ws_ids = [28129] # fungal phytosome s + #ws_ids = [146324,28129] # fungal phytosome and small ws, took 203 mins + #ws_ids = [19217] # refseq reference + +# #for ws_id in ws_ids: +# deleted_objects = set() +# ws_obj_deleted_cursor = db.workspaceObjects.find({"del":True},{"_id":0, "ws": 1,"id":1}) +# for ws_obj_deleted in ws_obj_deleted_cursor: +# deleted_temp_ws_id = ws_obj_deleted["ws"] +# deleted_obj_id = ws_obj_deleted["id"] +# deleted_ws_obj_id = str(deleted_temp_ws_id) + "/" + str(deleted_obj_id) +# deleted_objects.add(deleted_ws_obj_id) + +# deleted_workspace_objects = get_deleted_workspace_objects_set() + (deleted_workspace_objects, deleted_objects_with_handles_set) = get_deleted_workspace_objects_set() + + print("TOTAL DELETED OBJECT LENGTH: " + str(len(deleted_workspace_objects))) + print("TOTAL DELETED OBJECT LENGTH: " + str(len(deleted_objects_with_handles_set))) + print("--- total time for the deleted objects lookup %s seconds ---" % (time.time() - start_time)) + + ws_obj_vers_cursor = db.workspaceObjVersions.find( + {#"ws":312, + "extids.handle" : { "$exists": True }, + "savedate": {"$gt": start_date, "$lt": end_date}, + }, + { + "type": 1, + "ws": 1, + "id": 1, + "ver": 1, + "savedate": 1, + "savedby": 1, + "extids": 1, + "_id": 0, + }, + no_cursor_timeout=True + ) + i = 0 + ws_obj_info = dict() + deleted_ext_ids_counter = 0 + + for ws_obj_ver in ws_obj_vers_cursor: + is_deleted = 0 + object_type_full = ws_obj_ver["type"] + (object_type, object_spec_version) = object_type_full.split("-") + #if (object_type != "KBaseNarrative.Narrative" and object_type != "KBaseReport.Report"): + ws_id = ws_obj_ver["ws"] + obj_id = ws_obj_ver["id"] + temp_ws_obj_id = str(ws_id) + "/" + str(obj_id) + if temp_ws_obj_id in deleted_workspace_objects: + deleted_ext_ids_counter += 1 + is_deleted = 1 + # continue + obj_ver = ws_obj_ver["ver"] + obj_save_date = ws_obj_ver["savedate"] + savedby = ws_obj_ver["savedby"] + extids = ws_obj_ver["extids"] + handles = extids["handle"] +# for handle in handles: +# handles_set.add(handle) +# obj_copied = 0 + full_obj_id = str(ws_id) + "/" + str(obj_id) + "/" + str(obj_ver) +# print("Full obj id : " + full_obj_id) +# print("Object Type : " + object_type_full) +# if (object_type != "KBaseNarrative.Narrative" and object_type != "KBaseReport.Report"): +# if (object_type == "KBaseNarrative.Narrative" or object_type == "KBaseReport.Report"): + + ws_obj_info[full_obj_id] = {"save_date" : obj_save_date, + "savedby" : savedby, + "obj_type" : object_type_full, + "handles" : handles, + "is_deleted" : is_deleted} + + print("--- total time for the ws_object_version objects query %s seconds ---" % (time.time() - start_time)) + + ########################################################################## + print("BLOBSTORE LOOKUP:") + blobstore_lookup = make_blobstore_lookup() +# test_counter = 0 +# for temp_key in blobstore_lookup: +# if test_counter < 10: +# print("ID: " + str(temp_key) + " ::: size: " + 
str(blobstore_lookup[temp_key])) +# else: +# break +# test_counter = test_counter + 1 + print("Total BLOBSTORE Lookuplength: " + str(len(blobstore_lookup))) + + print("--- total time for the blobstore size lookup creation %s seconds ---" % (time.time() - start_time)) + + handle_id_lookup = make_handle_id_lookup() +# test_counter = 0 +# for temp_key in handle_id_lookup: +# if test_counter < 10: +# print("ID: " + str(temp_key) + " ::: blobstore_id: " + str(handle_id_lookup[temp_key])) +# else: +# break +# test_counter = test_counter + 1 + print("Total HANDLE ID lookup length: " + str(len(handle_id_lookup))) + + print("--- total time for the blobstore size lookup creation %s seconds ---" % (time.time() - start_time)) +############################################## + + for full_obj_id in ws_obj_info: +# print("ws_obj_info[full_obj_id][handles] : " + str(ws_obj_info[full_obj_id]["handles"])) + for handle in ws_obj_info[full_obj_id]["handles"]: + blobstore_id = None + (kbh_prefix, str_handle_id) = handle.split("_") + if int(str_handle_id) in handle_id_lookup: + blobstore_id = handle_id_lookup[int(str_handle_id)] + else: + objects_with_problem_handles_with_no_nodes[full_obj_id] = ws_obj_info[full_obj_id] + if ws_obj_info[full_obj_id]["is_deleted"] == 1: + deleted_object_without_data_found_count += 1 + (temp_core_object_id, temp_ver) = full_obj_id.rsplit("/",1) + deleted_object_without_data_found_set.add(temp_core_object_id) + + if blobstore_id and blobstore_id in blobstore_lookup: + if blobstore_id not in blobstore_object_results: + blobstore_object_results[blobstore_id] = dict() + blobstore_object_results[blobstore_id][full_obj_id] = (ws_obj_info[full_obj_id]["save_date"], + ws_obj_info[full_obj_id]["savedby"]) +# print("Blobstore lookup file_size : " + str(blobstore_lookup[blobstore_id])) + if ws_obj_info[full_obj_id]["is_deleted"] == 1: + deleted_object_with_data_found_count += 1 + file_size = blobstore_lookup[blobstore_id] + running_size_total = running_size_total + file_size + else: +# print("HUGE PROBLEM: obj_id : " + full_obj_id + " blobstore_id: " + str(blobstore_id) + " IS NOT IN THE LOOKUP") +# del blobstore_object_results[blobstore_id] + objects_with_problem_nodes_with_no_size[full_obj_id] = ws_obj_info[full_obj_id] + if ws_obj_info[full_obj_id]["is_deleted"] == 1: + deleted_object_without_data_found_count += 1 + (temp_core_object_id, temp_ver) = full_obj_id.rsplit("/",1) + deleted_object_without_data_found_set.add(temp_core_object_id) + + db_connection = mysql.connect( + host=sql_host, user="metrics", passwd=metrics_mysql_password, database="metrics" + ) + cursor = db_connection.cursor() + query = "use " + query_on + cursor.execute(query) +# update_zero_orig_saver_cursor = db_connection.cursor(prepared=True) +# blobstore_detail_zero_orig_saver_update_statement = ( +# "update metrics.blobstore_detail " +# "set orig_saver = 0 where blobstore_id = %s;" +# ) + +# update_cursor = db_connection.cursor(prepared=True) +# blobstore_detail_update_statement = ( +# "update metrics.blobstore_detail " +# "set orig_saver = 1 where blobstore_id = %s and ws_obj_id = %s;" +# ) + + bsid_first_save_date_dict = get_existing_bsid_first_save_date(db_connection) + existing_blobstore_records, existing_deleted_blobstore_details_set = get_existing_blobstore_details_records(db_connection) + usernames_set = get_usernames(db_connection) + print("Usernames length = " + str(len(usernames_set))) + db_connection.close() + + insert_count = 0 + needed_existing_update_orig_saver_count = 0 + 
skip_insert_because_exists_count = 0 + +# loop over all the blobstore details and pull together all the needed information and do the inserts + for blobstore_id in blobstore_object_results: + db_connection = mysql.connect( + host=sql_host, user="metrics", passwd=metrics_mysql_password, database="metrics" + ) + cursor = db_connection.cursor() + query = "use " + query_on + cursor.execute(query) + bsid_new_first_save_date = None + bsid_new_first_save_date_ws_obj_id = None + existing_bsid_first_save_date = None + insert_cursor = db_connection.cursor(prepared=True) + blobstore_detail_insert_statement = ( + "insert into metrics.blobstore_detail " + "(blobstore_id, ws_obj_id, save_date, ws_id, size, saver_username, orig_saver, object_type, core_ws_obj_id) " + "values(%s, %s, %s, %s, %s, %s, 0, %s, %s)" + ) + + update_zero_orig_saver_cursor = db_connection.cursor(prepared=True) + blobstore_detail_zero_orig_saver_update_statement = ( + "update metrics.blobstore_detail " + "set orig_saver = 0 where blobstore_id = %s;" + ) + + update_cursor = db_connection.cursor(prepared=True) + blobstore_detail_update_statement = ( + "update metrics.blobstore_detail " + "set orig_saver = 1 where blobstore_id = %s and ws_obj_id = %s;" + ) + + had_a_reference_ws = 0 + if blobstore_id in bsid_first_save_date_dict: + existing_bsid_first_save_date = bsid_first_save_date_dict[blobstore_id] + for full_ws_obj_id in blobstore_object_results[blobstore_id]: + (ws_id, obj_id, version_number) = full_ws_obj_id.split("/") + save_date = blobstore_object_results[blobstore_id][full_ws_obj_id][0] + saver = blobstore_object_results[blobstore_id][full_ws_obj_id][1] + + lookup_key = blobstore_id + "::" + full_ws_obj_id + if lookup_key in existing_blobstore_records: + skip_insert_because_exists_count += 1 + continue + + # TO GET ONLY REFERENCE GENOME WORKSPACES +# if int(ws_id) in (19217, 16026, 28129, 80490): +# had_a_reference_ws = 1 + # DO INSERT SET ORIG_SAVER = 0 + + if saver not in usernames_set: + print("Usernames pre length = " + str(len(usernames_set))) + populate_user_info_table() + usernames_set = get_usernames(db_connection) + print("Usernames post length = " + str(len(usernames_set))) + + size = blobstore_lookup[blobstore_id] + object_type = ws_obj_info[full_ws_obj_id]["obj_type"] + temp = full_ws_obj_id.split("/") + core_ws_obj_id = "/".join(temp[:-1]) + + input_vals = ( + blobstore_id, + full_ws_obj_id, + save_date, + ws_id, + size, + saver, + object_type, + core_ws_obj_id, + ) + insert_cursor.execute(blobstore_detail_insert_statement, input_vals) + insert_count += 1 + + # record is fresh and needs to be inserted. + #DO SAVE DATE LOGIC LOOKING FOR MIN_DATE + if (existing_bsid_first_save_date and save_date < existing_bsid_first_save_date): + bsid_new_first_save_date = save_date + bsid_new_first_save_date_ws_obj_id = full_ws_obj_id + if existing_bsid_first_save_date is None: + if (bsid_new_first_save_date is None or save_date < bsid_new_first_save_date): + bsid_new_first_save_date = save_date + bsid_new_first_save_date_ws_obj_id = full_ws_obj_id + + +# if had_a_reference_ws == 1: + # AFTER ALL THE INSERTS DONE (update the record that is now the min_date, potentially change min_date from an existing or-ig_saver + if existing_bsid_first_save_date is not None and bsid_new_first_save_date is not None: + #meand a new seen record has lower save date than an existing one. Should not occur. 
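+            # Added note: rows inserted above start with orig_saver = 0, so when this happens the
+            # original-saver flag is re-pointed: every existing row for the blobstore_id is reset to
+            # orig_saver = 0 here, and the ws_obj_id with the new minimum save_date is then marked
+            # orig_saver = 1 in the block just below.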
+ update_vals = (blobstore_id,) + update_zero_orig_saver_cursor.execute(blobstore_detail_zero_orig_saver_update_statement, update_vals) + needed_existing_update_orig_saver_count += 1 + if bsid_new_first_save_date_ws_obj_id is not None: + update_cursor = db_connection.cursor(prepared=True) + blobstore_detail_update_statement = ( + "update metrics.blobstore_detail " + "set orig_saver = 1 where blobstore_id = %s and ws_obj_id = %s;" + ) + update_vals = (blobstore_id, bsid_new_first_save_date_ws_obj_id) + update_cursor.execute(blobstore_detail_update_statement, update_vals) + insert_cursor.close() + db_connection.commit() + db_connection.close() + + # RESOLVE THE MISSING DELETED OBJECTS + deleted_objects_to_update_set = deleted_objects_with_handles_set.difference(existing_deleted_blobstore_details_set) + if len(deleted_objects_to_update_set) > 0: + print("Length of core obj ids that need to be marked as deleted : " + str(len(deleted_objects_to_update_set))) + print("length of deleted_object_without_data_found_set : " + str(len(deleted_object_without_data_found_set))) + db_connection = mysql.connect( + host=sql_host, user="metrics", passwd=metrics_mysql_password, database="metrics" + ) + cursor = db_connection.cursor() + query = "use " + query_on + cursor.execute(query) + + update_deleted_objects_cursor = db_connection.cursor() + update_deleted_objects_statement = ("update metrics.blobstore_detail set is_deleted = 1 where core_ws_obj_id = %s;") + for core_deleted_obj_id in deleted_objects_to_update_set: + update_deleted_objects_vals = (core_deleted_obj_id,) + update_deleted_objects_cursor.execute(update_deleted_objects_statement, update_deleted_objects_vals) + update_deleted_objects_cursor.close + db_connection.commit() + db_connection.close() + + # UNDELETE THE OBJECTS THAT HAVE BEEN UNDELETED + undeleted_objects_to_update_set = existing_deleted_blobstore_details_set.difference(deleted_objects_with_handles_set) + if len(undeleted_objects_to_update_set) > 0: + print("Length of core obj ids that need to be marked as undeleted : " + str(len(undeleted_objects_to_update_set))) + db_connection = mysql.connect( + host=sql_host, user="metrics", passwd=metrics_mysql_password, database="metrics" + ) + cursor = db_connection.cursor() + query = "use " + query_on + cursor.execute(query) + + update_undeleted_objects_cursor = db_connection.cursor() + update_undeleted_objects_statement = ("update metrics.blobstore_detail set is_deleted = 0 where core_ws_obj_id = %s;") + for core_undeleted_obj_id in undeleted_objects_to_update_set: + update_undeleted_objects_vals = (core_undeleted_obj_id,) + update_undeleted_objects_cursor.execute(update_undeleted_objects_statement, update_undeleted_objects_vals) + update_undeleted_objects_cursor.close + db_connection.commit() + db_connection.close() + + + #print("objects_with_problem_nodes_with_no_size : " + str(objects_with_problem_nodes_with_no_size)) + print("TOTAL objects_with_problem_nodes_with_no_size : " + str(len(objects_with_problem_nodes_with_no_size))) + + #print("objects_with_problem_handles_with_no_nodes : " + str(objects_with_problem_handles_with_no_nodes)) + print("TOTAL objects_with_problem_handles_with_no_nodes : " + str(len(objects_with_problem_handles_with_no_nodes))) + + print("deleted_object_with_data_found_count :" + str(deleted_object_with_data_found_count)) + print("deleted_object_without_data_found_count :" + str(deleted_object_without_data_found_count)) + +# print("blobstore_object_results : " + str(blobstore_object_results)) +# for blobstore_id in 
blobstore_object_results: +# if len( blobstore_object_results[blobstore_id]) > 5: +# print("blobstore ID : " + str(blobstore_id)) +# print(str(blobstore_object_results[blobstore_id])) + print("blobstore_object_results length : " + str(len(blobstore_object_results))) + print("RUNNING TOTAL SIZE : " + str(running_size_total)) + + obj_id_set = set() + for blobstore_id in blobstore_object_results : + for obj_id in blobstore_object_results[blobstore_id]: + obj_id_set.add(obj_id) + print("Total number of objects with handles that could be fully determined : " + str(len(obj_id_set))) + + print("Total ext_ids objects that were deleted : " + str(deleted_ext_ids_counter)) + + #print("blobstore_object_results : " + str(blobstore_object_results)) + + + print("Insert Count = " + str(insert_count)) + print("needed_existing_update_orig_saver_count = " + str(needed_existing_update_orig_saver_count)) + print("skip_insert_because_exists_count = " + str(skip_insert_because_exists_count)) + + print("--- total seconds %s seconds ---" % (time.time() - start_time)) + #db_connection.commit() + #db_connection.cLOSE() + + #################### + # END upload_blobstore_details_data + ################### + + + +##################### +# +# Essentially the main caller program that deals with start and end date information +# Whether there were passed values or the defaut of the previous full day +# +#################### +def process_blobstore_details_data( + start_date=datetime.datetime.combine(yesterday, datetime.datetime.min.time()), + end_date=datetime.datetime.combine(yesterday, datetime.datetime.max.time()), + ): + # get mongo set up + # client_blobstore = MongoClient(mongoDB_metricsro_connection + to_blobstore) + client_blobstore = MongoClient(mongoDB_metrics_connection + to_blobstore) + db_blobstore = client_blobstore.blobstore + + print("############################################") + print("START TIME (UTC): " + str(datetime.datetime.utcnow())) + start_time = time.time() + + # From str to datetime, defaults to zero time. 
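+    # Added note: string arguments of the form "YYYY-MM-DD" are widened to whole days below --
+    # the start date gets 00:00:00 and the end date gets 23:59:59.999999 -- so a backfill call such as
+    #     process_blobstore_details_data("2024-09-07", "2024-09-28")
+    # picks up every ws_obj_version saved in that window, while calling it with no arguments
+    # processes only the previous full day (the defaults in the signature above).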
+ if type(start_date) == str: + start_date_partial = datetime.datetime.strptime(start_date, "%Y-%m-%d") + start_date = datetime.datetime.combine( + start_date_partial, datetime.datetime.min.time() + ) + end_date_partial = datetime.datetime.strptime(end_date, "%Y-%m-%d") + end_date = datetime.datetime.combine( + end_date_partial, datetime.datetime.max.time() + ) + + print("Start date : " + str(start_date)) + print("End date : " + str(end_date)) + + upload_blobstore_details_data(start_date, end_date) + print("############################################") +#exit() diff --git a/source/daily_cron_jobs/upload_blobstore_details.py b/source/daily_cron_jobs/upload_blobstore_details.py new file mode 100644 index 0000000..a30d1c4 --- /dev/null +++ b/source/daily_cron_jobs/upload_blobstore_details.py @@ -0,0 +1,28 @@ +# UploadBlobstoreDetails +# +import methods_upload_blobstore_details +import time +import datetime + +yesterday = datetime.date.today() - datetime.timedelta(days=1) +print("############################################") +print("############################################") +print("############################################") +print("Blobstore Detais Upload (UTC): " + str(datetime.datetime.utcnow())) +print("START TIME (UTC): " + str(datetime.datetime.utcnow())) +start_time = time.time() + + +start_time = time.time() +start_date = "2024-09-07" +end_date = "2024-09-28" +methods_upload_blobstore_details.process_blobstore_details_data(start_date,end_date) +#methods_upload_blobstore_details.process_blobstore_details_data() +#print("Uploading blobstore details took ", time.time() - start_time, " seconds to run") + + +start_date=datetime.datetime.combine(yesterday, datetime.datetime.min.time()) +end_date=datetime.datetime.combine(yesterday, datetime.datetime.max.time()) + +print("Start date: " + str(start_date)) +print("End date: " + str(end_date)) From 3a6d0859dab9f14bd9200b4d3c23e0942b76b7fe Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Fri, 4 Oct 2024 02:56:29 +0000 Subject: [PATCH 04/11] added blobstore detail information to user_super_summary --- source/custom_scripts/dump_query_results.py | 4 +++- source/daily_cron_jobs/make_reporting_tables.py | 6 ++++++ sql_create_statements/sql_reporting_views_and_tables.sql | 8 +++++++- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/source/custom_scripts/dump_query_results.py b/source/custom_scripts/dump_query_results.py index ab8486c..ba42ea1 100644 --- a/source/custom_scripts/dump_query_results.py +++ b/source/custom_scripts/dump_query_results.py @@ -42,7 +42,9 @@ def dump_query_results(): "last_narrative_modified_date\ttotal_narrative_objects_count\ttop_lvl_narrative_objects_count\ttotal_narrative_objects_size\t" "top_lvl_narrative_objects_size\ttotal_narrative_count\ttotal_public_narrative_count\tdistinct_static_narratives_count\t" "static_narratives_created_count\ttotal_visible_app_cells\ttotal_code_cells_count\tfirst_file_date\tlast_file_date\t" - "total_file_sizes_MB\ttotal_file_count\tmost_used_app\tdistinct_apps_used\ttotal_apps_run_all_time\ttotal_apps_run_last365\t" + "total_file_sizes_MB\ttotal_file_count\tblobstore_orig_saver_count\tblobstore_non_orig_saver_count\t" + "blobstore_orig_saver_size_GB\tblobstore_non_orig_saver_size_GB\t" + "most_used_app\tdistinct_apps_used\ttotal_apps_run_all_time\ttotal_apps_run_last365\t" "total_apps_run_last90\ttotal_apps_run_last30\ttotal_app_errors_all_time\tfirst_app_run\tlast_app_run\ttotal_run_time_hours\t" 
"total_queue_time_hours\ttotal_CPU_hours\tsession_count_all_time\tsession_count_last_year\tsession_count_last_90\tsession_count_last_30" ) diff --git a/source/daily_cron_jobs/make_reporting_tables.py b/source/daily_cron_jobs/make_reporting_tables.py index 69f8024..8bade7b 100644 --- a/source/daily_cron_jobs/make_reporting_tables.py +++ b/source/daily_cron_jobs/make_reporting_tables.py @@ -302,6 +302,10 @@ def make_reporting_tables(): "uns.total_visible_app_cells, uns.total_code_cells_count, " "bus.first_file_date, bus.last_file_date, " "bus.total_file_sizes_MB, bus.total_file_count, " + "bdu.orig_saver_count as blobstore_orig_saver_count, " + "bdu.non_orig_saver_count as blobstore_non_orig_saver_count, " + "bdu.orig_saver_size_GB as blobstore_orig_saver_size_GB, " + "bdu.non_orig_saver_size_GB as blobstore_non_orig_saver_size_GB, " "umua.mu_func_name as most_used_app, " "udauc.distinct_apps_used, " "uapc.total_apps_run_all_time, uapc.total_apps_run_last365, " @@ -337,6 +341,8 @@ def make_reporting_tables(): "on uip.username = usc90.username " "left outer join metrics.hv_user_session_count_last_30 usc30 " "on uip.username = usc30.username " + "left outer join metrics.blobstore_detail_by_user bdu " + "on uip.username = bdu.saver_username " "where uip.exclude != 1 ") cursor.execute(user_super_summary_create_statement) print("user_super_summary_create_statement created") diff --git a/sql_create_statements/sql_reporting_views_and_tables.sql b/sql_create_statements/sql_reporting_views_and_tables.sql index 1d5c016..294cf9c 100644 --- a/sql_create_statements/sql_reporting_views_and_tables.sql +++ b/sql_create_statements/sql_reporting_views_and_tables.sql @@ -1475,7 +1475,11 @@ uns.total_narrative_count, uns.total_public_narrative_count, uns.distinct_static_narratives_count, uns.static_narratives_created_count, uns.total_visible_app_cells, uns.total_code_cells_count, bus.first_file_date, bus.last_file_date, -bus.total_file_sizes_MB, bus.total_file_count, +bus.total_file_sizes_MB, bus.total_file_count, +bdu.orig_saver_count as blobstore_orig_saver_count, +bdu.non_orig_saver_count as blobstore_non_orig_saver_count, +bdu.orig_saver_size_GB as blobstore_orig_saver_size_GB, +bdu.non_orig_saver_size_GB as blobstore_non_orig_saver_size_GB, umua.mu_func_name as most_used_app, udauc.distinct_apps_used, uapc.total_apps_run_all_time, uapc.total_apps_run_last365, @@ -1511,6 +1515,8 @@ left outer join metrics.hv_user_session_count_last_90 usc90 on uip.username = usc90.username left outer join metrics.hv_user_session_count_last_30 usc30 on uip.username = usc30.username +left outer join metrics.blobstore_detail_by_user bdu +on uip.username = bdu.saver_username where uip.exclude != 1; # END OF USER_SUPER_SUMMARY From 99e219639d0f9105bfefbf4104e57ac246bae2b9 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Fri, 4 Oct 2024 03:00:52 +0000 Subject: [PATCH 05/11] added blobstore detail information to user_super_summary --- .../daily_cron_jobs/make_reporting_tables.py | 119 +++++++++--------- 1 file changed, 58 insertions(+), 61 deletions(-) diff --git a/source/daily_cron_jobs/make_reporting_tables.py b/source/daily_cron_jobs/make_reporting_tables.py index 8bade7b..cf1a0ed 100644 --- a/source/daily_cron_jobs/make_reporting_tables.py +++ b/source/daily_cron_jobs/make_reporting_tables.py @@ -185,6 +185,64 @@ def make_reporting_tables(): cursor.execute(narrative_app_flows_create_statement) print("narrative_app_flows created") + # Blobstroe detial related tables + blobstore_detail_by_ws_create_statement = ( + "create or 
replace table blobstore_detail_by_ws as " + "(select in_q.ws_id, sum(in_q.orig_saver_count) as orig_saver_count, " + "sum(in_q.non_orig_saver_count) as non_orig_saver_count, " + "sum(in_q.orig_saver_size_GB) as orig_saver_size_GB, " + "sum(in_q.non_orig_saver_size_GB) as non_orig_saver_size_GB, " + "sum(in_q.total_blobstore_size_GB) as total_blobstore_size_GB " + "from (" + "select ws_id, DATE_FORMAT(`save_date`,'%Y-%m') as month, " + "sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, " + "sum(orig_saver * size)/1000000000 as orig_saver_size_GB, " + "0 - sum((orig_saver - 1) * size)/1000000000 as non_orig_saver_size_GB, " + "sum(size)/1000000000 as total_blobstore_size_GB " + "from blobstore_detail bd " + "group by ws_id, month) in_q " + "group by ws_id ) ") + cursor.execute(blobstore_detail_by_ws_create_statement) + print("blobstore_detail_by_ws_create_statement created") + + blobstore_detail_by_user_monthly_create_statement = ( + "create or replace table blobstore_detail_by_user_monthly as " + "(select saver_username, DATE_FORMAT(`save_date`,'%Y-%m') as month, " + "sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, " + "sum(orig_saver * size)/1000000000 as orig_saver_size_GB, " + "0 - sum((orig_saver - 1) * size)/1000000000 as non_orig_saver_size_GB, " + "sum(size)/1000000000 as total_blobstore_size_GB " + "from blobstore_detail bd " + "group by saver_username, month) ") + cursor.execute(blobstore_detail_by_user_monthly_create_statement) + print("blobstore_detail_by_user_monthly_create_statement created") + + blobstore_detail_by_user_create_statement = ( + "create or replace table blobstore_detail_by_user as " + "(select saver_username, " + "sum(orig_saver_count) as orig_saver_count, sum(non_orig_saver_count) as non_orig_saver_count, " + "sum(orig_saver_size_GB) as orig_saver_size_GB, " + "sum(non_orig_saver_size_GB) as non_orig_saver_size_GB, " + "sum(total_blobstore_size_GB) as total_blobstore_size_GB " + "from blobstore_detail_by_user_monthly " + "group by saver_username) ") + cursor.execute(blobstore_detail_by_user_create_statement) + print("blobstore_detail_by_user_create_statement created") + + blobstore_detail_by_object_type_monthly_create_statement = ( + "create or replace table blobstore_detail_by_object_type_monthly as " + "(select LEFT(object_type,LOCATE('-',object_type) - 1) as object_type, " + "DATE_FORMAT(`save_date`,'%Y-%m') as month, " + "sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, " + "sum(orig_saver * size)/1000000000 as orig_saver_size_GB, " + "0 - sum((orig_saver - 1) * size)/1000000000 as non_orig_saver_size_GB, " + "sum(size)/1000000000 as total_blobstore_size_GB " + "from blobstore_detail bd " + "group by object_type, month) ") + cursor.execute(blobstore_detail_by_object_type_monthly_create_statement) + print("blobstore_detail_by_object_type_monthly_create_statement created") + + ################## # a whole bunch of tables related user_super_summary (some helpers that can also be used stadn alone) ################## @@ -348,67 +406,6 @@ def make_reporting_tables(): print("user_super_summary_create_statement created") - # Blobstroe detial related tables - blobstore_detail_by_ws_create_statement = ( - "create or replace table blobstore_detail_by_ws as " - "(select in_q.ws_id, sum(in_q.orig_saver_count) as orig_saver_count, " - "sum(in_q.non_orig_saver_count) as non_orig_saver_count, " - "sum(in_q.orig_saver_size_GB) as orig_saver_size_GB, " - 
"sum(in_q.non_orig_saver_size_GB) as non_orig_saver_size_GB, " - "sum(in_q.total_blobstore_size_GB) as total_blobstore_size_GB " - "from (" - "select ws_id, DATE_FORMAT(`save_date`,'%Y-%m') as month, " - "sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, " - "sum(orig_saver * size)/1000000000 as orig_saver_size_GB, " - "0 - sum((orig_saver - 1) * size)/1000000000 as non_orig_saver_size_GB, " - "sum(size)/1000000000 as total_blobstore_size_GB " - "from blobstore_detail bd " - "group by ws_id, month) in_q " - "group by ws_id ) ") - cursor.execute(blobstore_detail_by_ws_create_statement) - print("blobstore_detail_by_ws_create_statement created") - - blobstore_detail_by_user_monthly_create_statement = ( - "create or replace table blobstore_detail_by_user_monthly as " - "(select saver_username, DATE_FORMAT(`save_date`,'%Y-%m') as month, " - "sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, " - "sum(orig_saver * size)/1000000000 as orig_saver_size_GB, " - "0 - sum((orig_saver - 1) * size)/1000000000 as non_orig_saver_size_GB, " - "sum(size)/1000000000 as total_blobstore_size_GB " - "from blobstore_detail bd " - "group by saver_username, month) ") - cursor.execute(blobstore_detail_by_user_monthly_create_statement) - print("blobstore_detail_by_user_monthly_create_statement created") - - blobstore_detail_by_user_create_statement = ( - "create or replace table blobstore_detail_by_user as " - "(select saver_username, " - "sum(orig_saver_count) as orig_saver_count, sum(non_orig_saver_count) as non_orig_saver_count, " - "sum(orig_saver_size_GB) as orig_saver_size_GB, " - "sum(non_orig_saver_size_GB) as non_orig_saver_size_GB, " - "sum(total_blobstore_size_GB) as total_blobstore_size_GB " - "from blobstore_detail_by_user_monthly " - "group by saver_username) ") - cursor.execute(blobstore_detail_by_user_create_statement) - print("blobstore_detail_by_user_create_statement created") - - blobstore_detail_by_object_type_monthly_create_statement = ( - "create or replace table blobstore_detail_by_object_type_monthly as " - "(select LEFT(object_type,LOCATE('-',object_type) - 1) as object_type, " - "DATE_FORMAT(`save_date`,'%Y-%m') as month, " - "sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, " - "sum(orig_saver * size)/1000000000 as orig_saver_size_GB, " - "0 - sum((orig_saver - 1) * size)/1000000000 as non_orig_saver_size_GB, " - "sum(size)/1000000000 as total_blobstore_size_GB " - "from blobstore_detail bd " - "group by object_type, month) ") - cursor.execute(blobstore_detail_by_object_type_monthly_create_statement) - print("blobstore_detail_by_object_type_monthly_create_statement created") - - - - - return import time From 24a9a912948a73881c1b7f23128be915908e63fe Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Fri, 4 Oct 2024 21:44:15 +0000 Subject: [PATCH 06/11] code to explore and dump using Adams custom app category mappings --- bin/dump_weekly_app_categories_v2.sh | 3 + .../dump_weekly_app_categories_v2.py | 46 ++++ ...s_upload_all_tags_app_category_mappings.py | 229 +++++++++++++++++ ...thods_upload_function_category_mappings.py | 185 ++++++++++++++ ...methods_upload_v2_app_category_mappings.py | 237 ++++++++++++++++++ 5 files changed, 700 insertions(+) create mode 100755 bin/dump_weekly_app_categories_v2.sh create mode 100644 source/custom_scripts/dump_weekly_app_categories_v2.py create mode 100644 source/daily_cron_jobs/methods_upload_all_tags_app_category_mappings.py create mode 
100644 source/daily_cron_jobs/methods_upload_function_category_mappings.py create mode 100644 source/daily_cron_jobs/methods_upload_v2_app_category_mappings.py diff --git a/bin/dump_weekly_app_categories_v2.sh b/bin/dump_weekly_app_categories_v2.sh new file mode 100755 index 0000000..3561f07 --- /dev/null +++ b/bin/dump_weekly_app_categories_v2.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python custom_scripts/dump_weekly_app_categories_v2.py diff --git a/source/custom_scripts/dump_weekly_app_categories_v2.py b/source/custom_scripts/dump_weekly_app_categories_v2.py new file mode 100644 index 0000000..e97ec93 --- /dev/null +++ b/source/custom_scripts/dump_weekly_app_categories_v2.py @@ -0,0 +1,46 @@ +#!/usr/local/bin/python + +import os +import mysql.connector as mysql + +metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"] +sql_host = os.environ["SQL_HOST"] +metrics = os.environ["QUERY_ON"] + + +def dump_weekly_app_categories(): + # Dumps the weekly app catagory users report used in the quarterly report + + # connect to mysql + db_connection = mysql.connect( + host=sql_host, # "mysql1", #"localhost", + user="metrics", # "root", + passwd=metrics_mysql_password, + database="metrics", # "datacamp" + ) + + cursor = db_connection.cursor() + query = "use " + metrics + cursor.execute(query) + + # CHANGE QUERY HERE + query = ("select * from metrics_reporting.app_category_unique_users_weekly_v2") + # CHANGE COLUMN HEADERS HERE TO MATCH QUERY HEADERS + print("week_run\tapp_category\tunique_users") + + cursor.execute(query) + row_values = list() + + for row_values in cursor: + temp_string = "" + for i in range(len(row_values) - 1): + if row_values[i] is not None: + temp_string += str(row_values[i]) + temp_string += "\t" + if row_values[-1] is not None: + temp_string += str(row_values[-1]) + print(temp_string) + return 1 + + +dump_weekly_app_categories() diff --git a/source/daily_cron_jobs/methods_upload_all_tags_app_category_mappings.py b/source/daily_cron_jobs/methods_upload_all_tags_app_category_mappings.py new file mode 100644 index 0000000..082adb1 --- /dev/null +++ b/source/daily_cron_jobs/methods_upload_all_tags_app_category_mappings.py @@ -0,0 +1,229 @@ +import os +import requests +import pandas as pd +import mysql.connector as mysql +import time +import datetime +from biokbase.catalog.Client import Catalog +from biokbase.narrative_method_store.client import NarrativeMethodStore + +metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"] +sql_host = os.environ["SQL_HOST"] +query_on = os.environ["QUERY_ON"] + +specific_string = "empty" + +# Configure App Data: Function + +tags = ("release","beta","dev") + +def create_function_dictionary( tag ): + # Create App Dictionary: Main function + requests.packages.urllib3.disable_warnings() + catalog = Catalog(url=os.environ["CATALOG_URL"]) + nms = NarrativeMethodStore(url=os.environ["NARRATIVE_METHOD_STORE"]) + + apps = nms.list_methods({"tag": tag}) +# apps = nms.list_methods({"tag": "release"}) +# apps = nms.list_methods({"tag": "beta"}) +# apps = nms.list_methods({"tag": "dev"}) +# apps = nms.list_methods({}) + + global specific_string + + print("APPS : "+ str(apps)) + print("============================") + + category_app_dict = dict() + #key category,=> dict("active"=>[list of apps], "inactive"=>[list_of_apps], "unknown" => [list of apps]) + + apps_with_both_list = list() + apps_with_none_list = list() + apps_with_no_cats_list = list() + + for temp_app_dict in apps: +# if temp_app_dict["id"] == "kb_uploadmethods/batch_import_assembly_from_staging": 
+#PRESENT +# if temp_app_dict["id"] == "kb_uploadmethods/batch_import_assemblies_from_staging": +#NOT PRESENT +# temp_specific_string = str(temp_app_dict) +# specific_string = temp_specific_string + "\n" + + if temp_app_dict["id"] == "view_expression_gene_table_heatmap": + print("DETAIL : " + str(temp_app_dict)) + + + app_id = temp_app_dict["id"] + app_cat_list = temp_app_dict["categories"] + + if app_id == "BBTools/RQCFilter": + print("BBTools/RQCFilter app categories : " + str(app_cat_list)) + + if app_id == "view_expression_heatmap": + print("view_expression_heatmap : " + str(app_cat_list)) + + active_type = None + active_flag_has_both = 0 + active_inactive_count = 0 + if "active" in app_cat_list: + active_inactive_count += 1 + if "inactive" in app_cat_list: + active_inactive_count += 1 + if "active" in app_cat_list and "inactive" in app_cat_list: + active_flag_has_both = 1 + print("UH OH!!!!!!!! : " + str(app_id) + " is both active and inactive") + apps_with_both_list.append(app_id) + active_type = "both" +# exit(0) +# else: + elif "active" in app_cat_list: + #CURRENTLY SET IF APP HAS BOTH IS SEEN AS ACTIVE + active_type = "active" + elif "inactive" in app_cat_list: + active_type = "inactive" + if active_type == None: + print("UH OH!!!!!!!! : " + str(app_id) + " is not active or inactive") + apps_with_none_list.append(app_id) + active_type = "none" +# exit(0) + if (len(app_cat_list) - active_inactive_count) <= 0: + apps_with_no_cats_list.append(app_id) + for category_name in app_cat_list: + if category_name == "active" or category_name == "inactive": + continue + if category_name not in category_app_dict: + category_app_dict[category_name] = dict() + if active_type not in category_app_dict[category_name]: + category_app_dict[category_name][active_type] = list() + category_app_dict[category_name][active_type].append(app_id) + + # Deal with apps that have empty category list + if len(apps_with_no_cats_list) > 0: + category_app_dict["Empty Category"] = dict() + category_app_dict["Empty Category"]["no_category"] = apps_with_no_cats_list + + print("FINAL category_app_dict : " + str(category_app_dict)) + total_count = 0 + category_count = 0 +# for temp_cat in app_dict: + for temp_cat in sorted(category_app_dict): + for active_type in category_app_dict[temp_cat]: + temp_count = len(category_app_dict[temp_cat][active_type]) + total_count += temp_count + category_count += 1 + print("Total count : " + str(total_count)) + print("category count : " + str(category_count)) +# print("specific_string : " + str(specific_string)) + print("apps_with_none_list : " + str(apps_with_none_list)) + print("apps_with_none count : " + str(len(apps_with_none_list))) + print("apps_with_both_list : " + str(apps_with_both_list)) + print("apps_with_both count : " + str(len(apps_with_both_list))) + print("apps_with_no_cats_list : " + str(apps_with_no_cats_list)) + print("apps_with_no_cats_list count : " + str(len(apps_with_no_cats_list))) + return category_app_dict + + +def update_app_category_mappings(): +# print("EXITING") +# exit() + + # connect to mysql + db_connection = mysql.connect( + host=sql_host, user="metrics", passwd=metrics_mysql_password, database="metrics" + ) + cursor = db_connection.cursor() + query = "use " + query_on + cursor.execute(query) + + + for tag in tags: + # get app catagory mappings + cat_app_dict = create_function_dictionary(tag) + + # get existing mappings + existing_records_list = list() + existing_name_cat_dict = dict() + # query = "select concat(app_name, '::', app_category, '::', 
is_active) from app_name_category_map_v3;" + query = "select app_name, app_category, is_active from app_name_category_map_v3 where tag = \'" + tag + "\';" +# input = (tag) +# cursor.execute(query, tag) + cursor.execute(query) + for row in cursor: + full_key = row[0] + "::" + row[1] + "::" + str(row[2]) + name_cat_key = row[0] + "::" + row[1] + existing_records_list.append(full_key) + existing_name_cat_dict[name_cat_key] = row[2] + existing_count = len(existing_records_list) + + # insert statement + insert_prep_cursor = db_connection.cursor(prepared=True) + + insert_statement = ( + "insert into app_name_category_map_v3 " + "(app_name, app_category, is_active, tag) " + "values(%s, %s, %s, %s);" + ) + + # update statement + update_prep_cursor = db_connection.cursor(prepared=True) + + update_statement = ( + "update app_name_category_map_v3 " + "set is_active = %s where app_name = %s and app_category = %s and tag = %s;" + ) + + # cleanup/delete statement + cleanup_prep_cursor = db_connection.cursor(prepared=True) + cleanup_statement = ( + "delete from app_name_category_map_v3 " + "where app_name = %s and app_category = %s and is_active = %s and tag = %s;" + ) + + insert_count = 0 + update_count = 0 + activity_dict = {'active': 1, 'inactive': 0, 'both': 2, "none":-1, "no_category":-2} + for category_name in cat_app_dict: + for active_type in cat_app_dict[category_name]: + for app_name in cat_app_dict[category_name][active_type]: + temp_key = app_name + "::" + category_name + "::" + str(activity_dict[active_type]) + temp_name_cat_key = app_name + "::" + category_name + if temp_name_cat_key in existing_name_cat_dict: + if activity_dict[active_type] != existing_name_cat_dict[temp_name_cat_key]: + # record needs to be updated + input = (activity_dict[active_type], app_name, category_name, tag) + update_prep_cursor.execute(update_statement, input) + update_count += 1 + if temp_key in existing_records_list: + existing_records_list.remove(temp_key) + elif temp_key in existing_records_list: + existing_records_list.remove(temp_key) + #REMOVE FOM EXISTING TO FIND LEFT OVERS + else: + # do insert + # print("INPUT : " + str(input)) + input = (app_name, category_name, activity_dict[active_type], tag) + insert_prep_cursor.execute(insert_statement, input) + insert_count += 1 + + #Clean up that no longer exist + cleanup_count = 0 + for temp_key in existing_records_list: + cleanup_count += 1 + temp_app_name, temp_cat_name, temp_is_active = temp_key.split('::') + input = (temp_app_name, temp_cat_name, int(temp_is_active), tag) + cleanup_prep_cursor.execute(cleanup_statement, input) + + db_connection.commit() + print("RESULTS FOR TAG : " + tag) + print("Existing_count : " + str(existing_count)) + print("Insert_count : " + str(insert_count)) + print("Update_count : " + str(update_count)) + print("Cleanup_count : " + str(cleanup_count)) + + + +print("############################################") +print("App Category Mapping Upload (UTC): " + str(datetime.datetime.utcnow())) +start_time = time.time() +update_app_category_mappings() +print("--- app_cat_mapping time : %s seconds ---" % (time.time() - start_time)) diff --git a/source/daily_cron_jobs/methods_upload_function_category_mappings.py b/source/daily_cron_jobs/methods_upload_function_category_mappings.py new file mode 100644 index 0000000..b8f6ffc --- /dev/null +++ b/source/daily_cron_jobs/methods_upload_function_category_mappings.py @@ -0,0 +1,185 @@ +import os +import requests +import pandas as pd +import mysql.connector as mysql +import time +import datetime 
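+# Module overview (added note): data_configure() filters out apps tagged "viewers" and strips the
+# "active" marker from each category list, create_function_dictionary() groups the released app ids
+# by category, and update_app_category_mappings() is meant to upsert that mapping into the
+# app_name_category_map table.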
+from biokbase.catalog.Client import Catalog +from biokbase.narrative_method_store.client import NarrativeMethodStore + +metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"] +sql_host = os.environ["SQL_HOST"] +query_on = os.environ["QUERY_ON"] + +specific_string = "empty" + +# Configure App Data: Function +def data_configure(app_df): + category_mess = list(app_df.categories) +# filters = ["inactive", "viewers"] +# filters = ["inactive"] + filters = ["viewers"] +# filters = [] + my_idx_list, categories, app_ids = [], [], [] + + for idx, lst in enumerate(category_mess): + if any([True for e in lst if e in filters]): + my_idx_list.append(idx) + else: + lst = [x for x in lst if "active" != x] + if lst: + categories.append(lst) + else: + my_idx_list.append(idx) + + modDF = app_df.drop(my_idx_list) + modDF.categories = categories + return modDF + + +def create_function_dictionary(): + # Create App Dictionary: Main function + requests.packages.urllib3.disable_warnings() + catalog = Catalog(url=os.environ["CATALOG_URL"]) + nms = NarrativeMethodStore(url=os.environ["NARRATIVE_METHOD_STORE"]) + + apps = nms.list_methods({"tag": "release"}) +# apps = nms.list_methods({"tag": "beta"}) +# apps = nms.list_methods({"tag": "dev"}) +# apps = nms.list_methods({}) + + global specific_string + + print("APPS : "+ str(apps)) + print("============================") + + for temp_app_dict in apps: + if temp_app_dict["id"] == "kb_uploadmethods/batch_import_assembly_from_staging": +#PRESENT +# if temp_app_dict["id"] == "kb_uploadmethods/batch_import_assemblies_from_staging": +#NOT PRESENT + temp_specific_string = str(temp_app_dict) + specific_string = temp_specific_string + "\n" + + apps_datastruc = pd.DataFrame.from_dict(apps) + ModDfApps = data_configure(apps_datastruc) + ModDfApps.drop( + [ + "app_type", + "authors", + "git_commit_hash", + "icon", + "input_types", + "module_name", + "name", + "namespace", + "output_types", + "subtitle", + "tooltip", + "ver", + ], + axis=1, + inplace=True, + ) + keys = list( + set([item for sublist in list(ModDfApps.categories) for item in sublist]) + ) + print("KEYS : " + str(keys)) + print("============================") + app_dict = {k: [] for k in keys} + + print("app_dict : " + str(app_dict)) + print("============================") + + for i in ModDfApps.index.values: + app_category_lst = ModDfApps["categories"][i] + for category in app_category_lst: + if category in app_dict.keys(): + app_dict[category].append(ModDfApps["id"][i]) + app_dict[category] = list(set(app_dict[category])) + else: + raise KeyError("{} not a KBase app category".format(category)) + print("FINAL app_dict : " + str(app_dict)) + total_count = 0 + category_count = 0 +# for temp_cat in app_dict: + for temp_cat in sorted(app_dict): + temp_count = len(app_dict[temp_cat]) + print(temp_cat + " : " + str(temp_count)) + total_count += temp_count + category_count += 1 + print("Total count : " + str(total_count)) + print("category count : " + str(category_count)) + print("specific_string : " + str(specific_string)) + return app_dict + + +def update_app_category_mappings(): + # connect to mysql + db_connection = mysql.connect( + host=sql_host, user="metrics", passwd=metrics_mysql_password, database="metrics" + ) + cursor = db_connection.cursor() + query = "use " + query_on + cursor.execute(query) + + # get existing mappings + existing_records_list = list() + query = "select concat(app_name, '::', app_category) " "from app_name_category_map" + cursor.execute(query) + for row in cursor: + 
existing_records_list.append(row[0]) + + # update all existing records to be inactive + update_query = "update app_name_category_map set is_active = False" + cursor.execute(update_query) + db_connection.commit() + + cat_app_dict = create_function_dictionary() + + print("EXITING") + exit() + + # update active records if they exist or insert new row if did not exist + # update statement + update_prep_cursor = db_connection.cursor(prepared=True) + update_statement = ( + "update app_name_category_map " + "set is_active = True " + "where app_name = %s and " + "app_category = %s " + ) + # insert statement + insert_prep_cursor = db_connection.cursor(prepared=True) + existing_count = len(existing_records_list) + insert_statement = ( + "insert into app_name_category_map " + "(app_name, app_category, is_active) " + "values(%s, %s, True);" + ) + insert_count = 0 + update_count = 0 + for category_name in cat_app_dict: + for app_name in cat_app_dict[category_name]: + input = (app_name, category_name) + if app_name + "::" + category_name in existing_records_list: + # do update + update_prep_cursor.execute(update_statement, input) + update_count += 1 + else: + # do insert + insert_prep_cursor.execute(insert_statement, input) + insert_count += 1 + + db_connection.commit() + print("Existing_count : " + str(existing_count)) + print("Insert_count : " + str(insert_count)) + print("Update_count : " + str(update_count)) + + + +print("############################################") +print("App Category Mapping Upload (UTC): " + str(datetime.datetime.utcnow())) +start_time = time.time() +update_app_category_mappings() +print("--- app_cat_mapping time : %s seconds ---" % (time.time() - start_time)) diff --git a/source/daily_cron_jobs/methods_upload_v2_app_category_mappings.py b/source/daily_cron_jobs/methods_upload_v2_app_category_mappings.py new file mode 100644 index 0000000..4929543 --- /dev/null +++ b/source/daily_cron_jobs/methods_upload_v2_app_category_mappings.py @@ -0,0 +1,237 @@ +import os +import requests +import pandas as pd +import mysql.connector as mysql +import time +import datetime +from biokbase.catalog.Client import Catalog +from biokbase.narrative_method_store.client import NarrativeMethodStore + +metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"] +sql_host = os.environ["SQL_HOST"] +query_on = os.environ["QUERY_ON"] + +specific_string = "empty" + +# Configure App Data: Function + +def create_function_dictionary(): + # Create App Dictionary: Main function + requests.packages.urllib3.disable_warnings() + catalog = Catalog(url=os.environ["CATALOG_URL"]) + nms = NarrativeMethodStore(url=os.environ["NARRATIVE_METHOD_STORE"]) + + apps = nms.list_methods({"tag": "release"}) +# apps = nms.list_methods({"tag": "beta"}) +# apps = nms.list_methods({"tag": "dev"}) +# apps = nms.list_methods({}) + + global specific_string + + print("APPS : "+ str(apps)) + print("============================") + + category_app_dict = dict() + #key category,=> dict("active"=>[list of apps], "inactive"=>[list_of_apps], "unknown" => [list of apps]) + + apps_with_both_list = list() + apps_with_none_list = list() + apps_with_no_cats_list = list() + + for temp_app_dict in apps: +# if temp_app_dict["id"] == "kb_uploadmethods/batch_import_assembly_from_staging": +#PRESENT +# if temp_app_dict["id"] == "kb_uploadmethods/batch_import_assemblies_from_staging": +#NOT PRESENT +# temp_specific_string = str(temp_app_dict) +# specific_string = temp_specific_string + "\n" + + if temp_app_dict["id"] == "view_expression_gene_table_heatmap": 
+ print("DETAIL : " + str(temp_app_dict)) + + + app_id = temp_app_dict["id"] + app_cat_list = temp_app_dict["categories"] + + if app_id == "BBTools/RQCFilter": + print("BBTools/RQCFilter app categories : " + str(app_cat_list)) + + if app_id == "view_expression_heatmap": + print("view_expression_heatmap : " + str(app_cat_list)) + + active_type = None + active_flag_has_both = 0 + active_inactive_count = 0 + if "active" in app_cat_list: + active_inactive_count += 1 + if "inactive" in app_cat_list: + active_inactive_count += 1 + if "active" in app_cat_list and "inactive" in app_cat_list: + active_flag_has_both = 1 + print("UH OH!!!!!!!! : " + str(app_id) + " is both active and inactive") + apps_with_both_list.append(app_id) + active_type = "both" +# exit(0) +# else: + elif "active" in app_cat_list: + #CURRENTLY SET IF APP HAS BOTH IS SEEN AS ACTIVE + active_type = "active" + elif "inactive" in app_cat_list: + active_type = "inactive" + if active_type == None: + print("UH OH!!!!!!!! : " + str(app_id) + " is not active or inactive") + apps_with_none_list.append(app_id) + active_type = "none" +# exit(0) + if (len(app_cat_list) - active_inactive_count) <= 0: + apps_with_no_cats_list.append(app_id) + for category_name in app_cat_list: + if category_name == "active" or category_name == "inactive": + continue + if category_name not in category_app_dict: + category_app_dict[category_name] = dict() + if active_type not in category_app_dict[category_name]: + category_app_dict[category_name][active_type] = list() + category_app_dict[category_name][active_type].append(app_id) + + # Deal with apps that have empty category list + if len(apps_with_no_cats_list) > 0: + category_app_dict["Empty Category"] = dict() + category_app_dict["Empty Category"]["no_category"] = apps_with_no_cats_list + + print("FINAL category_app_dict : " + str(category_app_dict)) + total_count = 0 + category_count = 0 +# for temp_cat in app_dict: + for temp_cat in sorted(category_app_dict): + for active_type in category_app_dict[temp_cat]: + temp_count = len(category_app_dict[temp_cat][active_type]) + total_count += temp_count + category_count += 1 + print("Total count : " + str(total_count)) + print("category count : " + str(category_count)) +# print("specific_string : " + str(specific_string)) + print("apps_with_none_list : " + str(apps_with_none_list)) + print("apps_with_none count : " + str(len(apps_with_none_list))) + print("apps_with_both_list : " + str(apps_with_both_list)) + print("apps_with_both count : " + str(len(apps_with_both_list))) + print("apps_with_no_cats_list : " + str(apps_with_no_cats_list)) + print("apps_with_no_cats_list count : " + str(len(apps_with_no_cats_list))) + return category_app_dict + + +def update_app_category_mappings(): + # get app catagory mappings + cat_app_dict = create_function_dictionary() + +# print("EXITING") +# exit() + + # connect to mysql + db_connection = mysql.connect( + host=sql_host, user="metrics", passwd=metrics_mysql_password, database="metrics" + ) + cursor = db_connection.cursor() + query = "use " + query_on + cursor.execute(query) + + # get existing mappings + existing_records_list = list() + existing_name_cat_dict = dict() +# query = "select concat(app_name, '::', app_category, '::', is_active) from app_name_category_map_v2;" + query = "select app_name, app_category, is_active from app_name_category_map_v2;" + cursor.execute(query) + for row in cursor: + full_key = row[0] + "::" + row[1] + "::" + str(row[2]) + name_cat_key = row[0] + "::" + row[1] + 
existing_records_list.append(full_key) + existing_name_cat_dict[name_cat_key] = row[2] + existing_count = len(existing_records_list) + + # update all existing records to be inactive +# update_query = "update app_name_category_map_v2 set is_active = False" +# cursor.execute(update_query) +# db_connection.commit() + + # update active records if they exist or insert new row if did not exist + # update statement +# update_prep_cursor = db_connection.cursor(prepared=True) +# update_statement = ( +# "update app_name_category_map_v2 " +# "set is_active = %s " +# "where app_name = %s and " +# "app_category = %s " +# ) + + + # insert statement + insert_prep_cursor = db_connection.cursor(prepared=True) + + insert_statement = ( + "insert into app_name_category_map_v2 " + "(app_name, app_category, is_active) " + "values(%s, %s, %s);" + ) + + # update statement + update_prep_cursor = db_connection.cursor(prepared=True) + + update_statement = ( + "update app_name_category_map_v2 " + "set is_active = %s where app_name = %s and app_category = %s;" + ) + + # cleanup/delete statement + cleanup_prep_cursor = db_connection.cursor(prepared=True) + cleanup_statement = ( + "delete from app_name_category_map_v2 " + "where app_name = %s and app_category = %s and is_active = %s;" + ) + + insert_count = 0 + update_count = 0 + activity_dict = {'active': 1, 'inactive': 0, 'both': 2, "none":-1, "no_category":-2} + for category_name in cat_app_dict: + for active_type in cat_app_dict[category_name]: + for app_name in cat_app_dict[category_name][active_type]: + temp_key = app_name + "::" + category_name + "::" + str(activity_dict[active_type]) + temp_name_cat_key = app_name + "::" + category_name + if temp_name_cat_key in existing_name_cat_dict: + if activity_dict[active_type] != existing_name_cat_dict[temp_name_cat_key]: + # record needs to be updated + input = (activity_dict[active_type], app_name, category_name,) + update_prep_cursor.execute(update_statement, input) + update_count += 1 + if temp_key in existing_records_list: + existing_records_list.remove(temp_key) + elif temp_key in existing_records_list: + existing_records_list.remove(temp_key) + #REMOVE FOM EXISTING TO FIND LEFT OVERS + else: + # do insert +# print("INPUT : " + str(input)) + input = (app_name, category_name, activity_dict[active_type]) + insert_prep_cursor.execute(insert_statement, input) + insert_count += 1 + + #Clean up that no longer exist + cleanup_count = 0 + for temp_key in existing_records_list: + cleanup_count += 1 + temp_app_name, temp_cat_name, temp_is_active = temp_key.split('::') + input = (temp_app_name, temp_cat_name, int(temp_is_active)) + cleanup_prep_cursor.execute(cleanup_statement, input) + + db_connection.commit() + print("Existing_count : " + str(existing_count)) + print("Insert_count : " + str(insert_count)) + print("Update_count : " + str(update_count)) + print("Cleanup_count : " + str(cleanup_count)) + + + +print("############################################") +print("App Category Mapping Upload (UTC): " + str(datetime.datetime.utcnow())) +start_time = time.time() +update_app_category_mappings() +print("--- app_cat_mapping time : %s seconds ---" % (time.time() - start_time)) From f616b53c69a49b77247fc1416cfe3b735aa594ce Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Tue, 8 Oct 2024 04:02:02 +0000 Subject: [PATCH 07/11] mostly adding blobstore_details to reports and views --- .../backfill_blobstore_details.py | 242 --------- .../dump_weekly_ADAM_app_categories.py | 5 +- .../daily_cron_jobs/make_reporting_tables.py | 25 +- 
.../get_downloaders_lookup_ongoing.py | 487 ++++++++++++++++++ .../sql_reporting_views_and_tables.sql | 25 + 5 files changed, 539 insertions(+), 245 deletions(-) delete mode 100644 source/custom_scripts/backfill_blobstore_details.py create mode 100644 source/monthly_cron_jobs/get_downloaders_lookup_ongoing.py diff --git a/source/custom_scripts/backfill_blobstore_details.py b/source/custom_scripts/backfill_blobstore_details.py deleted file mode 100644 index 8111e82..0000000 --- a/source/custom_scripts/backfill_blobstore_details.py +++ /dev/null @@ -1,242 +0,0 @@ -from pymongo import MongoClient -from pymongo import ReadPreference -from biokbase.workspace.client import Workspace -#from installed_clients.AbstractHandleClient import AbstractHandle as HandleService -from biokbase.service.Client import Client as ServiceClient -import json as _json -import os -import mysql.connector as mysql -import requests -import time -#from splitting import split_sequence -from datetime import date -from datetime import datetime - -print("############################################") -print("############################################") -print("############################################") -print("START TIME (UTC): " + str(datetime.utcnow())) -start_time = time.time() - -requests.packages.urllib3.disable_warnings() - -mongoDB_metrics_connection = os.environ["MONGO_PATH"] - -ws_url = os.environ["WS_URL"] -ws_user_token = os.environ["METRICS_WS_USER_TOKEN"] -to_workspace = os.environ["WRK_SUFFIX"] - -to_blobstore = os.environ["BLOBSTORE_SUFFIX"] -to_handle_db = os.environ["HANDLE_DB_SUFFIX"] - - -client = MongoClient(mongoDB_metrics_connection + to_workspace) -db = client.workspace -handle_service_url = "https://kbase.us/services/handle_service" - -#wsadmin = Workspace(ws_url, token=ws_user_token) -#hs = HandleService(handle_service_url, token=ws_user_token) - -def make_blobstore_lookup (): - client_blobstore = MongoClient(mongoDB_metrics_connection + to_blobstore) - db_blobstore = client_blobstore.blobstore - - blobstore_nodes_size_lookup = dict() - - nodes_query = db_blobstore.nodes.find({},{"_id": 0, "id": 1, "size": 1}) - for record in nodes_query: - blobstore_node_id = record["id"] - size = record["size"] - blobstore_nodes_size_lookup[blobstore_node_id] = size - return blobstore_nodes_size_lookup - -def make_handle_id_lookup (): - client_handle_db = MongoClient(mongoDB_metrics_connection + to_handle_db) - db_handle = client_handle_db.handle_db - - handle_id_lookup = dict() - - handles_query = db_handle.handle.find({},{"_id": 0, "id": 1, "hid": 1}) - for record in handles_query: - blobstore_node_id = record["id"] - handle = record["hid"] - handle_id_lookup[handle] = blobstore_node_id - return handle_id_lookup - - - -# object_id -> {handle=>handle, node=node, type=object_type, savedate=> sd} -objects_with_problem_nodes_with_no_size = dict() -objects_with_problem_handles_with_no_nodes = dict() - -running_size_total = 0 - -deleted_object_with_data_found_count = 0 -deleted_object_without_data_found_count = 0 - -#exit() - - -# blobstore_id => {ws_obj_id => (save_date, saver)} -blobstore_object_results = dict() - -# blobstore_id =>{first_saver_ws_obj_id => blah, -# first_save_date = date} -#blobstore_id_first_saver = dict() - -#ws_ids = [146324] # small -#ws_ids = [28129] # fungal phytosome s -#ws_ids = [146324,28129] # fungal phytosome and small ws, took 203 mins -#ws_ids = [19217] # refseq reference - - - -#for ws_id in ws_ids: -deleted_objects = set() -ws_obj_deleted_cursor = 
db.workspaceObjects.find({"del":True},{"_id":0, "ws": 1,"id":1}) -for ws_obj_deleted in ws_obj_deleted_cursor: - deleted_temp_ws_id = ws_obj_deleted["ws"] - deleted_obj_id = ws_obj_deleted["id"] - deleted_ws_obj_id = str(deleted_temp_ws_id) + "/" + str(deleted_obj_id) - deleted_objects.add(deleted_ws_obj_id) - -print("TOTAL DELETED OBJECT LENGTH: " + str(len(deleted_objects))) -print("--- total time for the deleted objects lookup %s seconds ---" % (time.time() - start_time)) - -ws_obj_vers_cursor = db.workspaceObjVersions.find( - {#"ws":312, - "extids.handle" : { "$exists": True }}, - { - "type": 1, - "ws": 1, - "id": 1, - "ver": 1, - "savedate": 1, - "savedby": 1, - "extids": 1, - "_id": 0, - }, - no_cursor_timeout=True - ) -i = 0 -ws_obj_info = dict() -deleted_ext_ids_counter = 0 - -for ws_obj_ver in ws_obj_vers_cursor: - is_deleted = 0 - object_type_full = ws_obj_ver["type"] - (object_type, object_spec_version) = object_type_full.split("-") - #if (object_type != "KBaseNarrative.Narrative" and object_type != "KBaseReport.Report"): - ws_id = ws_obj_ver["ws"] - obj_id = ws_obj_ver["id"] - temp_ws_obj_id = str(ws_id) + "/" + str(obj_id) - if temp_ws_obj_id in deleted_objects: - deleted_ext_ids_counter += 1 - is_deleted = 1 -# continue - obj_ver = ws_obj_ver["ver"] - obj_save_date = ws_obj_ver["savedate"] - savedby = ws_obj_ver["savedby"] - extids = ws_obj_ver["extids"] - handles = extids["handle"] -# for handle in handles: -# handles_set.add(handle) -# obj_copied = 0 - full_obj_id = str(ws_id) + "/" + str(obj_id) + "/" + str(obj_ver) -# print("Full obj id : " + full_obj_id) -# print("Object Type : " + object_type_full) -# if (object_type != "KBaseNarrative.Narrative" and object_type != "KBaseReport.Report"): -# if (object_type == "KBaseNarrative.Narrative" or object_type == "KBaseReport.Report"): - - ws_obj_info[full_obj_id] = {"save_date" : obj_save_date, - "savedby" : savedby, - "obj_type" : object_type_full, - "handles" : handles, - "is_deleted" : is_deleted} - -print("--- total time for the ws_object_version objects query %s seconds ---" % (time.time() - start_time)) - -########################################################################## -print("BLOBSTORE LOOKUP:") -blobstore_lookup = make_blobstore_lookup() -test_counter = 0 -for temp_key in blobstore_lookup: - if test_counter < 10: - print("ID: " + str(temp_key) + " ::: size: " + str(blobstore_lookup[temp_key])) - else: - break - test_counter = test_counter + 1 -print("Total BLOBSTORE Lookuplength: " + str(len(blobstore_lookup))) - -print("--- total time for the blobstore size lookup creation %s seconds ---" % (time.time() - start_time)) - -handle_id_lookup = make_handle_id_lookup() -test_counter = 0 -for temp_key in handle_id_lookup: - if test_counter < 10: - print("ID: " + str(temp_key) + " ::: blobstore_id: " + str(handle_id_lookup[temp_key])) - else: - break - test_counter = test_counter + 1 -print("Total HANDLE ID lookup length: " + str(len(handle_id_lookup))) - -print("--- total time for the blobstore size lookup creation %s seconds ---" % (time.time() - start_time)) -############################################## - -for full_obj_id in ws_obj_info: -# print("ws_obj_info[full_obj_id][handles] : " + str(ws_obj_info[full_obj_id]["handles"])) - for handle in ws_obj_info[full_obj_id]["handles"]: - blobstore_id = None - (kbh_prefix, str_handle_id) = handle.split("_") - if int(str_handle_id) in handle_id_lookup: - blobstore_id = handle_id_lookup[int(str_handle_id)] - else: - objects_with_problem_handles_with_no_nodes[full_obj_id] 
= ws_obj_info[full_obj_id] - if ws_obj_info[full_obj_id]["is_deleted"] == 1: - deleted_object_without_data_found_count += 1 - - if blobstore_id and blobstore_id in blobstore_lookup: - if blobstore_id not in blobstore_object_results: - blobstore_object_results[blobstore_id] = dict() - blobstore_object_results[blobstore_id][full_obj_id] = (ws_obj_info[full_obj_id]["save_date"], - ws_obj_info[full_obj_id]["savedby"]) -# print("Blobstore lookup file_size : " + str(blobstore_lookup[blobstore_id])) - if ws_obj_info[full_obj_id]["is_deleted"] == 1: - deleted_object_with_data_found_count += 1 - file_size = blobstore_lookup[blobstore_id] - running_size_total = running_size_total + file_size - else: -# print("HUGE PROBLEM: obj_id : " + full_obj_id + " blobstore_id: " + str(blobstore_id) + " IS NOT IN THE LOOKUP") -# del blobstore_object_results[blobstore_id] - objects_with_problem_nodes_with_no_size[full_obj_id] = ws_obj_info[full_obj_id] - if ws_obj_info[full_obj_id]["is_deleted"] == 1: - deleted_object_without_data_found_count += 1 - -print("objects_with_problem_nodes_with_no_size : " + str(objects_with_problem_nodes_with_no_size)) -print("TOTAL objects_with_problem_nodes_with_no_size : " + str(len(objects_with_problem_nodes_with_no_size))) - -print("objects_with_problem_handles_with_no_nodes : " + str(objects_with_problem_handles_with_no_nodes)) -print("TOTAL objects_with_problem_handles_with_no_nodes : " + str(len(objects_with_problem_handles_with_no_nodes))) - -print("deleted_object_with_data_found_count :" + str(deleted_object_with_data_found_count)) -print("deleted_object_without_data_found_count :" + str(deleted_object_without_data_found_count)) - -print("blobstore_object_results length : " + str(len(blobstore_object_results))) -#print("blobstore_object_results : " + str(blobstore_object_results)) -print("RUNNING TOTAL SIZE : " + str(running_size_total)) - -obj_id_set = set() -for blobstore_id in blobstore_object_results : - for obj_id in blobstore_object_results[blobstore_id]: - obj_id_set.add(obj_id) -print("Total number of objects with handles that could be fully determined : " + str(len(obj_id_set))) - -print("Total ext_ids objects that were deleted : " + str(deleted_ext_ids_counter)) - -#print("blobstore_object_results : " + str(blobstore_object_results)) - -print("--- total seconds %s seconds ---" % (time.time() - start_time)) - - - -exit() diff --git a/source/custom_scripts/dump_weekly_ADAM_app_categories.py b/source/custom_scripts/dump_weekly_ADAM_app_categories.py index 3fc67ae..9ac842b 100644 --- a/source/custom_scripts/dump_weekly_ADAM_app_categories.py +++ b/source/custom_scripts/dump_weekly_ADAM_app_categories.py @@ -24,7 +24,10 @@ def dump_weekly_app_categories(): cursor.execute(query) # CHANGE QUERY HERE - query = ("select * from metrics_reporting.app_category_unique_users_weekly") +# Regular weekly app categories +# query = ("select * from metrics_reporting.app_category_unique_users_weekly") + + # ADAM's special cagtegory mappings from late 2023 early 2024 query = ("select in_query.week_run, in_query.master_category, count(*) as unique_users " "from (select distinct DATE_FORMAT(`finish_date`,'%Y-%u') as week_run, " "IFNULL(master_category,'None') as master_category, uau.username " diff --git a/source/daily_cron_jobs/make_reporting_tables.py b/source/daily_cron_jobs/make_reporting_tables.py index cf1a0ed..91ffc0d 100644 --- a/source/daily_cron_jobs/make_reporting_tables.py +++ b/source/daily_cron_jobs/make_reporting_tables.py @@ -167,7 +167,24 @@ def make_reporting_tables(): 
cursor.execute(app_category_run_hours_weekly_create_statement) print("app_category_run_hours_weekly created") + ############### + workspaces_current_create_statement = ( + "CREATE OR REPLACE table metrics.workspaces_current as " + "(select ws.* " + "from metrics.workspaces ws inner join " + "metrics.hv_workspaces_max_date wsmd " + "on ws.ws_id = wsmd.ws_id and " + "ws.record_date = wsmd.record_date) " + ) + cursor.execute(workspaces_current_create_statement) + print("workspaces_current created") + workspaces_current_index_create_statement = ( + "alter table metrics.workspaces_current add unique (ws_id)" + ) + cursor.execute(workspaces_current_index_create_statement) + print("workspaces_current_index created") + ################ narrative_app_flows_create_statement = ( "create or replace table metrics_reporting.narrative_app_flows as " @@ -175,7 +192,7 @@ def make_reporting_tables(): "from metrics.user_info ui " "inner join metrics.user_app_usage uau " "on ui.username = uau.username " - "inner join metrics_reporting.workspaces_current wc " + "inner join metrics.workspaces_current wc " "on wc.ws_id = uau.ws_id " "where ui.kb_internal_user = 0 " "and uau.is_error = 0 " @@ -205,6 +222,10 @@ def make_reporting_tables(): cursor.execute(blobstore_detail_by_ws_create_statement) print("blobstore_detail_by_ws_create_statement created") + blobstore_detail_by_ws_index_statement = "alter table blobstore_detail_by_ws add index (ws_id)" + cursor.execute(blobstore_detail_by_ws_index_statement) + print("blobstore_detail_by_ws_index_statement created") + blobstore_detail_by_user_monthly_create_statement = ( "create or replace table blobstore_detail_by_user_monthly as " "(select saver_username, DATE_FORMAT(`save_date`,'%Y-%m') as month, " @@ -328,7 +349,7 @@ def make_reporting_tables(): "sum(static_narratives_count) as static_narratives_created_count, " "sum(visible_app_cells_count) as total_visible_app_cells, " "sum(code_cells_count) as total_code_cells_count " - "from metrics_reporting.workspaces_current wc " + "from metrics.workspaces_current wc " "inner join metrics.user_info ui " "on wc.username = ui.username " "where narrative_version > 0 " diff --git a/source/monthly_cron_jobs/get_downloaders_lookup_ongoing.py b/source/monthly_cron_jobs/get_downloaders_lookup_ongoing.py new file mode 100644 index 0000000..fb964be --- /dev/null +++ b/source/monthly_cron_jobs/get_downloaders_lookup_ongoing.py @@ -0,0 +1,487 @@ +# GetAppStats +# +import requests +import os +import time +from pymongo import MongoClient +from pymongo import ReadPreference + +from datetime import date, timedelta, datetime +import mysql.connector as mysql +from biokbase.narrative_method_store.client import NarrativeMethodStore +#from source.daily_cron_jobs.installed_clients.execution_engine2Client import execution_engine2 +from installed_clients.execution_engine2Client import execution_engine2 + +################################################ +# +# This code is to pull the needed downloader app runs that may have been downloaders for DOI objects +# +################################################ + +requests.packages.urllib3.disable_warnings() + +to_workspace = os.environ["WRK_SUFFIX"] + +ee2_url = os.environ["EE2_URL"] +# GetEE2AppStats +ee2 = execution_engine2( + url=ee2_url, + token=os.environ["METRICS_USER_TOKEN"], +) + +nms = NarrativeMethodStore(url=os.environ["NARRATIVE_METHOD_STORE"]) +sql_host = os.environ["SQL_HOST"] +query_on = os.environ["QUERY_ON"] + +mongoDB_metrics_connection = os.environ["MONGO_PATH"] 
+metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"] + +db_connection = mysql.connect( + host=sql_host, # "mysql1", #"localhost", + user="metrics", # "root", + passwd=metrics_mysql_password, + database="metrics", # "datacamp" +) + +cursor = db_connection.cursor() + +models_bulk_dl_list = list() + +def get_minimum_date_for_new_doi_workspaces(cursor): + #First Determine the default being the start of the previous month + #get first day of the month: + query = ( + "select min(initial_save_date) from metrics_reporting.workspaces_current where ws_id in ( " + " select ws_id from metrics.copy_doi_ws_map " + " where ws_id not in (select unique ws_id from metrics.copy_doi_metrics)) " + ) + cursor.execute(query) + min_new_doi_ws_date = None + for row_values in cursor: + min_new_doi_ws_date = row_values[0] + print("MIN NEW DOI WS DATE:" + str(min_new_doi_ws_date)) + return min_new_doi_ws_date + +def get_existing_problem_refs(cursor): + # builds data structure for problematic references previously resolved + query = ( + "select job_id, original_ref_id, resolved_ref_id " + "from downloaders_problematic_obj_ids" + ) + cursor.execute(query) + problem_refs_lookup = dict() + for row_values in cursor: + job_id = row_values[0] + original_ref_id = row_values[1] + resolved_ref_id = row_values[2] + if job_id not in problem_refs_lookup: + problem_refs_lookup[job_id] = dict() + problem_refs_lookup[job_id][original_ref_id] = resolved_ref_id + return problem_refs_lookup + +def get_minimum_date_for_doi_workspaces(cursor): + # gets earliest initial save out of all the doi workspaces + query = ( + "select min(initial_save_date) from metrics_reporting.workspaces_current where ws_id in ( " + " select ws_id from metrics.copy_doi_ws_map) ") + cursor.execute(query) + min_new_doi_ws_date = None + for row_values in cursor: + min_doi_ws_date = row_values[0] + print("MIN DOI WS DATE:" + str(min_new_doi_ws_date)) + return min_doi_ws_date + + +def get_downloaders_set(cursor): + #returns a set of downloadwer apps +# query = "select downloader_app_name, 1 from metrics.downloader_apps"; + query = ( + "select downloader_app_name as app_name from downloader_apps da " + "union select distinct uau.func_name from user_app_usage uau " + "where (uau.func_name like '%export%' or uau.func_name like '%download%' or " + "uau.app_name like '%export%' or uau.app_name like '%download%' or " + "uau.func_name like 'kb_ObjectInfo%' or uau.app_name like 'kb_ObjectInfo%') ") +# query = ( +# "select downloader_app_name as app_name from downloader_apps da " +# "union select uau.func_name from user_app_usage uau " +# "where (uau.func_name like '%export%' or uau.func_name like '%download%' " +# "or uau.app_name like '%export%' or uau.app_name like '%download%') ") + + cursor.execute(query) + downloaders_set = set() + for row_values in cursor: + downloaders_set.add(row_values[0]) + print(str(downloaders_set)) + print("Number of downloaders : " + str(len(downloaders_set))) + return downloaders_set + +def pull_downloading_jobs(downloaders_set, problem_refs_lookup): + + client = MongoClient(mongoDB_metrics_connection + to_workspace) + db = client.workspace + + prep_cursor = db_connection.cursor(prepared=True) + downloaders_problematic_obj_ids_insert_statement = ( + "insert into downloaders_problematic_obj_ids " + "(original_ref_id, resolved_ref_id, job_id) " + "values(%s,%s, %s);") + insert_prob_refs_count = 0 + + statuses = ["queued", "terminated", "running", "created", "estimated","error"] + finished_job_count = 0 + downloaders_count = 0 + 
downloading_jobs_with_orphaned_refs_count = 0 + downloading_triples_not_digits_count = 0 + downloaders_with_ws_id_count = 0 + in_if_count = 0 + + downloaders_dict = dict() + for downloader in downloaders_set: + downloaders_dict[downloader] = dict() + downloaders_dict[downloader]["has_input_ref_count"] = 0 + downloaders_dict[downloader]["no_input_ref_count"] = 0 + + downloader_results = dict() + # the data structure looks like downloaded_ws_obj_id => { downloader_username => [job_id]} + has_2_elements_count = 0 + + + earliest_year = 2023 +# earliest_year = 2016 + today = date.today() + current_year = int(today.year) + part_of_year_list = (1,2,3,4,5,6,7,8,9,10,11,12) + + years_to_do = range(earliest_year,(current_year + 1)) + + print("Current year : " + str(current_year)) + print("Years to do: " + str(years_to_do)) + + fba_tools_bulk_export_objects_jobs = list() + DataFileUtil_download_web_file_jobs = list() + + for year_to_do in years_to_do: + # NEED TO CHUNK UP THE RESULTS BY QUARTER, OTHERWISE EE@ TIMESOUT. + for part_of_year in part_of_year_list: + if part_of_year == 1: + begin = int(datetime(year_to_do, 1, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 1, 31, 23, 59).timestamp()) * 1000 + elif part_of_year == 2: + begin = int(datetime(year_to_do, 2, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 3, 1, 23, 59).timestamp()) * 1000 + elif part_of_year == 3: + begin = int(datetime(year_to_do, 3, 2, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 3, 31, 23, 59).timestamp()) * 1000 + elif part_of_year == 4: + begin = int(datetime(year_to_do, 4, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 4, 30, 23, 59).timestamp()) * 1000 + elif part_of_year == 5: + begin = int(datetime(year_to_do, 5, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 5, 31, 23, 59).timestamp()) * 1000 + elif part_of_year == 6: + begin = int(datetime(year_to_do, 6, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 6, 30, 23, 59).timestamp()) * 1000 + elif part_of_year == 7: + begin = int(datetime(year_to_do, 7, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 7, 31, 23, 59).timestamp()) * 1000 + elif part_of_year == 8: + begin = int(datetime(year_to_do, 8, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 8, 30, 23, 59).timestamp()) * 1000 + elif part_of_year == 9: + begin = int(datetime(year_to_do, 9, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 9, 30, 23, 59).timestamp()) * 1000 + elif part_of_year == 10: + begin = int(datetime(year_to_do, 10, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 10, 31, 23, 59).timestamp()) * 1000 + elif part_of_year == 11: + begin = int(datetime(year_to_do, 11, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 11, 30, 23, 59).timestamp()) * 1000 + elif part_of_year == 12: + begin = int(datetime(year_to_do, 12, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 12, 31, 23, 59).timestamp()) * 1000 + + yearly_start_time = time.time() + print("Year_Month to do start: " + str(year_to_do) + "_" + str(part_of_year) + " :: " + str(yearly_start_time)) + + params = {"start_time": begin, "end_time": end, "ascending": 0, "limit": 1000000000} + stats = ee2.check_jobs_date_range_for_all(params=params) + + yearly_finished_count = 0 + yearly_downloader_count = 0 + + example_counter = 0 + download_job_without_input_ref_count = 0 + + kbObjectInfo_dict = dict() + + fba_tools_bulk_export_objects_job_count = 0 + + for job in stats["jobs"]: + if 
job["status"] in statuses or "finished" not in job: + continue + else: + # only want non errored finished jobs + if "job_input" in job and "job_id" in job and "user" in job: + in_if_count += 1 + method = job["job_input"]["method"] + app_id = job["job_input"]["app_id"] + method = method.replace(".", "/") + if method in downloaders_set or app_id in downloaders_set: + if method == "DataFileUtil/download_web_file": + DataFileUtil_download_web_file_jobs.append(job) + if "bulk_export_objects" in method: +# if method == "fba_tools/bulk_export_objects": + fba_tools_bulk_export_objects_jobs.append(job) + fba_tools_bulk_export_objects_job_count += 1 + downloaders_count += 1 + yearly_downloader_count += 1 + ws_obj_id = None + ws_obj_ids_list = list() + job_id = job["job_id"] + needs_to_be_added_to_the_db = 1 + if "fba_tools/bulk_export_objects" in method: + # need to get references differently as it is a bulk job + # need to loop over result list and then download_refs list in 'job_output': {'result': [{'downloaded_refs':[] + print("IN fba_tools.bulk_export_objects") + if "job_output" in job: + if job_id == "64d17b0f97c8caf1da9316ed ": + print("job id : " + str(job_id) + " Job output: " + str(job("job_output"))) + if "result" in job["job_output"]: + for job_result in job["job_output"]["result"]: + if 'downloaded_refs' in job_result: + ws_obj_ids_list.extend(job_result['downloaded_refs']) + models_bulk_dl_list.extend(job_result['downloaded_refs']) +#NEED TO CHANGE ALL THE WS_OBJ_LIST STUFF OVER TO LIST (see if empty etc). + if "kb_ObjectInfo" in method: + # need to find input ref differently + found_kb_info_ref = 0 + for param_key in job["job_input"]["params"][0]: + if "input_ref" in param_key: + ws_obj_ids_list.append(job["job_input"]["params"][0][param_key]) + kbObjectInfo_dict[job_id] = ws_obj_ids_list + found_kb_info_ref = 1 + #print("IN kbObjectInfo_dict checking ws_obj_id: " + str(ws_obj_ids_list)) + if found_kb_info_ref == 0: + print("######################") + print("UNABLE TO FIND kbinfo job_id : " + str(job_id)) + print("######################") + elif len(job["job_input"]['params']) > 0: + for param in job["job_input"]['params']: + if "input_ref" in param: + ws_obj_ids_list.append(param["input_ref"]) + + if len(ws_obj_ids_list) > 0: + downloaders_dict[method]["has_input_ref_count"] += 1 + #job_id = job["job_id"] + username = job["user"] + + if len(ws_obj_ids_list) > 1: + print("ws_obj_ids_list : " + str(ws_obj_ids_list)) + + for ws_obj_id in ws_obj_ids_list: + + + used_ws_obj_id = None + #print("ws_obj_id : " + ws_obj_id) + elements = ws_obj_id.split("/") + if len(elements) == 3: + if elements[0].isdigit() and elements[1].isdigit() and elements[2].isdigit(): + used_ws_obj_id = ws_obj_id + needs_to_be_added_to_the_db = 0 + else: + # had no cases of this as of this point will treat as orphaned? + # need to check at end to see if this code needs to be added. 
+ print("Unexpected triplet ref format not with digits: " + ws_obj_id) + downloading_jobs_with_orphaned_refs_count += 1 + downloading_triples_not_digits_count += 1 + elif job_id in problem_refs_lookup and ws_obj_id in problem_refs_lookup[job_id]: + if problem_refs_lookup[job_id][ws_obj_id] is None: + # Do nothing can not resolve the correct id + continue + else: + used_ws_obj_id = problem_refs_lookup[job_id][ws_obj_id] + needs_to_be_added_to_the_db = 0 + else: + # THE incomplete Reference needs to be tried to be resolved and then inserted into the DB + if len(elements) == 2: + has_2_elements_count += 1 + #print("in elements == 2") + ws_id = None + obj_id = None + if elements[0].isdigit(): + # the ws_id is a number. it is good to go + ws_id = int(elements[0]) + else: + # means the ws is identified by name and not by id + # Need to search the worksaceObjects table to get the id. + # Note there is no mechanism for users to change this value + # There are no dupicate named workspaces other than null (which has 2) + workspaces_cursor = db.workspaces.find({"name":elements[0]},{"ws":1}); + for record in workspaces_cursor: + ws_id = int(record["ws"]) + #print("ws_id resolved: " + str(ws_id)) + if elements[1].isdigit(): + obj_id = int(elements[1]) + else: + #print("IN resolve object name") + # means the obj portion of the reference is identified by a name + # NOTE THIS NAME CAN BE CHANGED BY THE USER + # IF THE USER CHANGED THE NAME SINCE THE TIME OF THE DOWNLOAD + # THEN THAT REFERENCE IS ORPHANED + # Need to query the workspaceOBjects mongo collection + # using the name and ws_id to determine the object id + workspaceObjects_cursor = db.workspaceObjects.find({"name":elements[1],"ws":ws_id},{"id":1}); + #print("elements[1] = " + elements[1]) + #print("ws id : " + str(ws_id)) + #print("workspaceObjects_cursor" + str(workspaceObjects_cursor)) + for record in workspaceObjects_cursor: + #print("Found wsObjects record : " + str(record) ) + obj_id = int(record["id"]) + #print("ws_obj_id : " + ws_obj_id + " resolved to : " + str(obj_id)) + + if obj_id is not None and ws_id is not None: + # Need to do time machine to determine which object version was active + # at the time of the Downloading job start time + #print("Found input ref: " + ws_obj_id) + job_start_epoch = job["running"] / 1000 + #print("job_start_epoch : " + str(job_start_epoch)) + max_save_date_epoch = 0 + max_save_date_version = 0 + workspaceObjVersions_cursor = db.workspaceObjVersions.find({"ws": int(ws_id), "id": int(obj_id)}, + {"ws": 1, "id": 1, "ver": 1, "savedate": 1, "_id": 0}) + for record in workspaceObjVersions_cursor: + iso_savedate = record["savedate"] + iso_savedate_string = str(iso_savedate) + iso_savedate_string_elements = iso_savedate_string.split(".") + if len(iso_savedate_string_elements) == 1: + iso_savedate_string = iso_savedate_string + ".000000" + utc_dt = datetime.strptime(iso_savedate_string,'%Y-%m-%d %H:%M:%S.%f') + #'%Y-%m-%dT%H:%M:%S.%fZ') + savedate_epoch = (utc_dt - datetime(1970, 1, 1)).total_seconds() + #print("savedate_epoch : " + str(savedate_epoch)) + if (job_start_epoch > savedate_epoch and savedate_epoch > max_save_date_epoch): + max_save_date_epoch = savedate_epoch + max_save_date_version = record["ver"] + + + + + #if (max_save_date_version > 1): + # print("FINAL VERSION saved : " + str(max_save_date_version)) + used_ws_obj_id = str(ws_id) + "/" + str(obj_id) + "/" + str(max_save_date_version) + # print("used_ws_obj_id : " + used_ws_obj_id) + else: + # One of the ws_id or obj_id is None most likely means 
orphaned reference due to + #object name change + used_ws_obj_id = None + downloading_jobs_with_orphaned_refs_count += 1 + else: + print("WS OBJ ID was a different format then expected") + used_ws_obj_id = None + downloading_jobs_with_orphaned_refs_count += 1 + # END OF TRYING TO DETERMINE FULL WS_OBJ_ID + + # ENTER RECORD INTO DOWNLOADER_RESULTS + if used_ws_obj_id not in downloader_results: + downloader_results[used_ws_obj_id] = dict() + if username not in downloader_results[used_ws_obj_id]: + downloader_results[used_ws_obj_id][username] = list() + downloader_results[used_ws_obj_id][username].append(job_id) + + if needs_to_be_added_to_the_db == 1 : + #need to do insert + input = (ws_obj_id, used_ws_obj_id, job_id) + prep_cursor.execute(downloaders_problematic_obj_ids_insert_statement, input) + insert_prob_refs_count += 1 + + #downloader_results[ws_obj_id][job_id] = username + #downloader_results[used_ws_obj_id] +=1 + #downloader_results[ws_obj_id].add(username) + downloaders_with_ws_id_count += 1 + #if example_counter < 10: + # print("Example input_ws_obj_id : " + ws_obj_id + " resolved to used_ws_obj_id : " + used_ws_obj_id) + # example_counter += 1 + #else: + # print("EARLY EXIT: DOWNLOADER RESULTS : " + str(downloader_results)) + # exit() + else: + download_job_without_input_ref_count += 1 + downloaders_dict[method]["no_input_ref_count"] += 1 + finished_job_count += 1 + yearly_finished_count += 1 + print("Yearly downloader_count : " + str(yearly_downloader_count)) + print("Yearly finished_count : " + str(yearly_finished_count)) + print("Yearly download_job_without_input_ref_count : " + str(download_job_without_input_ref_count)) + print("Year to do end: " + str(year_to_do) + "_" + str(part_of_year) + " :: " + str(time.time() - yearly_start_time) + " seconds") + print("kbObjectInfo_dict : " + str(kbObjectInfo_dict)) + print("kbObjectInfo_dict len : " + str(len(kbObjectInfo_dict))) + + + print(str(downloaders_dict)) + +# i = 0 +# while i < 3: +# print("DataFileUtil_download_web_file_jobs number : " + str(i)) +# print(DataFileUtil_download_web_file_jobs[i]) +# i += 1 + +# i = 0 +# while i < 3: +# print("fba_tools_bulk_export_objects_jobs number : " + str(i)) +# print(fba_tools_bulk_export_objects_jobs[i]) +# i += 1 + +# i = -10 +# while i < 0: +# print("fba_tools_bulk_export_objects_jobs number : " + str(i)) +# print(fba_tools_bulk_export_objects_jobs[i]) +# i += 1 + + + print("TOTAL length of fba_tools_bulk_export_objects_jobs : " + str(len(fba_tools_bulk_export_objects_jobs))) + print("counter : " + str(fba_tools_bulk_export_objects_job_count)) +# print("DOWNLOADER RESULTS:") +# print(str(downloader_results)) +# loop_count = 0 + db_connection.commit() + print("Finished job count : " + str(finished_job_count)) + print("In If count : " + str(in_if_count)) + print("Downloaders job count : " + str(downloaders_count)) + + print("Downloaders with ws_id count : " + str(downloaders_with_ws_id_count)) + print("Has 2 elements count : has_2_elements_count : " + str(has_2_elements_count)) + print("FINAL DOWNLADER METHODS WITH AND WITHOUT INPUT REFS : ") + + print("insert_prob_refs_count : " + str(insert_prob_refs_count)) + + print("Downloaders_results length : " + str(len(downloader_results))) + return downloader_results + +def get_downloaders_lookup(): + + start_time = time.time() + main_function_start_time = time.time() + + downloaders_set = get_downloaders_set(cursor) + problem_refs_lookup = get_existing_problem_refs(cursor) + downloader_results = pull_downloading_jobs(downloaders_set, 
problem_refs_lookup) + print("--- Total TIME for building downloading lookups %s seconds ---" % (time.time() - start_time)) + return downloader_results + + + +#downloader_results = get_downloaders_lookup() +#print("pre bulk dl print") +#print("Length models_bulk_dl_list : " + str(len(models_bulk_dl_list))) +#for bulk_dl_id in models_bulk_dl_list: +# print("Bulk downloader id : " + str(bulk_dl_id) + " --- bulk_dl downloader result : " + str(downloader_results[bulk_dl_id])) +#print("post bulk dl print") +#i = 0 +#for downloader_key in downloader_results: +# print("Downloader key : " + str(downloader_key) + " downloader_results : " + str(downloader_results[downloader_key])) +# if i > 10: +# break +# i = i + 1 +#print("Downloader_results : " + str(downloader_results['49114/8/1'])) + + diff --git a/sql_create_statements/sql_reporting_views_and_tables.sql b/sql_create_statements/sql_reporting_views_and_tables.sql index 294cf9c..b8ffa0c 100644 --- a/sql_create_statements/sql_reporting_views_and_tables.sql +++ b/sql_create_statements/sql_reporting_views_and_tables.sql @@ -785,6 +785,29 @@ select max(record_date) as record_date, ws_id from metrics.workspaces w group by ws_id; +#IN METRICS +CREATE OR REPLACE table metrics.workspaces_current as +(select ws.* +from metrics.workspaces ws inner join +metrics.hv_workspaces_max_date wsmd +on ws.ws_id = wsmd.ws_id and +ws.record_date = wsmd.record_date); + +alter table metrics.workspaces_current +add unique (ws_id); + +#IN METRICS +CREATE OR REPLACE table metrics.workspaces_current_plus_users as +(select wc.* , bdws.orig_saver_count, bdws.non_orig_saver_count, bdws.orig_saver_size_GB, bdws.non_orig_saver_size_GB +from metrics.user_info ui +inner join metrics.workspaces_current wc on ui.username = wc.username +left outer join blobstore_detail_by_ws bdws on wc.ws_id = bdws.ws_id +where ui.kb_internal_user = 0 +and wc.narrative_version > 0 +and is_deleted = 0 +and is_temporary = 0); + + #IN METRICS_REPORTING CREATE OR REPLACE VIEW metrics_reporting.workspaces_current as select ws.* @@ -1638,6 +1661,8 @@ group by ws_id ); Query OK, 108871 rows affected (6 min 52.38 sec) Records: 108871 Duplicates: 0 Warnings: 0 +alter table blobstore_detail_by_ws add index (ws_id); + create or replace view blobstore_detail_by_ws_monthly as (select ws_id, DATE_FORMAT(`save_date`,'%Y-%m') as month, sum(orig_saver) as orig_saver_count, 0 - sum((orig_saver - 1)) as non_orig_saver_count, From 6e5762e3f97044fb4c7edb5790b78920f18d1460 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Tue, 8 Oct 2024 04:07:20 +0000 Subject: [PATCH 08/11] Added blobstore columns to WS report --- source/custom_scripts/dump_narratives_results.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/source/custom_scripts/dump_narratives_results.py b/source/custom_scripts/dump_narratives_results.py index 1bc14f2..7af4ebd 100644 --- a/source/custom_scripts/dump_narratives_results.py +++ b/source/custom_scripts/dump_narratives_results.py @@ -30,12 +30,17 @@ def dump_narratives_results(): # CHANGE QUERY HERE # Query for Adam Narratives dump of information: - query = ("select wc.* from metrics.user_info ui inner join metrics_reporting.workspaces_current wc on ui.username = wc.username " - "where ui.kb_internal_user = 0 and wc.narrative_version > 0 and is_deleted = 0 and is_temporary = 0") + query = ("select * from metrics.workspaces_current_plus_users ") +# query = ("select wc.* from metrics.user_info ui inner join metrics_reporting.workspaces_current wc on ui.username = wc.username " 
+# "where ui.kb_internal_user = 0 and wc.narrative_version > 0 and is_deleted = 0 and is_temporary = 0") # Headers for Adam's narratives query (Note if more columns added, may need to update this print("ws_id\tusername\tmod_date\tinitial_save_date\trecord_date\ttop_lvl_object_count\ttotal_object_count\tvisible_app_cells_count\tcode_cells_count\t" "narrative_version\thidden_object_count\tdeleted_object_count\ttotal_size\ttop_lvl_size\tis_public\tis_temporary\tis_deleted\tnumber_of_shares\t" - "num_nar_obj_ids\tstatic_narratives_count\tstatic_narratives_views\tunique_object_types_count") + "num_nar_obj_ids\tstatic_narratives_count\tstatic_narratives_views\tunique_object_types_count\t" + "orig_saver_count\tnon_orig_saver_count\torig_saver_size_GB\tnon_orig_saver_size_GB") + ) + +# "num_nar_obj_ids\tstatic_narratives_count\tstatic_narratives_views\tunique_object_types_count") cursor.execute(query) row_values = list() From 657a8affb50b78f3502b531447e4f2390477fdcf Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Tue, 8 Oct 2024 04:16:53 +0000 Subject: [PATCH 09/11] Added blobstore columns to WS report --- source/custom_scripts/dump_narratives_results.py | 1 - 1 file changed, 1 deletion(-) diff --git a/source/custom_scripts/dump_narratives_results.py b/source/custom_scripts/dump_narratives_results.py index 7af4ebd..1891bca 100644 --- a/source/custom_scripts/dump_narratives_results.py +++ b/source/custom_scripts/dump_narratives_results.py @@ -38,7 +38,6 @@ def dump_narratives_results(): "narrative_version\thidden_object_count\tdeleted_object_count\ttotal_size\ttop_lvl_size\tis_public\tis_temporary\tis_deleted\tnumber_of_shares\t" "num_nar_obj_ids\tstatic_narratives_count\tstatic_narratives_views\tunique_object_types_count\t" "orig_saver_count\tnon_orig_saver_count\torig_saver_size_GB\tnon_orig_saver_size_GB") - ) # "num_nar_obj_ids\tstatic_narratives_count\tstatic_narratives_views\tunique_object_types_count") From aa84b18f587f6941f55539236cc675d15340a7f9 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Thu, 10 Oct 2024 00:37:08 +0000 Subject: [PATCH 10/11] more git cleanup commits --- bin/dump_get_copy_info_for_narratives.sh | 3 +++ source/daily_cron_jobs/upload_app_stats.py | 4 ++-- source/daily_cron_jobs/upload_blobstore_details.py | 10 +++++----- source/daily_cron_jobs/upload_blobstore_stats.py | 4 ++-- .../upload_elasticsearch_usersmry_stats.py | 4 ++-- 5 files changed, 14 insertions(+), 11 deletions(-) create mode 100755 bin/dump_get_copy_info_for_narratives.sh diff --git a/bin/dump_get_copy_info_for_narratives.sh b/bin/dump_get_copy_info_for_narratives.sh new file mode 100755 index 0000000..55d639d --- /dev/null +++ b/bin/dump_get_copy_info_for_narratives.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python custom_scripts/get_copy_info_for_narratives.py diff --git a/source/daily_cron_jobs/upload_app_stats.py b/source/daily_cron_jobs/upload_app_stats.py index 5be45fa..4e90f96 100644 --- a/source/daily_cron_jobs/upload_app_stats.py +++ b/source/daily_cron_jobs/upload_app_stats.py @@ -7,8 +7,8 @@ print("############################################") print("App Stats Upload (UTC): " + str(datetime.datetime.utcnow())) start_time = time.time() -#start_date = "2021-06-01" -#end_date = "2022-07-20" +#start_date = "2023-07-27" +#end_date = "2023-08-01" #methods_upload_app_stats.upload_user_app_stats(start_date, end_date) methods_upload_app_stats.upload_user_app_stats() print("Uploading app stats took ", time.time() - start_time, " seconds to run") diff --git 
a/source/daily_cron_jobs/upload_blobstore_details.py b/source/daily_cron_jobs/upload_blobstore_details.py index a30d1c4..1f0e798 100644 --- a/source/daily_cron_jobs/upload_blobstore_details.py +++ b/source/daily_cron_jobs/upload_blobstore_details.py @@ -14,11 +14,11 @@ start_time = time.time() -start_date = "2024-09-07" -end_date = "2024-09-28" -methods_upload_blobstore_details.process_blobstore_details_data(start_date,end_date) -#methods_upload_blobstore_details.process_blobstore_details_data() -#print("Uploading blobstore details took ", time.time() - start_time, " seconds to run") +#start_date = "2024-09-07" +#end_date = "2024-10-28" +#methods_upload_blobstore_details.process_blobstore_details_data(start_date,end_date) +methods_upload_blobstore_details.process_blobstore_details_data() +print("Uploading blobstore details took ", time.time() - start_time, " seconds to run") start_date=datetime.datetime.combine(yesterday, datetime.datetime.min.time()) diff --git a/source/daily_cron_jobs/upload_blobstore_stats.py b/source/daily_cron_jobs/upload_blobstore_stats.py index 3f3fcc2..ba4b2c1 100644 --- a/source/daily_cron_jobs/upload_blobstore_stats.py +++ b/source/daily_cron_jobs/upload_blobstore_stats.py @@ -7,8 +7,8 @@ print("############################################") print("Blobstore Stats Upload (UTC): " + str(datetime.datetime.utcnow())) start_time = time.time() -#start_date = "2021-09-10" -#end_date = "2021-10-31" +#start_date = "2023-07-27" +#end_date = "2023-08-01" #methods_upload_blobstore_stats.process_blobstore_stats_data(start_date,end_date) methods_upload_blobstore_stats.process_blobstore_stats_data() print("Uploading blobstore stats took ", time.time() - start_time, " seconds to run") diff --git a/source/daily_cron_jobs/upload_elasticsearch_usersmry_stats.py b/source/daily_cron_jobs/upload_elasticsearch_usersmry_stats.py index 881b0d6..40013bf 100644 --- a/source/daily_cron_jobs/upload_elasticsearch_usersmry_stats.py +++ b/source/daily_cron_jobs/upload_elasticsearch_usersmry_stats.py @@ -13,8 +13,8 @@ start_time = time.time() # start_date = "month-day-year" # end_date = "month-day-year" -#start_date = "10-10-2021" -#end_date = "10-28-2021" +#start_date = "07-27-2023" +#end_date = "08-01-2021" #return_capture = methods_upload_elasticsearch_sumrydicts.elastic_summary_dictionaries( # start_date, end_date #) From 11ef6dbd4f3987d53b5d3a4e30591526bbb8b815 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Thu, 10 Oct 2024 00:55:24 +0000 Subject: [PATCH 11/11] master cron job including blobstore_details --- bin/master_cron_shell.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/master_cron_shell.sh b/bin/master_cron_shell.sh index 2f05b37..ef60061 100755 --- a/bin/master_cron_shell.sh +++ b/bin/master_cron_shell.sh @@ -14,6 +14,8 @@ python daily_cron_jobs/upload_public_narratives_count.py python daily_cron_jobs/upload_user_orcid_count.py +python daily_cron_jobs/upload_blobstore_details.py + python daily_cron_jobs/make_reporting_tables.py
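
Note on the EE2 date chunking in source/monthly_cron_jobs/get_downloaders_lookup_ongoing.py (PATCH 07): pull_downloading_jobs() builds the per-month (begin, end) millisecond windows for ee2.check_jobs_date_range_for_all with a twelve-branch if/elif, and the hand-written ranges are slightly uneven (the February window runs through March 1, the March window starts on March 2, and the August window stops at August 30). The helper below is a minimal sketch, not part of the patch series, showing one way the same windows could be derived with calendar.monthrange; it assumes true calendar-month boundaries were the intent, and the function name month_windows_ms is illustrative only.

import calendar
from datetime import datetime

def month_windows_ms(year_to_do):
    """Return [(begin_ms, end_ms), ...] for each month of year_to_do,
    mirroring the int(datetime(...).timestamp()) * 1000 convention used
    in pull_downloading_jobs()."""
    windows = []
    for month in range(1, 13):
        # monthrange returns (weekday_of_first_day, number_of_days_in_month)
        last_day = calendar.monthrange(year_to_do, month)[1]
        begin = int(datetime(year_to_do, month, 1, 0, 0).timestamp()) * 1000
        end = int(datetime(year_to_do, month, last_day, 23, 59).timestamp()) * 1000
        windows.append((begin, end))
    return windows

With boundaries generated this way, every day of the year falls inside exactly one window, which avoids the gap around August 31 in the hand-written ranges.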