Commit
Current working code for JSON-based purge/restore of data
CSV export is kept on hold for now, since restoring from CSV is complicated by the loss of data structure.

This commit includes working code for exporting data as a JSON file and importing it back from a JSON file.
Mahadik, Mukul Chandrakant authored and Mahadik, Mukul Chandrakant committed Jan 9, 2024
1 parent ae6eae6 commit 78979ff
Showing 2 changed files with 10 additions and 4 deletions.
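As a rough picture of the round trip the two scripts implement, the purge script exports old entries to a file and deletes them, and the restore script re-inserts them from the JSON dump. The sketch below is hedged: the function names come from the hunks shown further down, but the module import paths, the default-argument call, and the dump file path are assumptions, not part of this commit.

# Hedged sketch only: assumes the bin/ scripts are importable as modules and that
# purgeUserTimeseries writes its JSON dump to a known location (path below is hypothetical).
from uuid import UUID

import bin.purge_user_timeseries as bput
import bin.restore_user_timeseries as brut

user_uuid = UUID("7d9c5c08-1b96-4e0e-9a6c-0efafde3eb84")  # hypothetical test user

# Export entries older than the last pipeline run, then delete them from timeseries_db.
bput.purgeUserTimeseries(user_uuid)

# Later: re-insert the exported documents from the JSON file produced by the purge step.
brut.restoreUserTimeseries("/tmp/saved-timeseries/old_timeseries.json")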
4 changes: 2 additions & 2 deletions bin/purge_user_timeseries.py
@@ -52,8 +52,8 @@ def purgeUserTimeseries(user_uuid, user_email=None, dir_name=DEFAULT_DIR_NAME, f
     else:
         exportOldTimeseriesAsCsv(user_id, last_ts_run, dir_name, file_prefix)
 
-    # res = edb.get_timeseries_db().delete_many({"user_id": user_id, "metadata.write_ts": { "$lt": last_ts_run}})
-    # logging.info("{} deleted entries since {}".format(res.deleted_count, datetime.fromtimestamp(last_ts_run)))
+    result = edb.get_timeseries_db().delete_many({"user_id": user_id, "metadata.write_ts": { "$lt": last_ts_run}})
+    logging.debug("{} deleted entries since {}".format(result.deleted_count, datetime.fromtimestamp(last_ts_run)))
 
 if __name__ == '__main__':
     logging.basicConfig(level=logging.DEBUG)
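The hunk above only shows the existing CSV branch and the now-active delete; the JSON export path mentioned in the commit message is not visible here. As a hedged sketch, assuming a helper analogous to exportOldTimeseriesAsCsv (the name exportOldTimeseriesAsJson, the output path, and the edb import line are assumptions), it would serialize _id and user_id as strings so that restore_user_timeseries.py can convert them back:

# Hypothetical sketch, not part of this commit: JSON counterpart to exportOldTimeseriesAsCsv.
import json
import logging

import emission.core.get_database as edb  # assumed to be the same edb handle used in the script

def exportOldTimeseriesAsJson(user_id, last_ts_run, dir_name, file_prefix):
    cursor = edb.get_timeseries_db().find(
        {"user_id": user_id, "metadata.write_ts": {"$lt": last_ts_run}})
    documents = []
    for document in cursor:
        document["_id"] = str(document["_id"])          # ObjectId -> 24-char hex string
        document["user_id"] = str(document["user_id"])  # UUID -> canonical string form
        documents.append(document)
    out_path = "{}/{}.json".format(dir_name, file_prefix)  # output path is an assumption
    with open(out_path, 'w') as f:
        json.dump(documents, f, default=str)  # default=str guards any other non-JSON types
    logging.debug("Exported {} entries older than {} to {}".format(
        len(documents), last_ts_run, out_path))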
10 changes: 8 additions & 2 deletions bin/restore_user_timeseries.py
@@ -9,6 +9,7 @@
 import emission.storage.pipeline_queries as esp
 import pandas as pd
 import pymongo
+from bson.binary import Binary
 from bson import ObjectId
 import json
 
@@ -21,9 +22,14 @@ def restoreUserTimeseries(filename):
 
     with open(filename, 'r') as file:
         data = json.load(file)
-        result = edb.get_timeseries_db().insert_many(data)
 
-    logging.info("{} documents successfully inserted".format(len(result.inserted_ids)))
+    # Converting _id to ObjectId and UUID string to binary BinData
+    for document in data:
+        document["_id"] = ObjectId(document["_id"])
+        document["user_id"] = Binary(uuid.UUID(document["user_id"]).bytes, 0x03)
+
+    result = edb.get_timeseries_db().insert_many(data)
+    logging.debug("{} documents successfully inserted".format(len(result.inserted_ids)))
 
 if __name__ == '__main__':
     logging.basicConfig(level=logging.DEBUG)
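For concreteness, here is a hedged illustration of what one exported document might look like (all field values are made up) and how the loop above maps the string fields back to BSON types; Binary subtype 0x03 is the legacy Python UUID representation.

# Illustrative only: sample document values are hypothetical.
import uuid
from bson import ObjectId
from bson.binary import Binary

document = {
    "_id": "659d1bc0a2b3c4d5e6f70123",                  # ObjectId serialized as hex string
    "user_id": "7d9c5c08-1b96-4e0e-9a6c-0efafde3eb84",  # user UUID serialized as string
    "metadata": {"key": "background/location", "write_ts": 1704758400.0},
    "data": {"latitude": 39.74, "longitude": -105.17},
}

# Same conversions as the restore loop in the diff above:
document["_id"] = ObjectId(document["_id"])
document["user_id"] = Binary(uuid.UUID(document["user_id"]).bytes, 0x03)  # subtype 3 = legacy UUID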
