Commit
Current working code for JSON-based purge/restore of data
CSV export is kept on hold for now, since restoring from CSV is complicated by the loss of data structure.

This commit includes working code for exporting data as a JSON file and importing it back from a JSON file.
Mahadik, Mukul Chandrakant authored and Mahadik, Mukul Chandrakant committed Jan 9, 2024
1 parent ae6eae6 commit 78979ff
Showing 2 changed files with 10 additions and 4 deletions.
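As a rough picture of the round trip the two scripts implement, the purge script exports old entries to a file and deletes them, and the restore script re-inserts them from the JSON dump. The sketch below is hedged: the function names come from the hunks shown further down, but the module import paths, the default-argument call, and the dump file path are assumptions, not part of this commit.

# Hedged sketch only: assumes the bin/ scripts are importable as modules and that
# purgeUserTimeseries writes its JSON dump to a known location (path below is hypothetical).
from uuid import UUID

import bin.purge_user_timeseries as bput
import bin.restore_user_timeseries as brut

user_uuid = UUID("7d9c5c08-1b96-4e0e-9a6c-0efafde3eb84")  # hypothetical test user

# Export entries older than the last pipeline run, then delete them from timeseries_db.
bput.purgeUserTimeseries(user_uuid)

# Later: re-insert the exported documents from the JSON file produced by the purge step.
brut.restoreUserTimeseries("/tmp/saved-timeseries/old_timeseries.json")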
4 changes: 2 additions & 2 deletions bin/purge_user_timeseries.py
@@ -52,8 +52,8 @@ def purgeUserTimeseries(user_uuid, user_email=None, dir_name=DEFAULT_DIR_NAME, f
     else:
         exportOldTimeseriesAsCsv(user_id, last_ts_run, dir_name, file_prefix)
 
-    # res = edb.get_timeseries_db().delete_many({"user_id": user_id, "metadata.write_ts": { "$lt": last_ts_run}})
-    # logging.info("{} deleted entries since {}".format(res.deleted_count, datetime.fromtimestamp(last_ts_run)))
+    result = edb.get_timeseries_db().delete_many({"user_id": user_id, "metadata.write_ts": { "$lt": last_ts_run}})
+    logging.debug("{} deleted entries since {}".format(result.deleted_count, datetime.fromtimestamp(last_ts_run)))
 
 if __name__ == '__main__':
     logging.basicConfig(level=logging.DEBUG)
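The hunk above only shows the existing CSV branch and the now-active delete; the JSON export path mentioned in the commit message is not visible here. As a hedged sketch, assuming a helper analogous to exportOldTimeseriesAsCsv (the name exportOldTimeseriesAsJson, the output path, and the edb import line are assumptions), it would serialize _id and user_id as strings so that restore_user_timeseries.py can convert them back:

# Hypothetical sketch, not part of this commit: JSON counterpart to exportOldTimeseriesAsCsv.
import json
import logging

import emission.core.get_database as edb  # assumed to be the same edb handle used in the script

def exportOldTimeseriesAsJson(user_id, last_ts_run, dir_name, file_prefix):
    cursor = edb.get_timeseries_db().find(
        {"user_id": user_id, "metadata.write_ts": {"$lt": last_ts_run}})
    documents = []
    for document in cursor:
        document["_id"] = str(document["_id"])          # ObjectId -> 24-char hex string
        document["user_id"] = str(document["user_id"])  # UUID -> canonical string form
        documents.append(document)
    out_path = "{}/{}.json".format(dir_name, file_prefix)  # output path is an assumption
    with open(out_path, 'w') as f:
        json.dump(documents, f, default=str)  # default=str guards any other non-JSON types
    logging.debug("Exported {} entries older than {} to {}".format(
        len(documents), last_ts_run, out_path))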
10 changes: 8 additions & 2 deletions bin/restore_user_timeseries.py
@@ -9,6 +9,7 @@
 import emission.storage.pipeline_queries as esp
 import pandas as pd
 import pymongo
+from bson.binary import Binary
 from bson import ObjectId
 import json
 
@@ -21,9 +22,14 @@ def restoreUserTimeseries(filename):
 
     with open(filename, 'r') as file:
         data = json.load(file)
-        result = edb.get_timeseries_db().insert_many(data)
 
-    logging.info("{} documents successfully inserted".format(len(result.inserted_ids)))
+    # Converting _id to ObjectId and UUID string to binary BinData
+    for document in data:
+        document["_id"] = ObjectId(document["_id"])
+        document["user_id"] = Binary(uuid.UUID(document["user_id"]).bytes, 0x03)
+
+    result = edb.get_timeseries_db().insert_many(data)
+    logging.debug("{} documents successfully inserted".format(len(result.inserted_ids)))
 
 if __name__ == '__main__':
     logging.basicConfig(level=logging.DEBUG)
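For concreteness, here is a hedged illustration of what one exported document might look like (all field values are made up) and how the loop above maps the string fields back to BSON types; Binary subtype 0x03 is the legacy Python UUID representation.

# Illustrative only: sample document values are hypothetical.
import uuid
from bson import ObjectId
from bson.binary import Binary

document = {
    "_id": "659d1bc0a2b3c4d5e6f70123",                  # ObjectId serialized as hex string
    "user_id": "7d9c5c08-1b96-4e0e-9a6c-0efafde3eb84",  # user UUID serialized as string
    "metadata": {"key": "background/location", "write_ts": 1704758400.0},
    "data": {"latitude": 39.74, "longitude": -105.17},
}

# Same conversions as the restore loop in the diff above:
document["_id"] = ObjectId(document["_id"])
document["user_id"] = Binary(uuid.UUID(document["user_id"]).bytes, 0x03)  # subtype 3 = legacy UUID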
