From 21b30a0426ec059b830411dc99c8c7ed4bda0885 Mon Sep 17 00:00:00 2001 From: Alexander Simpson Date: Thu, 18 May 2023 11:31:23 +0100 Subject: [PATCH 01/11] Create AnalysisDashboard API with support for creating snapshots --- analysis_dashboard/__init__.py | 0 analysis_dashboard/analysis_dashboard.py | 87 ++++++++++++++++++++++++ analysis_dashboard/data_models.py | 35 ++++++++++ 3 files changed, 122 insertions(+) create mode 100644 analysis_dashboard/__init__.py create mode 100644 analysis_dashboard/analysis_dashboard.py create mode 100644 analysis_dashboard/data_models.py diff --git a/analysis_dashboard/__init__.py b/analysis_dashboard/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/analysis_dashboard/analysis_dashboard.py b/analysis_dashboard/analysis_dashboard.py new file mode 100644 index 0000000..f5957a1 --- /dev/null +++ b/analysis_dashboard/analysis_dashboard.py @@ -0,0 +1,87 @@ +from core_data_modules.logging import Logger +from firebase_admin import firestore, storage + +from analysis_dashboard.data_models import AnalysisSnapshot +from util.firebase_utils import initialize_firebase_app + +log = Logger(__name__) + + +class AnalysisDashboard: + def __init__(self, firebase_app): + """ + Client for accessing an Analysis Dashboard Firebase project. + + :param firebase_app: Firebase app. + :type firebase_app: firebase_admin.App + """ + self._firebase_app = firebase_app + + @classmethod + def init_from_credentials(cls, cert, app_name="AnalysisDashboard"): + """ + :param cert: Firestore service account certificate, as a path to a file or a dictionary. + :type cert: str | dict + :param app_name: Name to give the Firebase app instance used to connect. + :type app_name: str + :return: + :rtype: AnalysisDashboard + """ + return cls(initialize_firebase_app(cert, app_name)) + + def create_snapshot(self, series_id, files): + """ + Creates a new analysis snapshot in Firebase. + + :param series_id: Id of the series the snapshot is for. + :type series_id: str + :param files: Files to upload as part of the snapshot, as a dictionary of (local file path) -> (blob name). + :type files: dict of str -> str + """ + snapshot = AnalysisSnapshot( + datasets=list(files.values()) + ) + + log.info(f"Creating new analysis snapshot with id {snapshot.snapshot_id}...") + for i, (local_file_path, blob_name) in enumerate(files.items()): + log.info(f"Uploading file {i + 1}/{len(files)} to storage") + self.upload_file_to_storage( + file_path=local_file_path, + blob_name=f"series/{series_id}/snapshots/{snapshot.snapshot_id}/files/{blob_name}", + bucket_name="test" + ) + + log.info(f"Writing analysis snapshot document to Firestore...") + self.creat_snapshot_doc_in_firestore(series_id, snapshot) + + def creat_snapshot_doc_in_firestore(self, series_id, analysis_snapshot): + """ + Writes a snapshot document to the AnalysisDashboard firestore in 'create' mode. + + If a snapshot with this snapshot id and series id already exists, this function will fail. + + :param series_id: Id of the series this snapshot is for. + :type series_id: str + :param analysis_snapshot: Analysis snapshot document to write. + :type analysis_snapshot: analysis_dashboard.data_models.AnalysisSnapshot + """ + firestore_client = firestore.client(self._firebase_app) + firestore_client \ + .document(f"series/{series_id}/snapshots/{analysis_snapshot.snapshot_id}") \ + .create(analysis_snapshot.to_dict()) + + def upload_file_to_storage(self, file_path, blob_name, bucket_name): + """ + Uploads a file from the local disk to an Analysis Dashboard storage bucket. + + :param file_path: Path on local disk to the file to upload. + :type file_path: str + :param blob_name: Name to give the blob in storage. + :type blob_name: str + :param bucket_name: Name of the bucket to upload the file to. + :type bucket_name: str + """ + bucket = storage.bucket(bucket_name, app=self._firebase_app) + blob = bucket.blob(blob_name) + log.info(f"Uploading '{file_path}' -> '{blob.public_url}'...") + blob.upload_from_filename(file_path) diff --git a/analysis_dashboard/data_models.py b/analysis_dashboard/data_models.py new file mode 100644 index 0000000..df8cc0e --- /dev/null +++ b/analysis_dashboard/data_models.py @@ -0,0 +1,35 @@ +import uuid + + +class AnalysisSnapshot: + def __init__(self, datasets, snapshot_id=None): + """ + Represents a single version of a piece of analysis, describing which datasets are available and how those + datasets were generated. + + :param datasets: List of datasets available. + TODO rename to files? + :type datasets: list of str + TODO change to dict of dataset_name -> bucket_name? + :param snapshot_id: Id of this analysis snapshot. If None, a message id will automatically be generated in + the constructor. + :type snapshot_id: str | None + """ + if snapshot_id is None: + snapshot_id = str(uuid.uuid4()) + + self.snapshot_id = snapshot_id + self.datasets = datasets + + def to_dict(self): + return { + "snapshot_id": self.snapshot_id, + "datasets": self.datasets + } + + @classmethod + def from_dict(cls, d): + return cls( + d["snapshot_id"], + d["datasets"] + ) From a2cfe1a879b6e4efd415b444bdcf2ce9c3e21562 Mon Sep 17 00:00:00 2001 From: Alexander Simpson Date: Thu, 18 May 2023 11:37:20 +0100 Subject: [PATCH 02/11] Move AnalysisSnapshot from data_models.py -> data_models/analysis_snapshot.py --- analysis_dashboard/data_models/__init__.py | 1 + .../{data_models.py => data_models/analysis_snapshot.py} | 0 2 files changed, 1 insertion(+) create mode 100644 analysis_dashboard/data_models/__init__.py rename analysis_dashboard/{data_models.py => data_models/analysis_snapshot.py} (100%) diff --git a/analysis_dashboard/data_models/__init__.py b/analysis_dashboard/data_models/__init__.py new file mode 100644 index 0000000..1492f5b --- /dev/null +++ b/analysis_dashboard/data_models/__init__.py @@ -0,0 +1 @@ +from .analysis_snapshot import AnalysisSnapshot diff --git a/analysis_dashboard/data_models.py b/analysis_dashboard/data_models/analysis_snapshot.py similarity index 100% rename from analysis_dashboard/data_models.py rename to analysis_dashboard/data_models/analysis_snapshot.py From 3eef7ed6ff08936195f7f3c132d4861ce4b785e3 Mon Sep 17 00:00:00 2001 From: Alexander Simpson Date: Fri, 19 May 2023 15:44:07 +0100 Subject: [PATCH 03/11] Add bucket_name as an argument to AnalysisDashboard.create_snapshot --- analysis_dashboard/analysis_dashboard.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/analysis_dashboard/analysis_dashboard.py b/analysis_dashboard/analysis_dashboard.py index f5957a1..9b36811 100644 --- a/analysis_dashboard/analysis_dashboard.py +++ b/analysis_dashboard/analysis_dashboard.py @@ -29,7 +29,7 @@ def init_from_credentials(cls, cert, app_name="AnalysisDashboard"): """ return cls(initialize_firebase_app(cert, app_name)) - def create_snapshot(self, series_id, files): + def create_snapshot(self, series_id, files, bucket_name): """ Creates a new analysis snapshot in Firebase. @@ -37,6 +37,8 @@ def create_snapshot(self, series_id, files): :type series_id: str :param files: Files to upload as part of the snapshot, as a dictionary of (local file path) -> (blob name). :type files: dict of str -> str + :param bucket_name: Name of bucket to upload files to e.g. 'analysis-dashboard.appspot.com' + :type bucket_name: str """ snapshot = AnalysisSnapshot( datasets=list(files.values()) @@ -48,7 +50,7 @@ def create_snapshot(self, series_id, files): self.upload_file_to_storage( file_path=local_file_path, blob_name=f"series/{series_id}/snapshots/{snapshot.snapshot_id}/files/{blob_name}", - bucket_name="test" + bucket_name=bucket_name ) log.info(f"Writing analysis snapshot document to Firestore...") From bbb87607f6192ae9a04a560c68c0cc3cae633986 Mon Sep 17 00:00:00 2001 From: Alexander Simpson Date: Fri, 19 May 2023 15:44:23 +0100 Subject: [PATCH 04/11] Fix typo in name of AnalysisDashboard.create_snapshot_doc_in_firestore --- analysis_dashboard/analysis_dashboard.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analysis_dashboard/analysis_dashboard.py b/analysis_dashboard/analysis_dashboard.py index 9b36811..210fa83 100644 --- a/analysis_dashboard/analysis_dashboard.py +++ b/analysis_dashboard/analysis_dashboard.py @@ -54,9 +54,9 @@ def create_snapshot(self, series_id, files, bucket_name): ) log.info(f"Writing analysis snapshot document to Firestore...") - self.creat_snapshot_doc_in_firestore(series_id, snapshot) + self.create_snapshot_doc_in_firestore(series_id, snapshot) - def creat_snapshot_doc_in_firestore(self, series_id, analysis_snapshot): + def create_snapshot_doc_in_firestore(self, series_id, analysis_snapshot): """ Writes a snapshot document to the AnalysisDashboard firestore in 'create' mode. From cf67f3660a109933ac5878730c9760c78a03871c Mon Sep 17 00:00:00 2001 From: Alexander Simpson Date: Fri, 19 May 2023 15:50:58 +0100 Subject: [PATCH 05/11] Import AnalysisDashboard in analysis_dashboard.__init__ --- analysis_dashboard/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/analysis_dashboard/__init__.py b/analysis_dashboard/__init__.py index e69de29..50479fc 100644 --- a/analysis_dashboard/__init__.py +++ b/analysis_dashboard/__init__.py @@ -0,0 +1 @@ +from .analysis_dashboard import AnalysisDashboard From 2fcc7f221396d75a7e83801fb6d50a208ef2bf68 Mon Sep 17 00:00:00 2001 From: Alexander Simpson Date: Fri, 19 May 2023 16:34:06 +0100 Subject: [PATCH 06/11] Print size of files being uploaded --- analysis_dashboard/analysis_dashboard.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/analysis_dashboard/analysis_dashboard.py b/analysis_dashboard/analysis_dashboard.py index 210fa83..6bbe408 100644 --- a/analysis_dashboard/analysis_dashboard.py +++ b/analysis_dashboard/analysis_dashboard.py @@ -1,3 +1,5 @@ +from os.path import getsize + from core_data_modules.logging import Logger from firebase_admin import firestore, storage @@ -85,5 +87,5 @@ def upload_file_to_storage(self, file_path, blob_name, bucket_name): """ bucket = storage.bucket(bucket_name, app=self._firebase_app) blob = bucket.blob(blob_name) - log.info(f"Uploading '{file_path}' -> '{blob.public_url}'...") + log.info(f"Uploading '{file_path}' -> '{blob.public_url}' ({getsize(file_path)} bytes)...") blob.upload_from_filename(file_path) From b76775b8283709335cc812dfef499cb3a4c02fac Mon Sep 17 00:00:00 2001 From: Alexander Simpson Date: Fri, 19 May 2023 16:41:11 +0100 Subject: [PATCH 07/11] Log when created new analysis snapshot --- analysis_dashboard/analysis_dashboard.py | 1 + 1 file changed, 1 insertion(+) diff --git a/analysis_dashboard/analysis_dashboard.py b/analysis_dashboard/analysis_dashboard.py index 6bbe408..26ddb4f 100644 --- a/analysis_dashboard/analysis_dashboard.py +++ b/analysis_dashboard/analysis_dashboard.py @@ -57,6 +57,7 @@ def create_snapshot(self, series_id, files, bucket_name): log.info(f"Writing analysis snapshot document to Firestore...") self.create_snapshot_doc_in_firestore(series_id, snapshot) + log.info(f"Created new analysis snapshot with id {snapshot.snapshot_id}") def create_snapshot_doc_in_firestore(self, series_id, analysis_snapshot): """ From efb68bd6718656534ec9cf0d1d369e079c4d8041 Mon Sep 17 00:00:00 2001 From: Alexander Simpson Date: Fri, 26 May 2023 13:15:30 +0100 Subject: [PATCH 08/11] Rename datasets -> files --- analysis_dashboard/analysis_dashboard.py | 2 +- analysis_dashboard/data_models/analysis_snapshot.py | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/analysis_dashboard/analysis_dashboard.py b/analysis_dashboard/analysis_dashboard.py index 26ddb4f..5f99292 100644 --- a/analysis_dashboard/analysis_dashboard.py +++ b/analysis_dashboard/analysis_dashboard.py @@ -43,7 +43,7 @@ def create_snapshot(self, series_id, files, bucket_name): :type bucket_name: str """ snapshot = AnalysisSnapshot( - datasets=list(files.values()) + files=list(files.values()) ) log.info(f"Creating new analysis snapshot with id {snapshot.snapshot_id}...") diff --git a/analysis_dashboard/data_models/analysis_snapshot.py b/analysis_dashboard/data_models/analysis_snapshot.py index df8cc0e..159eeb6 100644 --- a/analysis_dashboard/data_models/analysis_snapshot.py +++ b/analysis_dashboard/data_models/analysis_snapshot.py @@ -2,15 +2,13 @@ class AnalysisSnapshot: - def __init__(self, datasets, snapshot_id=None): + def __init__(self, files, snapshot_id=None): """ Represents a single version of a piece of analysis, describing which datasets are available and how those datasets were generated. - :param datasets: List of datasets available. - TODO rename to files? - :type datasets: list of str - TODO change to dict of dataset_name -> bucket_name? + :param files: List of files available. + :type files: list of str :param snapshot_id: Id of this analysis snapshot. If None, a message id will automatically be generated in the constructor. :type snapshot_id: str | None @@ -19,7 +17,7 @@ def __init__(self, datasets, snapshot_id=None): snapshot_id = str(uuid.uuid4()) self.snapshot_id = snapshot_id - self.datasets = datasets + self.datasets = files def to_dict(self): return { From 4cf4150f4b51587545b1f3db030c28560c5ad95e Mon Sep 17 00:00:00 2001 From: Alexander Simpson Date: Fri, 26 May 2023 14:55:12 +0100 Subject: [PATCH 09/11] Rename datasets -> files in serialization --- analysis_dashboard/data_models/analysis_snapshot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analysis_dashboard/data_models/analysis_snapshot.py b/analysis_dashboard/data_models/analysis_snapshot.py index 159eeb6..8cec666 100644 --- a/analysis_dashboard/data_models/analysis_snapshot.py +++ b/analysis_dashboard/data_models/analysis_snapshot.py @@ -22,12 +22,12 @@ def __init__(self, files, snapshot_id=None): def to_dict(self): return { "snapshot_id": self.snapshot_id, - "datasets": self.datasets + "files": self.datasets } @classmethod def from_dict(cls, d): return cls( d["snapshot_id"], - d["datasets"] + d["files"] ) From d7b9b3fe5b659f64f63d714258d7f1feb94320b8 Mon Sep 17 00:00:00 2001 From: Alexander Simpson Date: Fri, 26 May 2023 14:56:13 +0100 Subject: [PATCH 10/11] Add placeholders for tags in serialization --- analysis_dashboard/data_models/analysis_snapshot.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/analysis_dashboard/data_models/analysis_snapshot.py b/analysis_dashboard/data_models/analysis_snapshot.py index 8cec666..563168a 100644 --- a/analysis_dashboard/data_models/analysis_snapshot.py +++ b/analysis_dashboard/data_models/analysis_snapshot.py @@ -7,6 +7,8 @@ def __init__(self, files, snapshot_id=None): Represents a single version of a piece of analysis, describing which datasets are available and how those datasets were generated. + TODO: Support tags + :param files: List of files available. :type files: list of str :param snapshot_id: Id of this analysis snapshot. If None, a message id will automatically be generated in @@ -22,7 +24,9 @@ def __init__(self, files, snapshot_id=None): def to_dict(self): return { "snapshot_id": self.snapshot_id, - "files": self.datasets + "files": self.datasets, + "tags": [], + "tag_categories": [] } @classmethod From bb6948d0b6951970d212f15a38d5fbbbb2147e92 Mon Sep 17 00:00:00 2001 From: Alexander Simpson Date: Tue, 30 May 2023 12:40:07 +0100 Subject: [PATCH 11/11] Update docstrings for analysis --- analysis_dashboard/analysis_dashboard.py | 9 ++++-- .../data_models/analysis_snapshot.py | 29 ++++++++++++++----- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/analysis_dashboard/analysis_dashboard.py b/analysis_dashboard/analysis_dashboard.py index 5f99292..67148a8 100644 --- a/analysis_dashboard/analysis_dashboard.py +++ b/analysis_dashboard/analysis_dashboard.py @@ -22,14 +22,19 @@ def __init__(self, firebase_app): @classmethod def init_from_credentials(cls, cert, app_name="AnalysisDashboard"): """ + Initialises an `AnalysisDashboard` instance from the given Firebase credentials. + + The credentials define which Firebase instance to connect to. + :param cert: Firestore service account certificate, as a path to a file or a dictionary. :type cert: str | dict :param app_name: Name to give the Firebase app instance used to connect. :type app_name: str - :return: + :return: Initialised AnalysisDashboard instance. :rtype: AnalysisDashboard """ - return cls(initialize_firebase_app(cert, app_name)) + firebase_app = initialize_firebase_app(cert, app_name) + return cls(firebase_app) def create_snapshot(self, series_id, files, bucket_name): """ diff --git a/analysis_dashboard/data_models/analysis_snapshot.py b/analysis_dashboard/data_models/analysis_snapshot.py index 563168a..533cb9e 100644 --- a/analysis_dashboard/data_models/analysis_snapshot.py +++ b/analysis_dashboard/data_models/analysis_snapshot.py @@ -2,35 +2,50 @@ class AnalysisSnapshot: + """ + Represents a single version of a piece of analysis, describing which datasets are available and how those + datasets were generated. + """ + def __init__(self, files, snapshot_id=None): """ - Represents a single version of a piece of analysis, describing which datasets are available and how those - datasets were generated. - - TODO: Support tags - :param files: List of files available. :type files: list of str :param snapshot_id: Id of this analysis snapshot. If None, a message id will automatically be generated in the constructor. :type snapshot_id: str | None + TODO: Support tags """ if snapshot_id is None: snapshot_id = str(uuid.uuid4()) self.snapshot_id = snapshot_id - self.datasets = files + self.files = files def to_dict(self): + """ + Serializes this snapshot to a dictionary. + + :return: Serialized snapshot. + :rtype: dict + """ return { "snapshot_id": self.snapshot_id, - "files": self.datasets, + "files": self.files, "tags": [], "tag_categories": [] } @classmethod def from_dict(cls, d): + """ + Initialises an AnalysisSnapshot from a serialized dictionary. + + :param d: Dictionary to deserialize. + :type d: dict + :return: Deserialized snapshot. + :rtype: AnalysisSnapshot + """ return cls( d["snapshot_id"], d["files"]