From 8188a8a39d630569cc137386e9665d341e8e8b8d Mon Sep 17 00:00:00 2001
From: Jessica Nash <janash@vt.edu>
Date: Fri, 29 Sep 2023 00:10:49 -0400
Subject: [PATCH 1/7] add queries for adding new tables and routes for molecule
 data download

---
 add_data_tables.sql                          |  68 +++++++++
 backend/app/app/api/v2/endpoints/molecule.py | 147 ++++++++++++++++++-
 backend/environment.yaml                     |   1 +
 3 files changed, 211 insertions(+), 5 deletions(-)
 create mode 100644 add_data_tables.sql

diff --git a/add_data_tables.sql b/add_data_tables.sql
new file mode 100644
index 0000000..0e8ea7b
--- /dev/null
+++ b/add_data_tables.sql
@@ -0,0 +1,68 @@
+CREATE TABLE ml_data (
+    molecule_id INTEGER,
+    property TEXT,
+    max DOUBLE PRECISION,
+    min DOUBLE PRECISION,
+    delta DOUBLE PRECISION,
+    vburminconf DOUBLE PRECISION,
+    boltzmann_average DOUBLE PRECISION
+);
+-- Each row must reference an existing molecule; the same FK + index + load pattern repeats for every table below.
+ALTER TABLE ml_data
+ADD CONSTRAINT fk_molecule_id
+FOREIGN KEY (molecule_id) REFERENCES molecule(molecule_id);
+
+CREATE INDEX idx_ml_data_molecule_id ON ml_data(molecule_id);
+-- Bulk-load from a comma-delimited CSV; HEADER marks the first line as column names rather than data.
+\COPY ml_data FROM 'ml_data_json_table.csv' DELIMITER ',' CSV HEADER;
+
+CREATE TABLE dft_data (
+    molecule_id INTEGER,
+    property TEXT,
+    max DOUBLE PRECISION,
+    min DOUBLE PRECISION,
+    delta DOUBLE PRECISION,
+    vburminconf DOUBLE PRECISION,
+    boltzmann_average DOUBLE PRECISION
+);
+
+ALTER TABLE dft_data
+ADD CONSTRAINT fk_molecule_id
+FOREIGN KEY (molecule_id) REFERENCES molecule(molecule_id);
+
+CREATE INDEX idx_dft_data_molecule_id ON dft_data(molecule_id);
+
+\COPY dft_data FROM 'dft_data_json_table.csv' DELIMITER ',' CSV HEADER;
+
+CREATE TABLE xtb_data (
+    molecule_id INTEGER,
+    property TEXT,
+    max DOUBLE PRECISION,
+    min DOUBLE PRECISION,
+    boltzmann_average DOUBLE PRECISION
+);
+
+ALTER TABLE xtb_data
+ADD CONSTRAINT fk_molecule_id
+FOREIGN KEY (molecule_id) REFERENCES molecule(molecule_id);
+
+CREATE INDEX idx_xtb_data_molecule_id ON xtb_data(molecule_id);
+
+\COPY xtb_data FROM 'xtb_data_json_table.csv' DELIMITER ',' CSV HEADER;
+
+-- NOTE(review): column order below differs from xtb_data (boltzmann_average first); presumably it mirrors the CSV layout - confirm.
+CREATE TABLE xtb_ni_data (
+    molecule_id INTEGER,
+    property TEXT,
+    boltzmann_average DOUBLE PRECISION,
+    max DOUBLE PRECISION,
+    min DOUBLE PRECISION
+);
+
+ALTER TABLE xtb_ni_data
+ADD CONSTRAINT fk_molecule_id
+FOREIGN KEY (molecule_id) REFERENCES molecule(molecule_id);
+
+CREATE INDEX idx_xtb_ni_data_molecule_id ON xtb_ni_data(molecule_id);
+
+\COPY xtb_ni_data FROM 'xtb_ni_data_json_table.csv' DELIMITER ',' CSV HEADER;
diff --git a/backend/app/app/api/v2/endpoints/molecule.py b/backend/app/app/api/v2/endpoints/molecule.py
index b5d93a8..43f141c 100644
--- a/backend/app/app/api/v2/endpoints/molecule.py
+++ b/backend/app/app/api/v2/endpoints/molecule.py
@@ -1,15 +1,23 @@
-from multiprocessing.sharedctypes import Value
-from re import sub
+"""
+API endpoints for molecules.
+All routes in this module are prefixed with /molecules.
+"""
+
+import io
 from typing import List, Optional, Any
 
-from app import schemas
-from app.api import deps
-from app.db.session import models
+import pandas as pd
+
 from fastapi import APIRouter, Depends, HTTPException
+from fastapi.responses import StreamingResponse
 from rdkit import Chem
 from sqlalchemy import exc, text
 from sqlalchemy.orm import Session
 
+from app import schemas
+from app.api import deps
+from app.db.session import models
+
 router = APIRouter()
 
 
@@ -47,6 +55,135 @@ def valid_smiles(smiles):
 
     return smiles
 
+@router.get("/data/export/{molecule_id}", response_class=StreamingResponse)
+def get_molecule_data(molecule_id: int,
+                      data_type: str="ml",
+                      db: Session = Depends(deps.get_db)):
+
+    # Generalized - get max molecule id.
+    query = text(f"SELECT MAX(molecule_id) FROM molecule;")
+    max_molecule_id = db.execute(query).fetchall()[0][0]
+
+    # Check to see if the molecule_id is within range.
+    if molecule_id > max_molecule_id:
+        raise HTTPException(status_code=404, detail=f"Molecule with ID supplied not found, the maximum ID is {max_molecule_id}")
+    
+    # Check to see if the molecule_id is within range.
+    if molecule_id <= 0:
+        raise HTTPException(status_code=500)
+    
+    # Check for valid data type.
+    if data_type.lower() not in ["ml", "dft", "xtb", "xtb_ni"]:
+        raise HTTPException(status_code=400, detail="Invalid data type.")
+    
+    # Use pandas.read_sql_query to get the data.
+    table_name = f"{data_type}_data"
+    query = text(f"""
+        SELECT t.*, m.SMILES
+        FROM {table_name} t
+        JOIN molecule m ON t.molecule_id = m.molecule_id
+        WHERE t.molecule_id = :molecule_id
+    """)
+
+    stmt = query.bindparams(molecule_id=molecule_id)
+
+    df = pd.read_sql_query(stmt, db.bind)
+
+    # Reshape the data into wide format
+    df_wide = df.pivot(index=["molecule_id", "smiles"], columns="property")
+
+    # Flatten multi-level columns and reset the index
+    df_wide.columns = ['_'.join(col[::-1]).strip() for col in df_wide.columns.values]
+    df_wide.reset_index(inplace=True)
+
+    # Add the SMILES column back
+    df_wide = pd.merge(df_wide, df[["molecule_id", "smiles"]].drop_duplicates(), on="molecule_id", how="left")
+
+    df_wide.dropna(axis=1, inplace=True)
+
+    df_wide.drop(columns="smiles_y", inplace=True)
+
+    df_wide.rename( columns = {'smiles_x':'smiles'}, inplace=True) 
+
+    # Send csv file as streaming response.
+    # See: https://github.com/tiangolo/fastapi/issues/1277
+    # See: https://stackoverflow.com/questions/61140398/fastapi-return-a-file-response-with-the-output-of-a-sql-query
+
+    # Create a buffer to hold the csv file.
+    buffer = io.StringIO()
+
+    # Write the dataframe to the buffer.
+    df_wide.to_csv(buffer, index=False)
+
+    # Set the buffer to the beginning of the file.
+    buffer.seek(0)
+
+    # Return the buffer as a streaming response.
+    response = StreamingResponse(buffer, media_type="text/csv")
+    response.headers["Content-Disposition"] = f"attachment; filename={molecule_id}_{data_type}.csv"
+    return response
+
+@router.get("/data/export", response_class=StreamingResponse)
+def get_molecules_data(molecule_ids: str,
+                       data_type: str="ml",
+                       db: Session = Depends(deps.get_db)):
+    
+    # Sanitize molecule ids
+    int_check = [x.strip().isdigit() for x in molecule_ids.split(",")]
+
+    if not all(int_check):
+        raise HTTPException(status_code=400, detail="Invalid molecule ids.")
+
+    # Check for valid data type.
+    if data_type.lower() not in ["ml", "dft", "xtb", "xtb_ni"]:
+        raise HTTPException(status_code=400, detail="Invalid data type.")
+    
+    # Use pandas.read_sql_query to get the data.
+    table_name = f"{data_type}_data"
+
+    #molecule_ids= [int(x) for x in molecule_ids.split(",")]
+
+    query = text(f"""
+        SELECT t.*, m.SMILES
+        FROM {table_name} t
+        JOIN molecule m ON t.molecule_id = m.molecule_id
+        WHERE t.molecule_id IN ({molecule_ids})
+    """)
+
+    df = pd.read_sql_query(query, db.bind)
+
+    # Reshape the data into wide format
+    df_wide = df.pivot(index=["molecule_id", "smiles"], columns="property")
+
+    # Flatten multi-level columns and reset the index
+    df_wide.columns = ['_'.join(col[::-1]).strip() for col in df_wide.columns.values]
+
+    df_wide.reset_index(inplace=True)
+
+    # Add the SMILES column back
+    df_wide = pd.merge(df_wide, df[["molecule_id", "smiles"]].drop_duplicates(), on="molecule_id", how="left")
+
+    df_wide.dropna(axis=1, inplace=True)
+
+    df_wide.drop(columns="smiles_y", inplace=True)
+
+    df_wide.rename( columns = {'smiles_x':'smiles'}, inplace=True)
+    
+    # Create a buffer to hold the csv file.
+    buffer = io.StringIO()
+
+    # Write the dataframe to the buffer.
+    df_wide.to_csv(buffer, index=False)
+
+    # Set the buffer to the beginning of the file.
+    buffer.seek(0)
+
+    # Return the buffer as a streaming response.
+    response = StreamingResponse(buffer, media_type="text/csv")
+    response.headers["Content-Disposition"] = f"attachment; filename={data_type}_{molecule_ids.replace(',','_')}.csv"
+
+    return response
+                 
 
 @router.get("/umap", response_model=List[schemas.MoleculeSimple])
 def get_molecule_umap(
diff --git a/backend/environment.yaml b/backend/environment.yaml
index 95d12b0..00c2194 100644
--- a/backend/environment.yaml
+++ b/backend/environment.yaml
@@ -10,6 +10,7 @@ dependencies:
   - alembic 
   - psycopg2-binary 
   - sqlalchemy 
+  - pandas
   - tenacity 
   - uvicorn 
   - curl 

From b3e14742f4a3e3873b092bf3fc724dd5d2f4d257 Mon Sep 17 00:00:00 2001
From: Jessica Nash <janash@vt.edu>
Date: Fri, 29 Sep 2023 12:19:05 -0400
Subject: [PATCH 2/7] clean up download code

---
 backend/app/app/api/v2/endpoints/molecule.py | 91 ++++++++------------
 1 file changed, 37 insertions(+), 54 deletions(-)

diff --git a/backend/app/app/api/v2/endpoints/molecule.py b/backend/app/app/api/v2/endpoints/molecule.py
index 43f141c..b22fcb8 100644
--- a/backend/app/app/api/v2/endpoints/molecule.py
+++ b/backend/app/app/api/v2/endpoints/molecule.py
@@ -20,6 +20,36 @@
 
 router = APIRouter()
 
+def _pandas_long_to_wide(df):
+    """
+    Internal function: reshape long-format rows (one per molecule/property) into one wide row per molecule for export.
+    """
+    # Pivot so each (molecule_id, smiles) pair becomes one row and every property spreads across columns.
+    df_wide = df.pivot(index=["molecule_id", "smiles"], columns="property")
+
+    # Flatten the two-level column labels: reverse each label tuple and join its parts with "_".
+    df_wide.columns = ['_'.join(col[::-1]).strip() for col in df_wide.columns.values]
+
+    df_wide.reset_index(inplace=True)
+    # NOTE(review): axis=1 drops a whole column if ANY row holds NaN - confirm this is intended for multi-molecule exports.
+    df_wide.dropna(axis=1, inplace=True)
+
+    return df_wide
+
+def _pandas_to_buffer(df):
+    """Internal function: write a dataframe as CSV into an in-memory text buffer and rewind it."""
+    
+    # In-memory text buffer that will hold the CSV output.
+    buffer = io.StringIO()
+
+    # index=False leaves the dataframe index out of the CSV.
+    df.to_csv(buffer, index=False)
+
+    # Rewind so that a reader starts from the first character.
+    buffer.seek(0)
+
+    return buffer
+
 
 def valid_smiles(smiles):
     """Check to see if a smile string is valid to represent a molecule.
@@ -56,7 +86,7 @@ def valid_smiles(smiles):
     return smiles
 
 @router.get("/data/export/{molecule_id}", response_class=StreamingResponse)
-def get_molecule_data(molecule_id: int,
+async def get_molecule_data(molecule_id: int,
                       data_type: str="ml",
                       db: Session = Depends(deps.get_db)):
 
@@ -76,7 +106,7 @@ def get_molecule_data(molecule_id: int,
     if data_type.lower() not in ["ml", "dft", "xtb", "xtb_ni"]:
         raise HTTPException(status_code=400, detail="Invalid data type.")
     
-    # Use pandas.read_sql_query to get the data.
+    # Use pandas.rea``  d_sql_query to get the data.
     table_name = f"{data_type}_data"
     query = text(f"""
         SELECT t.*, m.SMILES
@@ -89,34 +119,9 @@ def get_molecule_data(molecule_id: int,
 
     df = pd.read_sql_query(stmt, db.bind)
 
-    # Reshape the data into wide format
-    df_wide = df.pivot(index=["molecule_id", "smiles"], columns="property")
-
-    # Flatten multi-level columns and reset the index
-    df_wide.columns = ['_'.join(col[::-1]).strip() for col in df_wide.columns.values]
-    df_wide.reset_index(inplace=True)
-
-    # Add the SMILES column back
-    df_wide = pd.merge(df_wide, df[["molecule_id", "smiles"]].drop_duplicates(), on="molecule_id", how="left")
+    df_wide = _pandas_long_to_wide(df)      
 
-    df_wide.dropna(axis=1, inplace=True)
-
-    df_wide.drop(columns="smiles_y", inplace=True)
-
-    df_wide.rename( columns = {'smiles_x':'smiles'}, inplace=True) 
-
-    # Send csv file as streaming response.
-    # See: https://github.com/tiangolo/fastapi/issues/1277
-    # See: https://stackoverflow.com/questions/61140398/fastapi-return-a-file-response-with-the-output-of-a-sql-query
-
-    # Create a buffer to hold the csv file.
-    buffer = io.StringIO()
-
-    # Write the dataframe to the buffer.
-    df_wide.to_csv(buffer, index=False)
-
-    # Set the buffer to the beginning of the file.
-    buffer.seek(0)
+    buffer = _pandas_to_buffer(df_wide)
 
     # Return the buffer as a streaming response.
     response = StreamingResponse(buffer, media_type="text/csv")
@@ -124,7 +129,7 @@ def get_molecule_data(molecule_id: int,
     return response
 
 @router.get("/data/export", response_class=StreamingResponse)
-def get_molecules_data(molecule_ids: str,
+async def get_molecules_data(molecule_ids: str,
                        data_type: str="ml",
                        db: Session = Depends(deps.get_db)):
     
@@ -152,31 +157,9 @@ def get_molecules_data(molecule_ids: str,
 
     df = pd.read_sql_query(query, db.bind)
 
-    # Reshape the data into wide format
-    df_wide = df.pivot(index=["molecule_id", "smiles"], columns="property")
+    df_wide = _pandas_long_to_wide(df)      
 
-    # Flatten multi-level columns and reset the index
-    df_wide.columns = ['_'.join(col[::-1]).strip() for col in df_wide.columns.values]
-
-    df_wide.reset_index(inplace=True)
-
-    # Add the SMILES column back
-    df_wide = pd.merge(df_wide, df[["molecule_id", "smiles"]].drop_duplicates(), on="molecule_id", how="left")
-
-    df_wide.dropna(axis=1, inplace=True)
-
-    df_wide.drop(columns="smiles_y", inplace=True)
-
-    df_wide.rename( columns = {'smiles_x':'smiles'}, inplace=True)
-    
-    # Create a buffer to hold the csv file.
-    buffer = io.StringIO()
-
-    # Write the dataframe to the buffer.
-    df_wide.to_csv(buffer, index=False)
-
-    # Set the buffer to the beginning of the file.
-    buffer.seek(0)
+    buffer = _pandas_to_buffer(df_wide)
 
     # Return the buffer as a streaming response.
     response = StreamingResponse(buffer, media_type="text/csv")

From 580c6f4071a8c35c3fc660f6def3f371b807fcc4 Mon Sep 17 00:00:00 2001
From: Jessica Nash <janash@vt.edu>
Date: Sun, 1 Oct 2023 17:14:34 -0400
Subject: [PATCH 3/7] add json endpoint for molecule data

---
 backend/app/app/api/v2/endpoints/molecule.py | 111 +++++++++++--------
 1 file changed, 66 insertions(+), 45 deletions(-)

diff --git a/backend/app/app/api/v2/endpoints/molecule.py b/backend/app/app/api/v2/endpoints/molecule.py
index b22fcb8..7c5f8f6 100644
--- a/backend/app/app/api/v2/endpoints/molecule.py
+++ b/backend/app/app/api/v2/endpoints/molecule.py
@@ -50,6 +50,21 @@ def _pandas_to_buffer(df):
 
     return buffer
 
+def _valid_molecule_id(molecule_id, db):
+    """Raise HTTPException unless 0 < molecule_id <= MAX(molecule_id) in the molecule table; otherwise return None."""
+    # Query the current largest id rather than hard-coding an upper bound.
+    query = text(f"SELECT MAX(molecule_id) FROM molecule;")
+    max_molecule_id = db.execute(query).fetchall()[0][0]
+
+    # Ids above the current maximum are reported as 404, with the maximum included in the detail message.
+    if molecule_id > max_molecule_id:
+        raise HTTPException(status_code=404, detail=f"Molecule with ID supplied not found, the maximum ID is {max_molecule_id}")
+    
+    # NOTE(review): zero/negative ids raise a bare 500 although this is a client-side error - consider a 4xx status.
+    if molecule_id <= 0:
+        raise HTTPException(status_code=500)
+
+    return 
 
 def valid_smiles(smiles):
     """Check to see if a smile string is valid to represent a molecule.
@@ -85,27 +100,22 @@ def valid_smiles(smiles):
 
     return smiles
 
-@router.get("/data/export/{molecule_id}", response_class=StreamingResponse)
+@router.get("/data/export/{molecule_id}")
 async def get_molecule_data(molecule_id: int,
                       data_type: str="ml",
+                      return_type: str="csv",
                       db: Session = Depends(deps.get_db)):
 
-    # Generalized - get max molecule id.
-    query = text(f"SELECT MAX(molecule_id) FROM molecule;")
-    max_molecule_id = db.execute(query).fetchall()[0][0]
-
-    # Check to see if the molecule_id is within range.
-    if molecule_id > max_molecule_id:
-        raise HTTPException(status_code=404, detail=f"Molecule with ID supplied not found, the maximum ID is {max_molecule_id}")
-    
-    # Check to see if the molecule_id is within range.
-    if molecule_id <= 0:
-        raise HTTPException(status_code=500)
+    # Check to see if the molecule_id is valid.
+    _valid_molecule_id(molecule_id, db)
     
     # Check for valid data type.
     if data_type.lower() not in ["ml", "dft", "xtb", "xtb_ni"]:
         raise HTTPException(status_code=400, detail="Invalid data type.")
     
+    if return_type.lower() not in ["csv", "json"]:
+        raise HTTPException(status_code=400, detail="Invalid return type.")
+    
     # Use pandas.rea``  d_sql_query to get the data.
     table_name = f"{data_type}_data"
     query = text(f"""
@@ -121,33 +131,52 @@ async def get_molecule_data(molecule_id: int,
 
     df_wide = _pandas_long_to_wide(df)      
 
-    buffer = _pandas_to_buffer(df_wide)
+    if return_type.lower() == "json":
+        json_data =  df_wide.to_dict(orient="records")[0]
+        return json_data
+    else:
+        buffer = _pandas_to_buffer(df_wide)
 
-    # Return the buffer as a streaming response.
-    response = StreamingResponse(buffer, media_type="text/csv")
-    response.headers["Content-Disposition"] = f"attachment; filename={molecule_id}_{data_type}.csv"
-    return response
+        # Return the buffer as a streaming response.
+        response = StreamingResponse(buffer, media_type="text/csv")
+        response.headers["Content-Disposition"] = f"attachment; filename={molecule_id}_{data_type}.csv"
+        return response
 
-@router.get("/data/export", response_class=StreamingResponse)
+@router.get("/data/export")
 async def get_molecules_data(molecule_ids: str,
                        data_type: str="ml",
+                       return_type: str="csv",
+                       context: Optional[str]=None,
                        db: Session = Depends(deps.get_db)):
     
+    
     # Sanitize molecule ids
     int_check = [x.strip().isdigit() for x in molecule_ids.split(",")]
 
     if not all(int_check):
         raise HTTPException(status_code=400, detail="Invalid molecule ids.")
+    
+    molecule_ids_list = [int(x) for x in molecule_ids.split(",")]
+    first_molecule_id = molecule_ids_list[0]
+    num_molecules = len(molecule_ids_list)
+
+    if context:
+        if context.lower() not in ["substructure", "pca_neighbors", "umap_neighbors"]:
+            raise HTTPException(status_code=400, detail="Invalid context.")
+
+    # Check to see if all molecule ids are valid.
+    [ _valid_molecule_id(int(x), db) for x in molecule_ids.split(",") ]
 
     # Check for valid data type.
     if data_type.lower() not in ["ml", "dft", "xtb", "xtb_ni"]:
         raise HTTPException(status_code=400, detail="Invalid data type.")
     
+    if return_type.lower() not in ["csv", "json"]:
+        raise HTTPException(status_code=400, detail="Invalid return type.")
+    
     # Use pandas.read_sql_query to get the data.
     table_name = f"{data_type}_data"
 
-    #molecule_ids= [int(x) for x in molecule_ids.split(",")]
-
     query = text(f"""
         SELECT t.*, m.SMILES
         FROM {table_name} t
@@ -159,13 +188,21 @@ async def get_molecules_data(molecule_ids: str,
 
     df_wide = _pandas_long_to_wide(df)      
 
-    buffer = _pandas_to_buffer(df_wide)
-
-    # Return the buffer as a streaming response.
-    response = StreamingResponse(buffer, media_type="text/csv")
-    response.headers["Content-Disposition"] = f"attachment; filename={data_type}_{molecule_ids.replace(',','_')}.csv"
-
-    return response
+    if return_type.lower() == "json":
+        json_data =  df_wide.to_dict(orient="records")
+        return json_data
+    else:
+        buffer = _pandas_to_buffer(df_wide)
+
+        # Return the buffer as a streaming response.
+        filename = f"{data_type}_{first_molecule_id}_{num_molecules}"
+        if context:
+            filename += f"_{context}"
+        filename += ".csv"
+        response = StreamingResponse(buffer, media_type="text/csv")
+        response.headers["Content-Disposition"] = f"attachment; filename={filename}"
+
+        return response
                  
 
 @router.get("/umap", response_model=List[schemas.MoleculeSimple])
@@ -220,13 +257,7 @@ def get_molecule_umap(
 @router.get("/{molecule_id}", response_model=schemas.Molecule)
 def get_a_single_molecule(molecule_id: int, db: Session = Depends(deps.get_db)):
 
-    # Generalized - get max molecule id.
-    query = text(f"SELECT MAX(molecule_id) FROM molecule;")
-    max_molecule_id = db.execute(query).fetchall()[0][0]
-
-    # Check to see if the molecule_id is within range.
-    if molecule_id > max_molecule_id:
-        raise HTTPException(status_code=404, detail=f"Molecule with ID supplied not found, the maximum ID is {max_molecule_id}")
+    _valid_molecule_id(molecule_id, db)
 
     molecule = (
         db.query(models.molecule)
@@ -295,17 +326,7 @@ def search_neighbors(
 
     type = type.lower()
 
-    # Generalized - get max molecule id.
-    query = text(f"SELECT MAX(molecule_id) FROM molecule;")
-    max_molecule_id = db.execute(query).fetchall()[0][0]
-
-    # Check to see if the molecule_id is within range.
-    if molecule_id > max_molecule_id:
-        raise HTTPException(status_code=404, detail=f"Molecule with ID supplied not found, the maximum ID is {max_molecule_id}")
-    
-    # Check to see if the molecule_id is within range.
-    if molecule_id <= 0:
-        raise HTTPException(status_code=500)
+    _valid_molecule_id(molecule_id, db)
     
     # Check for valid neighbor type.
     if type not in ["pca", "umap"]:

From 0a4127db19c43ad957e83ad31ad8367ff8a82d9e Mon Sep 17 00:00:00 2001
From: Jessica Nash <janash@vt.edu>
Date: Sun, 1 Oct 2023 17:15:32 -0400
Subject: [PATCH 4/7] basic molecule table - no pagination

---
 frontend/src/components/MoleculeDataTable.jsx | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 frontend/src/components/MoleculeDataTable.jsx

diff --git a/frontend/src/components/MoleculeDataTable.jsx b/frontend/src/components/MoleculeDataTable.jsx
new file mode 100644
index 0000000..400145d
--- /dev/null
+++ b/frontend/src/components/MoleculeDataTable.jsx
@@ -0,0 +1,63 @@
+import React, {useEffect, useState} from 'react';
+import Table from '@mui/material/Table';
+import TableBody from '@mui/material/TableBody';
+import TableCell from '@mui/material/TableCell';
+import TableContainer from '@mui/material/TableContainer';
+import TablePagination from '@mui/material/TablePagination';
+import TableHead from '@mui/material/TableHead';
+import TableRow from '@mui/material/TableRow';
+import Paper from '@mui/material/Paper';
+
+
+async function retrieveData(molecule_id, data_type) {
+    try {
+        const response = await fetch(`/api/molecules/data/export/${molecule_id}?data_type=${data_type}&return_type=json`);
+        const data = await response.json();
+        return data;
+    } catch (error) {
+        debugger;
+        console.log(error);
+        return null;
+    }
+}
+
+export default function MoleculeDataTable({molecule_id, initial_data_type}) {
+    const [moleculeData, setMoleculeData] = useState(null);
+    const [data_type, setDataType] = useState(initial_data_type);
+    const [rowsPerPage, setRowsPerPage] = useState(10);
+    const [page, setPage] = useState(0);
+
+    useEffect(() => {
+        async function fetchData() {
+            const data = await retrieveData(molecule_id, data_type);
+            setMoleculeData(data);
+        }
+
+        fetchData();
+    }, [data_type, molecule_id]);
+    
+  return (
+    <TableContainer component={Paper}>
+      <Table sx={{ minWidth: 650 }} aria-label="molecule_data_table" stickyHeader>
+        <TableHead>
+          <TableRow>
+            <TableCell>Property</TableCell>
+            <TableCell align="right">Value</TableCell>
+          </TableRow>
+        </TableHead>
+        <TableBody>
+        {moleculeData && 
+        Object.keys(moleculeData)
+        .filter(key => key !== 'smiles' && key !== 'molecule_id')
+        .map((key) => (
+            
+            <TableRow key={key}>
+                <TableCell align="left">{key}</TableCell>
+                <TableCell align="right">{moleculeData[key]}</TableCell>
+            </TableRow>
+            ))};
+        </TableBody>
+      </Table>
+    </TableContainer>
+  );
+}
\ No newline at end of file

From b1dcfb17bbf4aac89a516c95f2077815fc2a2be8 Mon Sep 17 00:00:00 2001
From: Jessica Nash <janash@vt.edu>
Date: Sun, 1 Oct 2023 18:14:48 -0400
Subject: [PATCH 5/7] molecule data table with switch (no download)

---
 frontend/src/components/MoleculeDataTable.jsx | 133 +++++++++++++-----
 1 file changed, 94 insertions(+), 39 deletions(-)

diff --git a/frontend/src/components/MoleculeDataTable.jsx b/frontend/src/components/MoleculeDataTable.jsx
index 400145d..9a00c6d 100644
--- a/frontend/src/components/MoleculeDataTable.jsx
+++ b/frontend/src/components/MoleculeDataTable.jsx
@@ -1,12 +1,17 @@
-import React, {useEffect, useState} from 'react';
-import Table from '@mui/material/Table';
-import TableBody from '@mui/material/TableBody';
-import TableCell from '@mui/material/TableCell';
-import TableContainer from '@mui/material/TableContainer';
-import TablePagination from '@mui/material/TablePagination';
-import TableHead from '@mui/material/TableHead';
-import TableRow from '@mui/material/TableRow';
+import React, { useEffect, useState } from 'react';
+import { DataGrid, GridFooterContainer, GridFooter } from "@mui/x-data-grid";
 import Paper from '@mui/material/Paper';
+import Typography from '@mui/material/Typography';
+import { Select, MenuItem } from '@mui/material';
+import Button from '@mui/material/Button';
+
+// Maps the labels shown in the data-type selector to the `data_type` query values sent to the export API.
+const dataTypeMapping = {
+    "ML Data": "ml",
+    "DFT Data": "dft",
+    "XTB Data": "xtb",
+    "XTB_NI Data": "xtb_ni"
+};
 
 
 async function retrieveData(molecule_id, data_type) {
@@ -15,49 +20,99 @@ async function retrieveData(molecule_id, data_type) {
         const data = await response.json();
         return data;
     } catch (error) {
-        debugger;
         console.log(error);
         return null;
     }
 }
 
-export default function MoleculeDataTable({molecule_id, initial_data_type}) {
+function CustomFooter({ selectedDataType, setSelectedDataType }) {
+    const handleChange = (event) => {
+      setSelectedDataType(event.target.value);
+    };
+  
+    return (
+      <GridFooterContainer>
+        <Typography sx={{ color: 'gray', display: 'inline-block', verticalAlign: 'middle' }}>
+          Data Type:
+        </Typography>
+        <Select
+          value={selectedDataType}
+          onChange={handleChange}
+          displayEmpty
+          sx={{ marginLeft: '8px', marginRight: '16px', display: 'inline-block', verticalAlign: 'middle' }}
+        >
+          <MenuItem value="ML Data">ML Data</MenuItem>
+          <MenuItem value="DFT Data">DFT Data</MenuItem>
+          <MenuItem value="XTB Data">XTB Data</MenuItem>
+          <MenuItem value="XTB_NI Data">XTB_NI Data</MenuItem>
+        </Select>
+        <GridFooter sx={{
+          border: 'none', // To delete double border.
+        }} />
+      </GridFooterContainer>
+    );
+  }
+  
+  
+
+export default function MoleculeDataTable({ molecule_id, initial_data_type }) {
     const [moleculeData, setMoleculeData] = useState(null);
     const [data_type, setDataType] = useState(initial_data_type);
-    const [rowsPerPage, setRowsPerPage] = useState(10);
-    const [page, setPage] = useState(0);
+    const [selectedDataType, setSelectedDataType] = useState("ML Data"); // Set the default value to "ML Data"
 
     useEffect(() => {
         async function fetchData() {
-            const data = await retrieveData(molecule_id, data_type);
+            let data = await retrieveData(molecule_id, dataTypeMapping["DFT Data"]);
+    
+            // If DFT data is empty, default to ML Data
+            if (!data || Object.keys(data).length === 0) {
+                setSelectedDataType("ML Data");
+                data = await retrieveData(molecule_id, dataTypeMapping["ML Data"]);
+            } else {
+                setSelectedDataType("DFT Data");
+            }
             setMoleculeData(data);
         }
-
-        fetchData();
-    }, [data_type, molecule_id]);
     
-  return (
-    <TableContainer component={Paper}>
-      <Table sx={{ minWidth: 650 }} aria-label="molecule_data_table" stickyHeader>
-        <TableHead>
-          <TableRow>
-            <TableCell>Property</TableCell>
-            <TableCell align="right">Value</TableCell>
-          </TableRow>
-        </TableHead>
-        <TableBody>
-        {moleculeData && 
-        Object.keys(moleculeData)
+        fetchData();
+    }, [molecule_id]);
+
+    useEffect(() => {
+        setDataType(dataTypeMapping[selectedDataType]);
+        console.log(data_type)
+    }, [selectedDataType]);
+
+    const columns = [
+        { field: 'property', headerName: 'Property', filterable: true, flex: true },
+        { field: 'value', headerName: 'Value', width: 150, filterable: true, headerAlign: 'right', align: 'right', flex: true }
+    ];
+
+    const rows = moleculeData ? Object.keys(moleculeData)
         .filter(key => key !== 'smiles' && key !== 'molecule_id')
-        .map((key) => (
-            
-            <TableRow key={key}>
-                <TableCell align="left">{key}</TableCell>
-                <TableCell align="right">{moleculeData[key]}</TableCell>
-            </TableRow>
-            ))};
-        </TableBody>
-      </Table>
-    </TableContainer>
-  );
+        .map(key => ({
+            id: key,
+            property: key,
+            value: moleculeData[key],
+        })) : [];
+
+    return (
+        <Paper elevation={3} style={{ height: 400, width: '100%' }}>
+            <DataGrid
+                rows={rows}
+                columns={columns}
+                components={{Footer: CustomFooter}}
+                componentsProps={{
+                    footer: { selectedDataType, setSelectedDataType }
+                }}
+                initialState={{
+                    pagination: {
+                        paginationModel: {
+                            pageSize: 25, // This sets the initial page size
+                        },
+                    },
+                }}
+                pageSizeOptions={[5, 10, 25, 50, 100]} 
+            />
+        </Paper>
+    );
 }
\ No newline at end of file

From a32378724edb52951972b67ae1e748c2942b94a6 Mon Sep 17 00:00:00 2001
From: Jessica Nash <janash@vt.edu>
Date: Sun, 1 Oct 2023 18:45:20 -0400
Subject: [PATCH 6/7] add molecule data table component

---
 frontend/src/components/MoleculeDataTable.jsx | 63 ++++++++++++++++---
 1 file changed, 53 insertions(+), 10 deletions(-)

diff --git a/frontend/src/components/MoleculeDataTable.jsx b/frontend/src/components/MoleculeDataTable.jsx
index 9a00c6d..0d77a20 100644
--- a/frontend/src/components/MoleculeDataTable.jsx
+++ b/frontend/src/components/MoleculeDataTable.jsx
@@ -14,10 +14,12 @@ const dataTypeMapping = {
 };
 
 
-async function retrieveData(molecule_id, data_type) {
+async function retrieveData(molecule_id, data_type="ml") {
     try {
         const response = await fetch(`/api/molecules/data/export/${molecule_id}?data_type=${data_type}&return_type=json`);
         const data = await response.json();
+        console.log(data_type);
+        console.log(data);
         return data;
     } catch (error) {
         console.log(error);
@@ -25,16 +27,34 @@ async function retrieveData(molecule_id, data_type) {
     }
 }
 
-function CustomFooter({ selectedDataType, setSelectedDataType }) {
+// Fetch the molecule's data of the given type as CSV and trigger a browser download of it.
+async function downloadData(molecule_id, data_type) {
+    try {
+        const response = await fetch(`/api/molecules/data/export/${molecule_id}?data_type=${data_type}&return_type=csv`);
+        if (response.status !== 200) {
+            console.error("Failed to fetch CSV", response.status);
+            return;
+        }
+        const url = window.URL.createObjectURL(await response.blob());
+        const a = document.createElement('a');
+        a.href = url;
+        a.download = `data_${molecule_id}_${data_type}.csv`;
+        document.body.appendChild(a); // a detached anchor's click() is ignored by some older browsers
+        a.click();
+        a.remove();
+        window.URL.revokeObjectURL(url);
+    } catch (error) {
+        console.error(error);
+    }
+}
+
+function CustomFooter({ selectedDataType, setSelectedDataType, moleculeID }) {
     const handleChange = (event) => {
       setSelectedDataType(event.target.value);
     };
   
     return (
       <GridFooterContainer>
-        <Typography sx={{ color: 'gray', display: 'inline-block', verticalAlign: 'middle' }}>
-          Data Type:
-        </Typography>
         <Select
           value={selectedDataType}
           onChange={handleChange}
@@ -46,6 +66,9 @@ function CustomFooter({ selectedDataType, setSelectedDataType }) {
           <MenuItem value="XTB Data">XTB Data</MenuItem>
           <MenuItem value="XTB_NI Data">XTB_NI Data</MenuItem>
         </Select>
+        <Button variant="contained" color="primary" sx={{ marginLeft: 'auto', marginRight: '16px', display: 'inline-block', verticalAlign: 'middle' }} onClick={() => { downloadData(moleculeID, dataTypeMapping[selectedDataType]) }} >
+            Download as CSV
+        </Button>
         <GridFooter sx={{
           border: 'none', // To delete double border.
         }} />
@@ -53,21 +76,30 @@ function CustomFooter({ selectedDataType, setSelectedDataType }) {
     );
   }
   
+// Overlay passed to the DataGrid as NoRowsOverlay; names the data type that has no rows to show.
+function CustomNoRowsOverlay({ selectedDataType }) {
+    const centered = { display: 'flex', alignItems: 'center', justifyContent: 'center', height: '100%' };
+    return <div style={centered}>
+        <Typography variant="h6">{selectedDataType} is not available for this molecule.</Typography>
+    </div>;
+}
+
   
 
 export default function MoleculeDataTable({ molecule_id, initial_data_type }) {
     const [moleculeData, setMoleculeData] = useState(null);
     const [data_type, setDataType] = useState(initial_data_type);
     const [selectedDataType, setSelectedDataType] = useState("ML Data"); // Set the default value to "ML Data"
+    const moleculeID = molecule_id; // derived from the prop (not copied into state) so a new molecule_id re-runs the effects
 
     useEffect(() => {
         async function fetchData() {
-            let data = await retrieveData(molecule_id, dataTypeMapping["DFT Data"]);
+            let data = await retrieveData(moleculeID, dataTypeMapping["DFT Data"]);
     
             // If DFT data is empty, default to ML Data
             if (!data || Object.keys(data).length === 0) {
                 setSelectedDataType("ML Data");
-                data = await retrieveData(molecule_id, dataTypeMapping["ML Data"]);
+                data = await retrieveData(moleculeID, dataTypeMapping["ML Data"]);
             } else {
                 setSelectedDataType("DFT Data");
             }
@@ -75,7 +107,17 @@ export default function MoleculeDataTable({ molecule_id, initial_data_type }) {
         }
     
         fetchData();
-    }, [molecule_id]);
+    }, [moleculeID]);
+
+    useEffect(() => {
+        async function fetchData() {
+            const data = await retrieveData(moleculeID, data_type);
+            setMoleculeData(data);
+        }
+
+        fetchData();
+
+    }, [data_type]);
 
     useEffect(() => {
         setDataType(dataTypeMapping[selectedDataType]);
@@ -100,9 +142,10 @@ export default function MoleculeDataTable({ molecule_id, initial_data_type }) {
             <DataGrid
                 rows={rows}
                 columns={columns}
-                components={{Footer: CustomFooter}}
+                components={{Footer: CustomFooter, NoRowsOverlay: CustomNoRowsOverlay}}
                 componentsProps={{
-                    footer: { selectedDataType, setSelectedDataType }
+                    footer: { selectedDataType, setSelectedDataType, moleculeID },
+                    noRowsOverlay: { selectedDataType },
                 }}
                 initialState={{
                     pagination: {

From a291dac61c83ae02e9764145a5cd65965fc572b4 Mon Sep 17 00:00:00 2001
From: Jessica Nash <janash@vt.edu>
Date: Sun, 1 Oct 2023 18:45:58 -0400
Subject: [PATCH 7/7] switch molecule data table

---
 frontend/src/pages/Molecule.jsx | 89 +--------------------------------
 1 file changed, 2 insertions(+), 87 deletions(-)

diff --git a/frontend/src/pages/Molecule.jsx b/frontend/src/pages/Molecule.jsx
index 4f989ee..b2ba8e6 100644
--- a/frontend/src/pages/Molecule.jsx
+++ b/frontend/src/pages/Molecule.jsx
@@ -3,12 +3,12 @@ import React, { useEffect, useState } from 'react';
 import { useParams } from "react-router-dom";
 import { Box, Grid, Container, TextField, MenuItem, Card, CardContent, Select, InputLabel, FormControl, ThemeProvider} from "@mui/material";
 import Button from "@mui/material/Button";
-import { DataGrid, GridFooterContainer, GridFooter } from "@mui/x-data-grid";
 import Typography from '@mui/material/Typography';
 import { CircularProgress } from "@mui/material";
 import { retrieveSVG, theme } from "../common/MoleculeUtils";
 import { NGLStage, Component } from "../components/NGL"
 
+import MoleculeDataTable from "../components/MoleculeDataTable";
 
 async function molecule(molecule_id, signal) {
    /**
@@ -111,84 +111,6 @@ export default function MoleculeInfo() {
       }
    }
 
-   function downloadDataAsJSON() {
-      // Function to download all the molecule data as a JSON file.
-      const jsonData = JSON.stringify(molData);
-      const blob = new Blob([jsonData], { type: "application/json" });
-      const url = URL.createObjectURL(blob);
-  
-      const downloadLink = document.createElement("a");
-      downloadLink.href = url;
-      downloadLink.download = `${params.molid}_data.json`;
-  
-      downloadLink.click();
-    }
-
-   function Table(data) {
-      let columns = [];
-      let rows = [];
-
-      let keys = Object.keys(data);
-      let properties = Object.keys(data["max_data"])
-
-      columns.push({field: "id", flex: 1, headerClassName: "super-app-theme--header"});
-      
-      // Loop through all the keys and create columns and rows. Avoid boltzmann_averaged_data since it does not have the same keys as the rest.
-      for (const element of keys) {
-         if (element != "boltzmann_averaged_data")
-         {
-            columns.push({field: element, flex: 0.75, headerClassName: "super-app-theme--header"});
-         }
-      }
-
-      // Make the rows of the table
-      for (const property of properties) {
-         let newObj = {id: property};
-         for (const element of keys) {
-            if (element != "boltzmann_averaged_data")
-            {
-               newObj[element] = data[element][property];
-            }
-         }
-         rows.push(newObj);
-      }
-
-      function CustomFooter () {
-         return (
-           <GridFooterContainer>
-             <Typography sx= {{mx: 1, color: 'gray'}}> ML Data </Typography>
-             <GridFooter sx={{
-               border: 'none', // To delete double border.
-               }} />
-           </GridFooterContainer>
-         );
-       }
-
-      return (
-         <Box
-            sx={{
-            width: '100%',
-            '& .super-app-theme--header': {
-               backgroundColor: '#393536',
-               color: 'white',
-            },
-            }}
-         >
-            <DataGrid
-               disableColumnMenu
-               rows={rows}
-               columns={columns}
-               components={{Footer: CustomFooter}}
-               initialState={{
-                  pagination: {
-                     paginationModel: { page: 0, pageSize: 4 },
-                  },
-               }}
-            />
-         </Box>
-      )
-   }
-
    function loadData(signal, molid) {
       /**
        * Main driver function which loads the neighbors for a molecule requested by the user.
@@ -266,7 +188,7 @@ export default function MoleculeInfo() {
                         </Card>}
             </Grid>
             <Grid item xs={(width > 1366) ? 6 : 12}>
-               {Object.keys(molData).length > 0 && Table(molData.ml_data)}
+               {Object.keys(molData).length > 0 && <MoleculeDataTable molecule_id={molData.molecule_id} initial_data_type="ml" />}
             </Grid>
             {(width > 768) && allConformers.length > 0 && conformer.length > 0 && <Grid item xs={(width > 1366) ? 6 : 12}>
                <Container>
@@ -317,13 +239,6 @@ export default function MoleculeInfo() {
             </Grid>
             }
             {Object.keys(molData).length > 0 && (width > 768) && <Grid item xs={12}>
-               <Box display="flex" justifyContent="center" alignItems="center">
-                  <ThemeProvider theme={theme}>
-                     <Button variant="contained" sx={{ my: 3 }} onClick={() => { downloadDataAsJSON();}}>
-                        Download
-                     </Button>
-                  </ThemeProvider>
-               </Box>
             </Grid>}
          </Grid>
       </Container>