Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add DocumentStoreSSHClient #45

Merged
merged 11 commits into from
Jun 14, 2024
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,6 @@ dmypy.json

# MacOs
**/.DS_Store

# VSCode
.vscode/
36 changes: 36 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,43 @@ We have two primary databases. A Document store to keep unstructured json docume

### Document Store
We have some convenience methods to interact with our Document Store. You can create a client either by setting credentials explicitly or by retrieving them from AWS Secrets Manager.

__To connect from outside of our VPC:__

1. If using credentials from the environment, set the following variables:
```sh
DOC_DB_HOST=docdb-us-west-2-****.cluster-************.us-west-2.docdb.amazonaws.com
DOC_DB_USERNAME=doc_db_username
DOC_DB_PASSWORD=doc_db_password
DOC_DB_SSH_HOST=ssh_host
DOC_DB_SSH_USERNAME=ssh_username
DOC_DB_SSH_PASSWORD=ssh_password
```
2. Usage:
```python
from aind_data_access_api.document_db_ssh import (
    DocumentDbSSHClient,
    DocumentDbSSHCredentials,
)

# Method 1) if credentials are set in environment
credentials = DocumentDbSSHCredentials()

# Method 2) if you have permissions to AWS Secrets Manager
# Each secret must contain corresponding "host", "username", and "password"
credentials = DocumentDbSSHCredentials.from_secrets_manager(
    doc_db_secret_name="/doc/store/secret/name",
    ssh_secret_name="/ssh/tunnel/secret/name",
)

# The context manager opens the SSH tunnel on entry and closes it on exit.
with DocumentDbSSHClient(credentials=credentials) as doc_db_client:
    # To get a list of filtered records:
    # (named `query` rather than `filter` to avoid shadowing the builtin)
    query = {"subject.subject_id": "123456"}
    projection = {
        "name": 1,
        "created": 1,
        "location": 1,
        "subject.subject_id": 1,
        "subject.date_of_birth": 1,
    }
    count = doc_db_client.collection.count_documents(query)
    response = list(
        doc_db_client.collection.find(filter=query, projection=projection)
    )
```

__To connect from within our VPC:__
```python
from aind_data_access_api.credentials import DocumentStoreCredentials
from aind_data_access_api.document_store import Client

Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ secrets = [
"boto3",
]
docdb = [
"pymongo==4.3.3"
"pymongo==4.3.3",
"sshtunnel"
]
rds = [
"psycopg2-binary==2.9.5",
Expand Down
1 change: 1 addition & 0 deletions src/aind_data_access_api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
"""Init package"""

__version__ = "0.8.1"
149 changes: 149 additions & 0 deletions src/aind_data_access_api/document_db_ssh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""Module to interface with the Document Database using SSH tunneling."""

import logging

from pydantic import Field, SecretStr
from pydantic_settings import SettingsConfigDict
from pymongo import MongoClient
from sshtunnel import SSHTunnelForwarder

from aind_data_access_api.credentials import CoreCredentials
from aind_data_access_api.secrets import get_secret


class DocumentDbSSHCredentials(CoreCredentials):
    """Document Store credentials with SSH tunneling.

    ``model_config`` sets the ``DOC_DB_`` environment prefix and ignores
    extra inputs. Fields without a default (``host``, ``username``,
    ``password``, ``ssh_host``, ``ssh_username``, ``ssh_password``) are
    required.
    """

    model_config = SettingsConfigDict(env_prefix="DOC_DB_", extra="ignore")

    host: str = Field(..., description="The host of the document database")
    port: int = Field(
        default=27017, description="The port of the document database"
    )
    username: str = Field(..., description="The username for authentication")
    password: SecretStr = Field(
        ..., description="The password for authentication"
    )
    database: str = Field(
        default="metadata_index", description="The name of the database"
    )
    collection: str = Field(
        default="data_assets", description="The name of the collection"
    )
    ssh_local_bind_address: str = Field(
        default="localhost",
        description="The local bind address for SSH tunneling",
    )
    ssh_host: str = Field(..., description="The host of the SSH server")
    ssh_port: int = Field(default=22, description="The port of the SSH server")
    ssh_username: str = Field(
        ..., description="The username for SSH authentication"
    )
    ssh_password: SecretStr = Field(
        ..., description="The password for SSH authentication"
    )

    @classmethod
    def from_secrets_manager(
        cls, doc_db_secret_name: str, ssh_secret_name: str
    ):
        """
        Construct class from AWS Secrets Manager

        Parameters
        ----------
        doc_db_secret_name : str
          The name of the secret that contains the document store credentials
          (host, port, username, password).
        ssh_secret_name : str
          The name of the secret that contains the ssh credentials
          (host, username, password).

        Returns
        -------
        An instance of ``cls`` built from both secrets. Keys of the ssh
        secret are prefixed with ``ssh_`` so they map onto the ``ssh_*``
        fields; keys of the document store secret are passed through as-is.
        """
        docdb_secret = get_secret(doc_db_secret_name)
        ssh_secret = get_secret(ssh_secret_name)
        ssh_kwargs = {f"ssh_{key}": value for key, value in ssh_secret.items()}
        # Build via ``cls`` (not the hard-coded class name) so that calling
        # this constructor on a subclass returns an instance of the subclass.
        return cls(**docdb_secret, **ssh_kwargs)


class DocumentDbSSHClient:
    """Class to establish a Document Store client with SSH tunneling.

    Call ``start()``/``close()`` explicitly, or use the instance as a
    context manager, which does both. ``collection`` is only available
    while the client is started.
    """

    def __init__(self, credentials: "DocumentDbSSHCredentials"):
        """
        Construct a client to interface with a Document Database.

        Parameters
        ----------
        credentials : DocumentDbSSHCredentials
        """
        self.credentials = credentials
        self.database_name = credentials.database
        self.collection_name = credentials.collection
        # Populated by start() and reset to None by close().
        self._client = None
        self._ssh_server = None

    @property
    def collection(self):
        """Collection of metadata records in Document Database.

        Raises
        ------
        RuntimeError
          If the client has not been started (or was already closed).
        """
        if self._client is None:
            raise RuntimeError(
                "DocumentDbSSHClient is not started. Call start() or use it "
                "as a context manager first."
            )
        return self._client[self.database_name][self.collection_name]

    def _create_mongo_client(self):
        """
        Create a MongoClient to connect to the Document Store.
        Connects to the local end of the SSH tunnel (the local bind address
        on the document database port) rather than to the remote host.
        Uses retryWrites=False to enable writing to AWS DocumentDB.
        Uses authMechanism="SCRAM-SHA-1" for complex usernames.
        """
        return MongoClient(
            host=self.credentials.ssh_local_bind_address,
            port=self.credentials.port,
            retryWrites=False,
            directConnection=True,
            username=self.credentials.username,
            password=self.credentials.password.get_secret_value(),
            authSource="admin",
            authMechanism="SCRAM-SHA-1",
        )

    def _create_ssh_tunnel(self):
        """Create an SSH tunnel to the Document Database.

        The tunnel forwards the local bind address, on the same port number
        as the document database, to the remote database host and port.
        """
        return SSHTunnelForwarder(
            ssh_address_or_host=(
                self.credentials.ssh_host,
                self.credentials.ssh_port,
            ),
            ssh_username=self.credentials.ssh_username,
            ssh_password=self.credentials.ssh_password.get_secret_value(),
            remote_bind_address=(self.credentials.host, self.credentials.port),
            local_bind_address=(
                self.credentials.ssh_local_bind_address,
                self.credentials.port,
            ),
        )

    def start(self):
        """Start the client and SSH tunnel.

        The tunnel is started before the MongoClient is created so the
        client never tries to reach a local port that is not yet forwarded.
        If any step fails, whatever was already opened is closed again
        before the exception propagates.
        """
        self._ssh_server = self._create_ssh_tunnel()
        try:
            self._ssh_server.start()
            self._client = self._create_mongo_client()
            # Forces a round trip, so connection problems surface here.
            server_info = self._client.server_info()
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so clean up
            # here to avoid leaving the tunnel running.
            self.close()
            raise
        logging.info(server_info)
        logging.info(
            "Connected to %s:%s as %s",
            self.credentials.host,
            self.credentials.port,
            self.credentials.username,
        )

    def close(self):
        """Close the client and SSH tunnel.

        Safe to call when nothing was started and safe to call repeatedly.
        The tunnel is stopped even if closing the client raises.
        """
        client, tunnel = self._client, self._ssh_server
        self._client = None
        self._ssh_server = None
        try:
            if client is not None:
                client.close()
        finally:
            if tunnel is not None:
                tunnel.stop()
        logging.info("DocDB SSH session closed.")

    def __enter__(self):
        """Enter the context manager."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Exit the context manager."""
        self.close()
1 change: 1 addition & 0 deletions src/aind_data_access_api/document_store.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module to interface with the Document Store"""

import json
import logging
from typing import Iterator, List, Optional
Expand Down
3 changes: 2 additions & 1 deletion src/aind_data_access_api/rds_tables.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
"""Module to interface with the Relational Database"""

from typing import Optional, Union
from typing_extensions import Self

import pandas as pd
import sqlalchemy.engine
from pydantic import AliasChoices, Field, SecretStr, model_validator
from pydantic_settings import SettingsConfigDict
from sqlalchemy import create_engine, engine, text
from sqlalchemy.engine.cursor import CursorResult
from typing_extensions import Self

from aind_data_access_api.credentials import CoreCredentials

Expand Down
1 change: 1 addition & 0 deletions src/aind_data_access_api/secrets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module to access secrets and parameters"""

import json

import boto3
Expand Down
Loading
Loading