diff --git a/.gitignore b/.gitignore index 457c9dcc..38e11ca6 100644 --- a/.gitignore +++ b/.gitignore @@ -140,3 +140,4 @@ postgres/ .idea environment/local.env qdrant_storage/ +postgres-data/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index bc164bb3..6c942b7a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,6 +39,7 @@ COPY . /app COPY --from=build /src/dist web/dist/ RUN python -m pip install --upgrade pip -RUN pip install -r requirements/requirements-all.txt --no-cache-dir +RUN pip install -r requirements/requirements-all-docker.txt --no-cache-dir +RUN pip install torch --index-url https://download.pytorch.org/whl/cpu CMD ["uvicorn", "pephub.main:app", "--host", "0.0.0.0", "--port", "80"] diff --git a/README.md b/README.md index b123fa6c..c20d7acc 100644 --- a/README.md +++ b/README.md @@ -142,29 +142,18 @@ docker run -p 8000:8000 \ ### Option 2. `docker compose`: -The server has been Dockerized and packaged with a [postgres](https://hub.docker.com/_/postgres) image to be run with [`docker compose`](https://docs.docker.com/compose/). This lets you run everything at once and develop without having to manage database instances. The `docker-compose.yaml` file is written such that it mounts the database storage info to a folder called `postgres/` at the root of the repository. This lets you load the database once and have it persist its state after restarting the container. +The server has been Dockerized and packaged with a [postgres](https://hub.docker.com/_/postgres) image to be run with [`docker compose`](https://docs.docker.com/compose/). This lets you run everything at once and develop without having to manage database instances. -You can start a development environment in three steps: +You can start a development environment in two steps: -**1. Obtain the latest database schema:** - -```console -sh setup_db.sh -``` - -**2. Curate your environment:** +**1. 
Curate your environment:** Since we are running in `docker`, we need to supply environment variables to the container. The `docker-compose.yaml` file is written such that you can supply a `.env` file at the root with your configurations. See the [example env file](environment/template.env) for reference. See [here](docs/server-settings.md) for a detailed explanation of all configurable server settings. For now, you can simply copy the `env` file: ``` cp environment/template.env .env ``` -**3. Build and start the containers:** -If you are running on an Apple M1 chip, you will need to set the following env variable prior to running `docker compose`: - -```console -export DOCKER_DEFAULT_PLATFORM=linux/amd64 -``` +**2. Build and start the containers:** ```console docker compose up --build @@ -173,7 +162,7 @@ docker compose up --build `pephub` now runs/listens on http://localhost:8000 `postgres` now runs/listens on http://localhost:5432 -**3. Utilize the [`load_db`](scripts/load_db.py) script to populate the database with `examples/`:** +**3. (_Optional_) Utilize the [`load_db`](scripts/load_db.py) script to populate the database with `examples/`:** ```console cd scripts @@ -184,4 +173,19 @@ python load_db.py \ ../examples ``` +**4. (_Optional_) GitHub Authentication Client Setup** + +_pephub_ uses GitHub for namespacing and authentication. As such, a GitHub application capable of logging in users is required. We've [included instructions](https://github.com/pepkit/pephub/blob/master/docs/authentication.md#setting-up-github-oauth-for-your-own-server) for setting this up locally using your own GitHub account. + +**5. (_Optional_) Vector Database Setup** + +We've added [semantic-search](https://huggingface.co/course/chapter5/6?fw=tf#using-embeddings-for-semantic-search) capabilities to pephub. 
Optionally, you may host an instance of the [qdrant](https://qdrant.tech/) **vector database** to store embeddings computed using a sentence transformer that has mined and processed any relevant metadata from PEPs. If no qdrant connection settings are supplied, pephub will default to SQL search. Read more [here](docs/semantic-search.md). To run qdrant locally, simply run the following: + +``` +docker pull qdrant/qdrant +docker run -p 6333:6333 \ + -v $(pwd)/qdrant_storage:/qdrant/storage \ + qdrant/qdrant +``` + _Note: If you wish to run the development environment with a public database, curate your `.env` file as such._ diff --git a/docker-compose.yaml b/docker-compose.yaml index 8aed70b1..f05c2f1b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -2,9 +2,7 @@ version: '3.8' services: db: - build: - context: ./postgres - dockerfile: Dockerfile + image: postgres:latest ports: - 5432:5432 networks: @@ -34,4 +32,4 @@ volumes: networks: pephub: - driver: bridge \ No newline at end of file + driver: bridge diff --git a/pephub/exceptions.py b/pephub/exceptions.py deleted file mode 100644 index 119edc78..00000000 --- a/pephub/exceptions.py +++ /dev/null @@ -1,2 +0,0 @@ -class PepHubException(Exception): - pass diff --git a/pephub/helpers.py b/pephub/helpers.py index 58ef3183..25deb4fe 100644 --- a/pephub/helpers.py +++ b/pephub/helpers.py @@ -2,115 +2,14 @@ from typing import List, Union, Tuple from fastapi import Response, UploadFile from fastapi.exceptions import HTTPException -from ubiquerg import VersionInHelpParser -from os.path import exists, basename -from yaml import safe_load +from os.path import basename import zipfile import io import yaml import peppy from peppy.const import SAMPLE_DF_KEY -from pephub.exceptions import PepHubException - -from ._version import __version__ as v -from .const import DEFAULT_PORT, PKG_NAME - - -def build_parser(): - """ - Building argument parser - :return argparse.ArgumentParser - """ - banner = "%(prog)s - PEP web 
server" - additional_description = ( - "For subcommand-specific options, type: '%(prog)s -h'" - ) - additional_description += "\nhttps://github.com/pepkit/pepserver" - - parser = VersionInHelpParser( - prog=PKG_NAME, description=banner, epilog=additional_description - ) - - parser.add_argument( - "-V", "--version", action="version", version="%(prog)s {v}".format(v=v) - ) - - msg_by_cmd = {"serve": "run the server"} - - subparsers = parser.add_subparsers(dest="command") - - def add_subparser(cmd, description): - return subparsers.add_parser(cmd, description=description, help=description) - - sps = {} - # add arguments that are common for both subparsers - for cmd, desc in msg_by_cmd.items(): - sps[cmd] = add_subparser(cmd, desc) - sps[cmd].add_argument( - "-c", - "--config", - required=False, - dest="config", - help="A path to the pepserver config file", - ) - - sps["serve"].add_argument( - "-p", - "--port", - dest="port", - type=int, - help="The port the webserver should be run on.", - default=DEFAULT_PORT, - ) - - sps["serve"].add_argument( - "-r", - "--reload", - dest="reload", - type=bool, - help="Run the server in reload configuration", - default=False, - ) - - sps["serve"].add_argument( - "--log-level", - dest="log_level", - type=str, - help="The level of logging to use", - default="INFO", - ) - - sps["serve"].add_argument( - "--uvicorn-log-level", - dest="uvicorn_log_level", - type=str, - help="The level of logging to use for uvicorn", - default="info", - ) - - return parser - - -def read_server_configuration(path: str) -> dict: - """Read in a server configuration file at a specified path""" - if not exists(path): - raise FileNotFoundError(f"Configuration file at {path} could not be found.") - with open(path, "r") as f: - cfg = safe_load(f) - if cfg.get("data") is None: - raise PepHubException( - "'data' section is required in the configuration file." 
- ) - if cfg["data"].get("path") is None: - raise PepHubException( - "No path to PEPs was specified in the configuration file." - ) - - return { - "data": {"path": cfg["data"]["path"], "index": cfg["data"].get("index")} - } def get_project_sample_names(proj: peppy.Project) -> List[str]: diff --git a/pephub/main.py b/pephub/main.py index 1e822835..cf236b6c 100644 --- a/pephub/main.py +++ b/pephub/main.py @@ -71,6 +71,7 @@ # mount ui app.add_middleware(SPA) + # app.add_middleware(EnvironmentMiddleware) try: app.mount("/", StaticFiles(directory=SPA_PATH, html=True), name="spa") diff --git a/pephub/route_examples.py b/pephub/route_examples.py deleted file mode 100644 index 738a3d4e..00000000 --- a/pephub/route_examples.py +++ /dev/null @@ -1,27 +0,0 @@ -# this file is for API documentation route examples. - -from fastapi import Path, Query -from pydantic import BaseModel - -# example for /pep/{namespace} -example_namespace = Path( - ..., description="A namespace that holds projects.", regex=r"^\w+$", example="demo" -) - -example_project = Path( - ..., - description="A project name inside a particular namespace", - example="BiocProject", -) - -# example for /pep/{namespace}/{pep}/convert -example_filter = Query( - ..., - description="A valid eido conversion filter type. 
See /eido/filters for a list of valid filters.", - example="basic", -) - - -class ValidationRequest(BaseModel): - namespace: str - project: str diff --git a/pephub/routers/api/v1/namespace.py b/pephub/routers/api/v1/namespace.py index 361c415a..090b96dc 100644 --- a/pephub/routers/api/v1/namespace.py +++ b/pephub/routers/api/v1/namespace.py @@ -287,7 +287,7 @@ async def upload_raw_pep( # This configurations needed due to Issue #124 Should be removed in the future project_dict = ProjectRawModel(**project_from_json.pep_dict.dict()) - ff = project_dict.dict(by_alias=True) + ff = project_dict.model_dump(by_alias=True) p_project = peppy.Project().from_dict(ff) p_project.name = name diff --git a/pephub/routers/api/v1/project.py b/pephub/routers/api/v1/project.py index 9bce4b1c..810b42ca 100644 --- a/pephub/routers/api/v1/project.py +++ b/pephub/routers/api/v1/project.py @@ -67,7 +67,7 @@ async def get_a_pep( raw_project = ProjectRawModel(**proj) except Exception: raise HTTPException(500, "Unexpected project error!") - return raw_project.dict(by_alias=False) + return raw_project.model_dump(by_alias=False) samples = [s.to_dict() for s in proj.samples] sample_table_index = proj.sample_table_index @@ -77,7 +77,7 @@ async def get_a_pep( sample_attributes = proj._samples[0]._attributes proj_dict = proj.to_dict() - proj_annotation_dict = proj_annotation.dict() + proj_annotation_dict = proj_annotation.model_dump() # default to name from annotation if hasattr(proj, "name") and hasattr(proj_annotation, "name"): @@ -242,7 +242,7 @@ async def update_a_pep( # update "meta meta data" update_dict = {} # dict used to pass to the `db.update_item` function - for k, v in updated_project.dict(exclude_unset=True).items(): + for k, v in updated_project.model_dump(exclude_unset=True).items(): # is the value an attribute of the peppy project? 
if k in new_raw_project: new_raw_project[k] = v @@ -277,7 +277,7 @@ async def update_a_pep( # "project": raw_peppy_project, "registry": f"{namespace}/{project}:{tag}", "api_endpoint": f"/api/v1/namespaces/{namespace}/{project}", - "project": updated_project.dict(), + "project": updated_project.model_dump(), }, status_code=202, ) diff --git a/pephub/routers/models.py b/pephub/routers/models.py index f0ac6f37..86eeb262 100644 --- a/pephub/routers/models.py +++ b/pephub/routers/models.py @@ -1,5 +1,5 @@ from typing import Optional, List -from pydantic import BaseModel, Field, Extra +from pydantic import BaseModel, Field, ConfigDict from pepdbagent.models import UpdateItems from pepdbagent.const import DEFAULT_TAG @@ -8,13 +8,12 @@ class ProjectOptional(UpdateItems): # sample table is a list of JSON objects - sample_table: Optional[List[dict]] - project_config_yaml: Optional[str] - description: Optional[str] - subsample_tables: Optional[List[List[dict]]] + sample_table: Optional[List[dict]] = None + project_config_yaml: Optional[str] = None + description: Optional[str] = None + subsample_tables: Optional[List[List[dict]]] = None - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class SearchQuery(BaseModel): @@ -27,7 +26,7 @@ class SearchQuery(BaseModel): class RawValidationQuery(BaseModel): project_config: str - sample_table: Optional[str] + sample_table: Optional[str] = None class TokenExchange(BaseModel): @@ -60,21 +59,18 @@ class JWTDeviceTokenResponse(BaseModel): class ProjectRawModel(BaseModel): config: dict = Field(alias="_config") - subsample_list: Optional[list] = Field(alias="_subsample_list") + subsample_list: Optional[list] = Field(alias="_subsample_list", default=None) sample_list: list[dict] = Field(alias="_sample_dict") - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class ProjectRawRequest(BaseModel): config: dict - subsample_list: 
Optional[List[List[dict]]] + sample_list: List[dict] - class Config: - allow_population_by_field_name = True - extra = Extra.allow + model_config = ConfigDict(populate_by_name=True, extra="allow") class ProjectJsonRequest(BaseModel): diff --git a/requirements/requirements-all-docker.txt b/requirements/requirements-all-docker.txt new file mode 100644 index 00000000..3c75c7d7 --- /dev/null +++ b/requirements/requirements-all-docker.txt @@ -0,0 +1,18 @@ +fastapi>=0.72.0 +aiofiles +eido +logmuse>=0.2.7 +jinja2>=3.1.2 +yacman>=0.8.4 +python-multipart>=0.0.5 +tqdm +uvicorn +python-dotenv +pepdbagent>=0.6.0 +peppy<=0.40.0a4 +qdrant-client +requests +pyjwt[crypto] +coloredlogs +-f https://download.pytorch.org/whl/torch_stable.html +sentence-transformers