Skip to content

Commit

Permalink
Merge pull request #71 from databio/dev
Browse files Browse the repository at this point in the history
Release 0.10.0
  • Loading branch information
khoroshevskyi authored Jan 3, 2025
2 parents 5138c2a + 1549194 commit 7433e04
Show file tree
Hide file tree
Showing 18 changed files with 3,630 additions and 176 deletions.
18 changes: 10 additions & 8 deletions .github/workflows/run-pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
pytest:
strategy:
matrix:
python-version: ["3.9", "3.11"]
python-version: ["3.9", "3.12"]
os: [ubuntu-latest] # can't use macOS when using service containers or container jobs
runs-on: ${{ matrix.os }}
services:
Expand All @@ -27,21 +27,23 @@ jobs:
- 5432:5432
options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
steps:
- uses: actions/checkout@v2

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install uv
run: pip install uv

- name: Install dev dependencies
run: if [ -f requirements/requirements-dev.txt ]; then pip install -r requirements/requirements-dev.txt; fi
run: if [ -f requirements/requirements-dev.txt ]; then uv pip install -r requirements/requirements-dev.txt --system; fi

- name: Install test dependencies
run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi
run: if [ -f requirements/requirements-test.txt ]; then uv pip install -r requirements/requirements-test.txt --system; fi

- name: Install package
run: python -m pip install .
run: uv pip install . --system

- name: Run pytest tests
run: pytest tests -x -vv
2 changes: 1 addition & 1 deletion bbconf/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.9.0"
__version__ = "0.10.0"
20 changes: 18 additions & 2 deletions bbconf/bbagent.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import logging

from functools import cached_property
from pathlib import Path
from typing import List, Union
Expand All @@ -12,19 +14,26 @@
from bbconf.modules.bedsets import BedAgentBedSet
from bbconf.modules.objects import BBObjects

from .const import PKG_NAME

_LOGGER = logging.getLogger(PKG_NAME)


class BedBaseAgent(object):
def __init__(
self,
config: Union[Path, str],
init_ml: bool = True,
):
"""
Initialize connection to the pep_db database. You can use The basic connection parameters
Initialize connection to the pep_db database. You can use the basic connection parameters
or libpq connection string.
:param config: path to the configuration file
:param init_ml: initialize ML models for search (default: True)
"""

self.config = BedBaseConfig(config)
self.config = BedBaseConfig(config, init_ml)

self._bed = BedAgentBedFile(self.config, self)
self._bedset = BedAgentBedSet(self.config)
Expand All @@ -42,6 +51,13 @@ def bedset(self) -> BedAgentBedSet:
def objects(self) -> BBObjects:
return self._objects

def __repr__(self) -> str:
    """
    Build a multi-line, human-readable summary of the agent.

    The first line shows the configuration object; the following lines are
    the string forms of the ``bed``, ``bedset`` and ``objects`` attributes,
    in that order.

    :return: newline-separated description of the agent and its sub-agents
    """
    # Collect the pieces in a list instead of accumulating into a local
    # named ``repr``, which would shadow the builtin of the same name.
    lines = [
        f"BedBaseAgent(config={self.config})",
        f"{self.bed}",
        f"{self.bedset}",
        f"{self.objects}",
    ]
    return "\n".join(lines)

def get_stats(self) -> StatsReturn:
"""
Get statistics for a bed file
Expand Down
67 changes: 54 additions & 13 deletions bbconf/config_parser/bedbaseconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,38 @@
_LOGGER = logging.getLogger(PKG_NAME)


class BedBaseConfig:
def __init__(self, config: Union[Path, str]):
class BedBaseConfig(object):
"""
Class to handle BEDbase configuration file and create objects for different modules.
"""

def __init__(self, config: Union[Path, str], init_ml: bool = True):
"""
Initialize BedBaseConfig object
:param config: path to the configuration file
:param init_ml: initialize machine learning models used for search
"""

self.cfg_path = get_bedbase_cfg(config)
self._config = self._read_config_file(self.cfg_path)

self._db_engine = self._init_db_engine()

self._qdrant_engine = self._init_qdrant_backend()
self._qdrant_text_engine = self._init_qdrant_text_backend()
self._b2bsi = self._init_b2bsi_object()
self._r2v = self._init_r2v_object()
self._bivec = self._init_bivec_object()

if init_ml:
self._b2bsi = self._init_b2bsi_object()
self._r2v = self._init_r2v_object()
self._bivec = self._init_bivec_object()
else:
_LOGGER.info(
f"Skipping initialization of ML models, init_ml parameter set to False."
)

self._b2bsi = None
self._r2v = None
self._bivec = None

self._phc = self._init_pephubclient()
self._boto3_client = self._init_boto3_client()
Expand Down Expand Up @@ -182,6 +203,11 @@ def zarr_root(self) -> Union[Z_GROUP, None]:
return zarr.group(store=cache, overwrite=False)

def _init_db_engine(self) -> BaseEngine:
"""
Create database engine object using credentials provided in config file
"""

_LOGGER.info(f"Initializing database engine...")
return BaseEngine(
host=self._config.database.host,
port=self._config.database.port,
Expand All @@ -197,6 +223,8 @@ def _init_qdrant_backend(self) -> QdrantBackend:
:return: QdrantClient
"""

_LOGGER.info(f"Initializing qdrant engine...")
try:
return QdrantBackend(
collection=self._config.qdrant.file_collection,
Expand All @@ -210,19 +238,27 @@ def _init_qdrant_backend(self) -> QdrantBackend:
f"error in Connection to qdrant! skipping... Error: {err}", UserWarning
)

def _init_qdrant_text_backend(self) -> QdrantBackend:
def _init_qdrant_text_backend(self) -> Union[QdrantBackend, None]:
"""
Create qdrant client text embedding object using credentials provided in config file
:return: QdrantClient
"""

return QdrantBackend(
dim=TEXT_EMBEDDING_DIMENSION,
collection=self.config.qdrant.text_collection,
qdrant_host=self.config.qdrant.host,
qdrant_api_key=self.config.qdrant.api_key,
)
_LOGGER.info(f"Initializing qdrant text engine...")
try:
return QdrantBackend(
dim=TEXT_EMBEDDING_DIMENSION,
collection=self.config.qdrant.text_collection,
qdrant_host=self.config.qdrant.host,
qdrant_api_key=self.config.qdrant.api_key,
)
except Exception as e:
_LOGGER.error(f"Error in Connection to qdrant text! skipping {e}")
warnings.warn(
"Error in Connection to qdrant text! skipping...", UserWarning
)
return None

def _init_bivec_object(self) -> Union[BiVectorSearchInterface, None]:
"""
Expand All @@ -231,9 +267,11 @@ def _init_bivec_object(self) -> Union[BiVectorSearchInterface, None]:
:return: BiVectorSearchInterface
"""

_LOGGER.info(f"Initializing BiVectorBackend...")
search_backend = BiVectorBackend(
metadata_backend=self._qdrant_text_engine, bed_backend=self._qdrant_engine
)
_LOGGER.info(f"Initializing BiVectorSearchInterface...")
search_interface = BiVectorSearchInterface(
backend=search_backend,
query2vec=self.config.path.text2vec,
Expand All @@ -247,6 +285,7 @@ def _init_b2bsi_object(self) -> Union[BED2BEDSearchInterface, None]:
:return: Bed2BEDSearchInterface object
"""
try:
_LOGGER.info(f"Initializing search interfaces...")
return BED2BEDSearchInterface(
backend=self.qdrant_engine,
query2vec=BED2Vec(model=self._config.path.region2vec),
Expand All @@ -267,6 +306,7 @@ def _init_pephubclient() -> Union[PEPHubClient, None]:
:return: PephubClient
"""
try:
_LOGGER.info(f"Initializing PEPHub client...")
return PEPHubClient()
except Exception as e:
_LOGGER.error(f"Error in creating PephubClient object: {e}")
Expand Down Expand Up @@ -298,6 +338,7 @@ def _init_r2v_object(self) -> Union[Region2VecExModel, None]:
Create Region2VecExModel object using credentials provided in config file
"""
try:
_LOGGER.info(f"Initializing R2V object...")
return Region2VecExModel(self.config.path.region2vec)
except Exception as e:
_LOGGER.error(f"Error in creating Region2VecExModel object: {e}")
Expand Down
16 changes: 14 additions & 2 deletions bbconf/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ class Bed(Base):
ref_classifier: Mapped["GenomeRefStats"] = relationship(
"GenomeRefStats", back_populates="bed", cascade="all, delete-orphan"
)
processed: Mapped[bool] = mapped_column(
default=False, comment="Whether the bed file was processed"
)


class BedMetadata(Base):
Expand Down Expand Up @@ -255,6 +258,11 @@ class Files(Base):
bedfile: Mapped["Bed"] = relationship("Bed", back_populates="files")
bedset: Mapped["BedSets"] = relationship("BedSets", back_populates="files")

__table_args__ = (
UniqueConstraint("name", "bedfile_id"),
UniqueConstraint("name", "bedset_id"),
)


class BedFileBedSetRelation(Base):
__tablename__ = "bedfile_bedset_relation"
Expand Down Expand Up @@ -303,6 +311,10 @@ class BedSets(Base):
author: Mapped[str] = mapped_column(nullable=True, comment="Author of the bedset")
source: Mapped[str] = mapped_column(nullable=True, comment="Source of the bedset")

processed: Mapped[bool] = mapped_column(
default=False, comment="Whether the bedset was processed"
)


class Universes(Base):
__tablename__ = "universes"
Expand Down Expand Up @@ -339,7 +351,7 @@ class TokenizedBed(Base):
nullable=False,
)
universe_id: Mapped[str] = mapped_column(
ForeignKey("universes.id", ondelete="CASCADE", passive_deletes=True),
ForeignKey("universes.id", ondelete="CASCADE"),
primary_key=True,
index=True,
nullable=False,
Expand All @@ -352,7 +364,7 @@ class TokenizedBed(Base):
universe: Mapped["Universes"] = relationship(
"Universes",
back_populates="tokenized",
passive_deletes=True,
passive_deletes="all",
)


Expand Down
6 changes: 6 additions & 0 deletions bbconf/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,9 @@ class QdrantInstanceNotInitializedError(BedBaseConfError):
"""Error type for missing qdrant instance"""

pass


class BedSetTrackHubLimitError(BedBaseConfError):
    """Raised when the limit for visualizing a trackhub is exceeded."""
2 changes: 2 additions & 0 deletions bbconf/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ def get_bedbase_cfg(cfg: str = None) -> str:
Optional, the $BEDBASE config env var will be used if not provided
:return str: absolute configuration file path
"""

_LOGGER.info(f"Loading configuration file: {cfg}")
selected_cfg = select_config(config_filepath=cfg, config_env_vars=CFG_ENV_VARS)
if not selected_cfg:
raise BedBaseConnectionError(
Expand Down
12 changes: 9 additions & 3 deletions bbconf/models/bed_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class BedStatsModel(BaseModel):


class BedPEPHub(BaseModel):
sample_name: str
sample_name: str = ""
genome: str = ""
organism: str = ""
species_id: str = ""
Expand Down Expand Up @@ -192,8 +192,8 @@ class BedListResult(BaseModel):

class QdrantSearchResult(BaseModel):
id: str
payload: dict
score: float
payload: dict = None
score: float = None
metadata: Union[BedMetadataBasic, None] = None


Expand Down Expand Up @@ -233,3 +233,9 @@ class RefGenValidModel(BaseModel):
tier_ranking: int

model_config = ConfigDict(extra="forbid")


class RefGenValidReturnModel(BaseModel):
    # Model grouping a list of RefGenValidModel entries under a single id.
    # NOTE(review): presumably `id` is the bed file identifier -- confirm against callers.
    id: str
    # Optional string; defaults to None.
    provided_genome: Union[str, None] = None
    # Required list of RefGenValidModel entries.
    compared_genome: List[RefGenValidModel]
Loading

0 comments on commit 7433e04

Please sign in to comment.