Skip to content

Commit

Permalink
Updated bedms to new version + stability improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
khoroshevskyi committed Dec 3, 2024
1 parent aed8866 commit abe3de0
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 15 deletions.
2 changes: 1 addition & 1 deletion pephub/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.14.1"
__version__ = "0.14.2"
4 changes: 4 additions & 0 deletions pephub/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,7 @@
ARCHIVE_URL_PATH = "https://cloud2.databio.org/pephub/"

MAX_PROCESSED_PROJECT_SIZE = 5000

MAX_STANDARDIZED_PROJECT_SIZE = 100

BEDMS_REPO_URL = "databio/attribute-standardizer-model6"
43 changes: 30 additions & 13 deletions pephub/routers/api/v1/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,11 @@
ConfigResponseModel,
StandardizerResponse,
)
from ....const import MAX_PROCESSED_PROJECT_SIZE
from ....const import (
MAX_PROCESSED_PROJECT_SIZE,
BEDMS_REPO_URL,
MAX_STANDARDIZED_PROJECT_SIZE,
)
from .helpers import verify_updated_project

from bedms import AttrStandardizer
Expand Down Expand Up @@ -1182,33 +1186,46 @@ def delete_full_history(
response_model=StandardizerResponse,
)
async def get_standardized_cols(
pep: peppy.Project = Depends(get_project),
pep: dict = Depends(get_project),
schema: str = "",
):
"""
Standardize PEP metadata column headers using BEDmess.
Standardize PEP metadata column headers using BEDms.
:param namespace: pep: PEP string to be standardized
:param pep: PEP string to be standardized
:param schema: Schema for AttrStandardizer
:return dict: Standardized results
"""

if schema == "":
if schema == "" or schema not in ["ENCODE", "BEDBASE", "FAIRTRACKS"]:
raise HTTPException(
code=500,
detail="Schema is required! Available schemas are ENCODE and Fairtracks",
status_code=404,
detail="Schema not available! Available schemas are ENCODE, BEDBASE and FAIRTRACKS.",
)

if len(pep["_sample_dict"]) > MAX_STANDARDIZED_PROJECT_SIZE:
# raise HTTPException(
# status_code=400,
# detail=f"Project is too large. Cannot standardize. "
# f"Limit is {MAX_STANDARDIZED_PROJECT_SIZE} samples.",
# )
prj = peppy.Project.from_dict(
{
"_config": pep["_config"],
"_sample_dict": pep["_sample_dict"][:50],
}
)
return {}

prj = peppy.Project.from_dict(pep)
model = AttrStandardizer(schema)
else:
prj = peppy.Project.from_dict(pep)
model = AttrStandardizer(repo_id=BEDMS_REPO_URL, model_name=schema.lower())

try:
results = model.standardize(pep=prj)
except Exception:
except Exception as e:
_LOGGER.error(f"Error standardizing PEP. {e}")
raise HTTPException(
code=400,
status_code=400,
detail=f"Error standardizing PEP.",
)

Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-all.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ fastembed
numpy<2.0.0
slowapi
cachetools>=4.2.4
bedms>=0.1.0
bedms>=0.2.0

0 comments on commit abe3de0

Please sign in to comment.