Skip to content

Commit

Permalink
Dom update chemicals (#140)
Browse files (browse the repository at this point in the history)
* fixed #136 
* fixed #138
  • Loading branch information
terazus authored Jan 22, 2024
1 parent 7664fdf commit c4e0d76
Show file tree
Hide file tree
Showing 9 changed files with 148 additions and 14 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ credentials.json
.env
.env.prod
envv/*
ptmd.db
ptmd.db
scripts/
14 changes: 6 additions & 8 deletions ptmd/boot/file_parsers/parse_chemicals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""
from __future__ import annotations

from pandas import read_excel, DataFrame
from pandas import read_csv, DataFrame

from ptmd.const import CHEMICALS_FILEPATH, BASE_IDENTIFIER

Expand All @@ -13,14 +13,12 @@ def parse_chemicals() -> list[dict]:
:return: A list of chemicals.
"""
chemicals: list[dict] = []
chemicals_dataframe: DataFrame = read_excel(CHEMICALS_FILEPATH,
engine='openpyxl',
sheet_name="SUMMARY table of CHEMICALS")
chemicals_dataframe: DataFrame = read_csv(CHEMICALS_FILEPATH, sep=",", encoding='utf-8')
for compound in chemicals_dataframe.itertuples():
chemicals.append({
'common_name': compound.Compound.replace('"', ''),
'ptx_code': int(compound._2.replace('"', '').replace(BASE_IDENTIFIER, '')),
'formula': compound.Formula.replace('"', ''),
'cas': compound._5.replace('"', '').split('\n')[0]
'common_name': compound.compound_name_user.replace('\xa0', ''),
'ptx_code': int(compound.ptx_code.replace(BASE_IDENTIFIER, '')),
'formula': compound.formula,
'cas': compound.cas_neutral
})
return chemicals
2 changes: 1 addition & 1 deletion ptmd/const/directories.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
SCHEMAS_PATH: str = path.join(DATA_PATH, 'schemas')
EXPOSURE_INFORMATION_SCHEMA_FILEPATH: str = path.join(SCHEMAS_PATH, 'exposure_information_sheet_schema.json')
PARTNERS_LONGNAME_PATH: str = path.join(DATA_PATH, 'data', 'partners.json')
CHEMICALS_FILEPATH: str = path.join(DATA_PATH, 'data', 'chemicals.xlsx')
CHEMICALS_FILEPATH: str = path.join(DATA_PATH, 'data', 'ptx_chem_database_chemicals.csv')
ORGANISMS_FILEPATH: str = path.join(DATA_PATH, 'data', 'organisms.json')
DOWNLOAD_DIRECTORY_PATH: str = path.join(DATA_PATH, 'downloads')

Expand Down
6 changes: 3 additions & 3 deletions ptmd/lib/isa/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@


class Batch2ISA:
""" Class for converting a batch of PTMD data to ISA-Tab format.
""" Class for converting a batch of PTMD data to ISA-JSON format.
:param file: The file to convert.
"""
Expand All @@ -53,7 +53,7 @@ def convert(self) -> list[dict]:
:return: A list of dictionaries containing the ISA investigations.
"""
study: Study = Study(
filename=self.filename,
filename=f's_{self.filename.replace(".xlsx", ".txt")}',
sources=[self.blank_source],
characteristic_categories=[ORGANISM_OA, SEX_OA, REPLICATE_OA, BOX_OA, POSITION_OA],
units=[HOURS_OA]
Expand Down Expand Up @@ -235,7 +235,7 @@ def create_source(self, sample_identifier: str) -> Source:
:param sample_identifier: The identifier of the sample.
:return: A source.
"""
source_name: str = f"Source of sample {sample_identifier}"
source_name: str = f"{sample_identifier}_source"

if 'Drosophila_melanogaster' not in self.organism_name:
return Source(name=source_name, characteristics=[
Expand Down
Binary file removed ptmd/resources/data/chemicals.xlsx
Binary file not shown.
3 changes: 2 additions & 1 deletion ptmd/resources/data/partners.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@
"NIH": "National Institutes of Health",
"CLU": "Clemson University Research Foundation",
"LEITAT": "Acondicionatment tarrasense Associacion",
"UOB": "University of Birmingham"
"UOB": "University of Birmingham",
"MGI": "MGI Tech Co., Ltd."
}
133 changes: 133 additions & 0 deletions ptmd/resources/data/ptx_chem_database_chemicals.csv

Large diffs are not rendered by default.

Binary file removed ptmd/resources/downloads/UOB_Daphnia_magna_BB.xlsx
Binary file not shown.
1 change: 1 addition & 0 deletions tests/test_boot/test_file_parsers/test_parse_chemicals.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ def test_parse_chemical(self):
for key in keys:
self.assertIn(key, chemical.keys())
self.assertEqual(type(chemical), dict)
self.assertIsInstance(chemical['ptx_code'], int)

0 comments on commit c4e0d76

Please sign in to comment.