Skip to content

Commit

Permalink
Merge pull request #143 from wilhelm-lab/fix/filter_search_result_columns
Browse files Browse the repository at this point in the history

Fix/filter search result columns
  • Loading branch information
picciama authored Aug 8, 2024
2 parents a2234c4 + 79bad67 commit ab21a01
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .cookietemple.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ full_name: Mario Picciani
email: [email protected]
project_name: spectrum_io
project_short_description: IO related functionalities for oktoberfest.
version: 0.6.0
version: 0.6.1
license: MIT
4 changes: 2 additions & 2 deletions .github/release-drafter.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name-template: "0.6.0 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
tag-template: 0.6.0 # <<COOKIETEMPLE_FORCE_BUMP>>
name-template: "0.6.1 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
tag-template: 0.6.1 # <<COOKIETEMPLE_FORCE_BUMP>>
exclude-labels:
- "skip-changelog"

Expand Down
2 changes: 1 addition & 1 deletion cookietemple.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.6.0
current_version = 0.6.1

[bumpversion_files_whitelisted]
init_file = spectrum_io/__init__.py
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@
# the built documents.
#
# The short X.Y version.
version = "0.6.0"
version = "0.6.1"
# The full version, including alpha/beta/rc tags.
release = "0.6.0"
release = "0.6.1"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "spectrum_io"
version = "0.6.0" # <<COOKIETEMPLE_FORCE_BUMP>>
version = "0.6.1" # <<COOKIETEMPLE_FORCE_BUMP>>
description = "IO related functionalities for oktoberfest."
authors = ["Wilhelmlab at Technical University of Munich"]
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion spectrum_io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
__author__ = """The Oktoberfest development team (Wilhelmlab at Technical University of Munich)"""
__copyright__ = f"Copyright {datetime.now():%Y}, Wilhelmlab at Technical University of Munich"
__license__ = "MIT"
__version__ = "0.6.0"
__version__ = "0.6.1"

import logging
import logging.handlers
Expand Down
2 changes: 1 addition & 1 deletion spectrum_io/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


@click.command()
@click.version_option(version="0.6.0", message=click.style("spectrum_io Version: 0.6.0"))
@click.version_option(version="0.6.1", message=click.style("spectrum_io Version: 0.6.1"))
def main() -> None:
"""spectrum_io."""

Expand Down
16 changes: 15 additions & 1 deletion spectrum_io/search_result/maxquant.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,21 @@ def read_result(
parsed_mods["^_"] = f"_{unimod_tag}-"

logger.info("Reading msms.txt file")
self.results = pd.read_csv(self.path / "msms.txt", sep="\t")
self.results = pd.read_csv(
self.path / "msms.txt",
usecols=[
"Raw file",
"Scan number",
"Modified sequence",
"Charge",
"Scan event number",
"Mass", # = Calculated Precursor mass; TODO get column with experimental Precursor mass instead
"Score",
"Reverse",
"Proteins",
],
sep="\t",
)

logger.info("Finished reading msms.txt file")

Expand Down
21 changes: 17 additions & 4 deletions spectrum_io/search_result/search_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,20 @@
logger = logging.getLogger(__name__)


# Columns kept in the dataframe produced by generate_internal: the result of
# read_result is subset to exactly these columns (in this order) before it is
# returned or written to out_path.
COLUMNS = [
"RAW_FILE",
"SCAN_NUMBER",
"MODIFIED_SEQUENCE",
"PRECURSOR_CHARGE",
"MASS",
"SCORE",
"REVERSE",
"SEQUENCE",
"PEPTIDE_LENGTH",
"PROTEINS",
]


def filter_valid_prosit_sequences(df: pd.DataFrame) -> pd.DataFrame:
"""
Filter valid Prosit sequences.
Expand All @@ -24,7 +38,6 @@ def filter_valid_prosit_sequences(df: pd.DataFrame) -> pd.DataFrame:
# remove unsupported mods to exclude
supported_pattern = re.compile(r"^(?:\[UNIMOD:\d+\]\-)?(?:[ACDEFGHIKLMNPQRSTVWY]+(?:\[UNIMOD:\d+\])?)*$")
df = df[df["MODIFIED_SEQUENCE"].str.match(supported_pattern)]

# remove precursor charges greater than 6
df = df[df["PRECURSOR_CHARGE"] <= 6]
logger.info(f"#sequences after filtering for valid prosit sequences: {len(df.index)}")
Expand Down Expand Up @@ -122,8 +135,8 @@ def generate_internal(
"""
if out_path is None:
# convert and return
return self.read_result(tmt_label, custom_mods=custom_mods)

filtered_df = self.read_result(tmt_label, custom_mods=custom_mods)
return filtered_df[COLUMNS]
if isinstance(out_path, str):
out_path = Path(out_path)

Expand All @@ -134,7 +147,7 @@ def generate_internal(
return csv.read_file(out_path)

# convert, save and return
df = self.read_result(tmt_label, custom_mods=custom_mods)
df = self.read_result(tmt_label, custom_mods=custom_mods)[COLUMNS]
csv.write_file(df, out_path)
return df

Expand Down

0 comments on commit ab21a01

Please sign in to comment.