Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/filename not in msms #137

Merged
merged 4 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions oktoberfest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,18 @@

import logging.handlers
import sys
import time

from oktoberfest import plotting as pl
from oktoberfest import predict as pr
from oktoberfest import preprocessing as pp
from oktoberfest import rescore as re

from . import runner

CONSOLE_LOG_LEVEL = logging.INFO
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

if len(logger.handlers) == 0:
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s::%(funcName)s %(message)s")
converter = time.gmtime
# add console handler
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(CONSOLE_LOG_LEVEL)
Expand All @@ -37,6 +33,6 @@
error_handler.setFormatter(formatter)
logger.addHandler(error_handler)
else:
logger.info("Logger already initizalized. Resuming normal operation.")
logger.info("Logger already initialized. Resuming normal operation.")

Check warning on line 36 in oktoberfest/__init__.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/__init__.py#L36

Added line #L36 was not covered by tests

sys.modules.update({f"{__name__}.{m}": globals()[m] for m in ["pl", "pp", "pr", "re"]})
4 changes: 1 addition & 3 deletions oktoberfest/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from rich import traceback

from oktoberfest import __copyright__, __version__, logger, runner
from oktoberfest import runner

Check warning on line 5 in oktoberfest/__main__.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/__main__.py#L5

Added line #L5 was not covered by tests

"""oktoberfest.__main__: executed when bootstrap directory is called as script."""

Expand All @@ -28,8 +28,6 @@

def main():
"""Execution of oktoberfest from terminal."""
logger.info(f"Oktoberfest version {__version__}\n{__copyright__}")

args = _parse_args()
runner.run_job(args.config_path)

Expand Down
27 changes: 21 additions & 6 deletions oktoberfest/preprocessing/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,23 +281,28 @@
search_results: pd.DataFrame,
output_dir: Union[str, Path],
filenames: Optional[List[str]] = None,
):
) -> List[str]:
"""
Split search results by spectrum file.

Given a list of spectrum filenames from which search results originate the provided search results are split
and filename specific csv files are written to the provided output directory. The provided filenames need to
Given a list of spectrum file names from which search results originate, the provided search results are split
and filename specific csv files are written to the provided output directory. The provided file names need to
correspond to the spectrum file identifier in the "RAW_FILE" column of the provided search results. The search
results need to be provided in internal format (see :doc:`../../internal_format`).
If the list of filenames is not provided, all spectrum file identifiers are considered, otherwise only the
If the list of file names is not provided, all spectrum file identifiers are considered, otherwise only the
identifiers found in the list are taken into account for writing the individual csv files.
The output filenames follow the convention <filename>.rescore.
The output file names follow the convention <filename>.rescore.
If a file name is not found in the search results, it is ignored and a warning is printed.
The function returns a list of file names for which search results are available, removing the ones that were
ignored if a list of file names was provided.

:param search_results: search results in internal format
:param output_dir: directory in which to store individual csv files containing the search results for
individual filenames
:param filenames: optional list of spectrum filenames that should be considered. If not provided, all spectrum file
identifiers in the search results are considered.

:return: list of file names for which search results could be found
"""
if isinstance(output_dir, str):
output_dir = Path(output_dir)
Expand All @@ -308,10 +313,20 @@

grouped_search_results = search_results.groupby("RAW_FILE")

filenames_found = []

Check warning on line 316 in oktoberfest/preprocessing/preprocessing.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/preprocessing/preprocessing.py#L316

Added line #L316 was not covered by tests
for filename in filenames:
output_file = (output_dir / filename).with_suffix(".rescore")
logger.info(f"Creating split msms.txt file {output_file}")
grouped_search_results.get_group(filename).to_csv(output_file)
try:
grouped_search_results.get_group(filename).to_csv(output_file)
filenames_found.append(filename)
except KeyError:
logger.warning(

Check warning on line 324 in oktoberfest/preprocessing/preprocessing.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/preprocessing/preprocessing.py#L320-L324

Added lines #L320 - L324 were not covered by tests
f"The search results do not contain search results for the provided file name {filename}. "
"If this is not intended, please verify that the file names are written correctly in the "
f"search results. {filename} is ignored."
)
return filenames_found

Check warning on line 329 in oktoberfest/preprocessing/preprocessing.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/preprocessing/preprocessing.py#L329

Added line #L329 was not covered by tests


def merge_spectra_and_peptides(spectra: pd.DataFrame, search: pd.DataFrame) -> Spectra:
Expand Down
54 changes: 42 additions & 12 deletions oktoberfest/runner.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import datetime
import json
import logging
from pathlib import Path
from typing import List, Type, Union

from spectrum_io.spectral_library import MSP, DLib, SpectralLibrary, Spectronaut

from oktoberfest import __copyright__, __version__
from oktoberfest import plotting as pl
from oktoberfest import predict as pr
from oktoberfest import preprocessing as pp
Expand All @@ -15,7 +18,7 @@
logger = logging.getLogger(__name__)


def _preprocess(spectra_files: List[Path], config: Config):
def _preprocess(spectra_files: List[Path], config: Config) -> List[Path]:
preprocess_search_step = ProcessStep(config.output, "preprocessing_search")
if not preprocess_search_step.is_done():
# load search results
Expand All @@ -42,12 +45,21 @@
search_results = pp.filter_peptides_for_model(peptides=search_results, model=config.models["intensity"])

# split search results
pp.split_search(
filenames_found = pp.split_search(
search_results=search_results,
output_dir=config.output / "msms",
filenames=[spectra_file.stem for spectra_file in spectra_files],
)
preprocess_search_step.mark_done()
else:
filenames_found = [msms_file.stem for msms_file in (config.output / "msms").glob("*rescore")]

spectra_files_to_return = []

Check warning on line 57 in oktoberfest/runner.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/runner.py#L57

Added line #L57 was not covered by tests
for spectra_file in spectra_files:
if spectra_file.stem in filenames_found:
spectra_files_to_return.append(spectra_file)

Check warning on line 60 in oktoberfest/runner.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/runner.py#L60

Added line #L60 was not covered by tests

return spectra_files_to_return

Check warning on line 62 in oktoberfest/runner.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/runner.py#L62

Added line #L62 was not covered by tests


def _annotate_and_get_library(spectra_file: Path, config: Config) -> Spectra:
Expand Down Expand Up @@ -226,7 +238,7 @@
proc_dir = config.output / "proc"
proc_dir.mkdir(parents=True, exist_ok=True)

_preprocess(spectra_files, config)
spectra_files = _preprocess(spectra_files, config)

Check warning on line 241 in oktoberfest/runner.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/runner.py#L241

Added line #L241 was not covered by tests

processing_pool = JobPool(processes=config.num_threads)

Expand Down Expand Up @@ -292,7 +304,7 @@
proc_dir = config.output / "proc"
proc_dir.mkdir(parents=True, exist_ok=True)

_preprocess(spectra_files, config)
spectra_files = _preprocess(spectra_files, config)

Check warning on line 307 in oktoberfest/runner.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/runner.py#L307

Added line #L307 was not covered by tests

processing_pool = JobPool(processes=config.num_threads)

Expand Down Expand Up @@ -360,11 +372,29 @@
conf.check()
job_type = conf.job_type

if job_type == "SpectralLibraryGeneration":
generate_spectral_lib(config_path)
elif job_type == "CollisionEnergyCalibration":
run_ce_calibration(config_path)
elif job_type == "Rescoring":
run_rescoring(config_path)
else:
raise ValueError(f"Unknown job_type in config: {job_type}")
# add file handler to root logger
base_logger = logging.getLogger()
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s::%(funcName)s %(message)s")
suffix = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
logging_output = conf.output / f"{job_type}_{suffix}.log"
file_handler = logging.FileHandler(filename=logging_output)
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(formatter)
base_logger.addHandler(file_handler)

Check warning on line 383 in oktoberfest/runner.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/runner.py#L376-L383

Added lines #L376 - L383 were not covered by tests

logger.info(f"Oktoberfest version {__version__}\n{__copyright__}")
logger.info("Job executed with the following config:")
logger.info(json.dumps(conf.data, indent=4))

Check warning on line 387 in oktoberfest/runner.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/runner.py#L385-L387

Added lines #L385 - L387 were not covered by tests

try:

Check warning on line 389 in oktoberfest/runner.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/runner.py#L389

Added line #L389 was not covered by tests
if job_type == "SpectralLibraryGeneration":
generate_spectral_lib(config_path)

Check warning on line 391 in oktoberfest/runner.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/runner.py#L391

Added line #L391 was not covered by tests
elif job_type == "CollisionEnergyCalibration":
run_ce_calibration(config_path)

Check warning on line 393 in oktoberfest/runner.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/runner.py#L393

Added line #L393 was not covered by tests
elif job_type == "Rescoring":
run_rescoring(config_path)

Check warning on line 395 in oktoberfest/runner.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/runner.py#L395

Added line #L395 was not covered by tests
else:
raise ValueError(f"Unknown job_type in config: {job_type}")

Check warning on line 397 in oktoberfest/runner.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/runner.py#L397

Added line #L397 was not covered by tests
finally:
file_handler.close()
base_logger.removeHandler(file_handler)

Check warning on line 400 in oktoberfest/runner.py

View check run for this annotation

Codecov / codecov/patch

oktoberfest/runner.py#L399-L400

Added lines #L399 - L400 were not covered by tests
Loading