diff --git a/.cookietemple.yml b/.cookietemple.yml index 7dfbf33..e31dc4d 100644 --- a/.cookietemple.yml +++ b/.cookietemple.yml @@ -15,5 +15,5 @@ full_name: Mario Picciani email: mario.picciani@tum.de project_name: spectrum_io project_short_description: IO related functionalities for oktoberfest. -version: 0.6.0 +version: 0.6.1 license: MIT diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index 8f8af9b..96aaae2 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,5 +1,5 @@ -name-template: "0.6.0 🌈" # <> -tag-template: 0.6.0 # <> +name-template: "0.6.1 🌈" # <> +tag-template: 0.6.1 # <> exclude-labels: - "skip-changelog" diff --git a/cookietemple.cfg b/cookietemple.cfg index bd1dee7..df3da30 100644 --- a/cookietemple.cfg +++ b/cookietemple.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.0 +current_version = 0.6.1 [bumpversion_files_whitelisted] init_file = spectrum_io/__init__.py diff --git a/docs/conf.py b/docs/conf.py index a4f6cae..8a6ddd6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -52,9 +52,9 @@ # the built documents. # # The short X.Y version. -version = "0.6.0" +version = "0.6.1" # The full version, including alpha/beta/rc tags. -release = "0.6.0" +release = "0.6.1" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/pyproject.toml b/pyproject.toml index 4e5d056..d3dd879 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "spectrum_io" -version = "0.6.0" # <> +version = "0.6.1" # <> description = "IO related functionalities for oktoberfest." authors = ["Wilhelmlab at Technical University of Munich"] license = "MIT" diff --git a/spectrum_io/__init__.py b/spectrum_io/__init__.py index 0d10c7f..8a31418 100644 --- a/spectrum_io/__init__.py +++ b/spectrum_io/__init__.py @@ -5,7 +5,7 @@ __author__ = """The Oktoberfest development team (Wilhelmlab at Technical University of Munich)""" __copyright__ = f"Copyright {datetime.now():%Y}, Wilhelmlab at Technical University of Munich" __license__ = "MIT" -__version__ = "0.6.0" +__version__ = "0.6.1" import logging import logging.handlers diff --git a/spectrum_io/__main__.py b/spectrum_io/__main__.py index ecf8132..5d5fecc 100644 --- a/spectrum_io/__main__.py +++ b/spectrum_io/__main__.py @@ -5,7 +5,7 @@ @click.command() -@click.version_option(version="0.6.0", message=click.style("spectrum_io Version: 0.6.0")) +@click.version_option(version="0.6.1", message=click.style("spectrum_io Version: 0.6.1")) def main() -> None: """spectrum_io.""" diff --git a/spectrum_io/search_result/maxquant.py b/spectrum_io/search_result/maxquant.py index 709746a..10ed7e3 100644 --- a/spectrum_io/search_result/maxquant.py +++ b/spectrum_io/search_result/maxquant.py @@ -71,7 +71,21 @@ def read_result( parsed_mods["^_"] = f"_{unimod_tag}-" logger.info("Reading msms.txt file") - self.results = pd.read_csv(self.path / "msms.txt", sep="\t") + self.results = pd.read_csv( + self.path / "msms.txt", + usecols=[ + "Raw file", + "Scan number", + "Modified sequence", + "Charge", + "Scan event number", + "Mass", # = Calculated Precursor mass; TODO get column with experimental Precursor mass instead + "Score", + "Reverse", + "Proteins", + ], + sep="\t", + ) logger.info("Finished reading msms.txt file") diff --git a/spectrum_io/search_result/search_results.py b/spectrum_io/search_result/search_results.py index 6377bc2..5cda972 100644 --- a/spectrum_io/search_result/search_results.py +++ b/spectrum_io/search_result/search_results.py @@ -11,6 +11,20 @@ logger = logging.getLogger(__name__) +COLUMNS = [ + "RAW_FILE", + "SCAN_NUMBER", + "MODIFIED_SEQUENCE", + "PRECURSOR_CHARGE", + "MASS", + "SCORE", + "REVERSE", + "SEQUENCE", + "PEPTIDE_LENGTH", + "PROTEINS", +] + + def filter_valid_prosit_sequences(df: pd.DataFrame) -> pd.DataFrame: """ Filter valid Prosit sequences. @@ -24,7 +38,6 @@ def filter_valid_prosit_sequences(df: pd.DataFrame) -> pd.DataFrame: # remove unsupported mods to exclude supported_pattern = re.compile(r"^(?:\[UNIMOD:\d+\]\-)?(?:[ACDEFGHIKLMNPQRSTVWY]+(?:\[UNIMOD:\d+\])?)*$") df = df[df["MODIFIED_SEQUENCE"].str.match(supported_pattern)] - # remove precursor charges greater than 6 df = df[df["PRECURSOR_CHARGE"] <= 6] logger.info(f"#sequences after filtering for valid prosit sequences: {len(df.index)}") @@ -122,8 +135,8 @@ def generate_internal( """ if out_path is None: # convert and return - return self.read_result(tmt_label, custom_mods=custom_mods) - + filtered_df = self.read_result(tmt_label, custom_mods=custom_mods) + return filtered_df[COLUMNS] if isinstance(out_path, str): out_path = Path(out_path) @@ -134,7 +147,7 @@ def generate_internal( return csv.read_file(out_path) # convert, save and return - df = self.read_result(tmt_label, custom_mods=custom_mods) + df = self.read_result(tmt_label, custom_mods=custom_mods)[COLUMNS] csv.write_file(df, out_path) return df