diff --git a/CHANGELOG.md b/CHANGELOG.md index bdc3b6f..87454d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ needed. This is equivalent to summarizing all activity on a feature branch versu ## Unreleased ### Added +* protocols_csv_2_md - Utility script which converts the protocols.csv data to the markdown file used by the site. * csv_2_supporting - Utility script which enables batch creation of supporting material files from a csv file with similar structure to the reagent_resources.csv plus two additional columns "Publications" and "Notes". ## v0.4.1 diff --git a/pyproject.toml b/pyproject.toml index 7416ad5..95da371 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ name = "ibex_imaging_knowledge_base_utilities" version = "0.5.0" authors = [{ name="Ziv Yaniv", email="zivyaniv@nih.gov" }, ] -description = "Utility scripts used for maintaining the IBEX Imaging Community Knowledge-Base" +description = "Utility scripts used for managing the IBEX Imaging Community Knowledge-Base" readme = "README.md" requires-python = ">=3.10.0" classifiers = [ @@ -34,5 +34,6 @@ fluorescent_probes_csv_2_md = "ibex_imaging_knowledge_base_utilities.fluorescent update_index_md_stats = "ibex_imaging_knowledge_base_utilities.update_index_md_stats:main" data_dict_glossary_2_contrib_md = "ibex_imaging_knowledge_base_utilities.datadict_glossary_2_contrib_md:main" zenodo_json_2_thewho_md = "ibex_imaging_knowledge_base_utilities.zenodo_json_2_thewho_md:main" +protocols_csv_2_md = "ibex_imaging_knowledge_base_utilities.protocols_csv_2_md:main" csv2supporting = "ibex_imaging_knowledge_base_utilities.csv_2_supporting:main" validate_zenodo_json = "ibex_imaging_knowledge_base_utilities.validate_zenodo_json:main" diff --git a/src/ibex_imaging_knowledge_base_utilities/protocols_csv_2_md.py b/src/ibex_imaging_knowledge_base_utilities/protocols_csv_2_md.py new file mode 100644 index 0000000..4559f3c --- /dev/null +++ 
# =========================================================================
#
#  Copyright Ziv Yaniv
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0.txt
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
# =========================================================================

"""
This script converts the IBEX knowledge-base protocols.csv file to markdown.

This script is automatically run when modifications to the protocols.csv file are merged
into the main branch of the ibex_knowledge_base repository (see .github/workflows/data2md.yml).

Assumption: The protocols.csv file is valid. It conforms to the expected format
            (three columns titled: Title,URL,Details).
"""

import argparse
import sys

import pandas as pd


def _description_2_md(description, num_words=3):
    """
    Wrap a protocol description in html details/summary markup (accordion view).

    The summary shown while the accordion is collapsed consists of the text preceding
    the num_words-th space of the description, followed by an ellipsis. When the
    description contains fewer than num_words spaces there is nothing to shorten, so
    the whole description is used as the summary and no ellipsis is appended.

    :param description: Full description text (string, may be empty).
    :param num_words: Number of space separated words in the short description.
    :return: String of the form
             <details><summary>short description</summary>description</details>
    """
    # Splitting on a single space (not on arbitrary whitespace) keeps the summary
    # identical to the text that precedes the num_words-th space character.
    words = description.split(" ")
    if len(words) > num_words:
        summary = " ".join(words[:num_words]) + "..."
    else:
        summary = description
    return f"<details><summary>{summary}</summary>{description}</details>"


def protocols_csv_to_md(template_file_path, csv_file_path, output_dir):
    """
    Convert the IBEX knowledge-base protocols csv file to markdown. Output is written to the given output
    directory using the given template file name. The template_file_path file is expected
    to contain the string {protocols_table} which is replaced with the contents of the actual table.

    :param template_file_path: pathlib.Path to the markdown template. The output file name is
                               the stem of this path (e.g. protocols.md.in -> protocols.md).
    :param csv_file_path: Path to the csv file with the columns Title, URL and Details.
    :param output_dir: pathlib.Path to the directory into which the markdown file is written.
    :return: None

    The template is filled in using str.format, so any other curly braces it contains
    need to be escaped by doubling them.
    """
    # Read the dataframe and keep entries that are "NA", don't convert to nan
    df = pd.read_csv(csv_file_path, dtype=str, keep_default_na=False)
    # Add the hyperlink to the title column, using the string "detailed protocol".
    # All columns are strings, so column-wise concatenation is used. This accesses the
    # columns by label (positional access on a label indexed Series is deprecated in
    # pandas) and also works when the csv file has a header but no rows.
    df["Title"] = df["Title"] + " [[detailed protocol](" + df["URL"] + ")]."
    # Convert the detailed description to markdown, with the details html markup (accordion view).
    df["Details"] = df["Details"].apply(_description_2_md)

    # Explicit encoding so that the result does not depend on the platform's locale.
    with open(template_file_path, "r", encoding="utf-8") as fp:
        input_md_str = fp.read()
    protocols_table = df[["Title", "Details"]].to_markdown(
        index=False, colalign=["left", "left"]
    )
    with open(output_dir / template_file_path.stem, "w", encoding="utf-8") as fp:
        fp.write(input_md_str.format(protocols_table=protocols_table))


def main(argv=None):
    """
    Command line entry point.

    :param argv: List of command line arguments, excluding the program name. When None,
                 sys.argv[1:] is used.
    :return: 0 on success, 1 if the conversion raised an exception (the exception message
             is printed to stderr).
    """
    # Only the command line interface needs the argparse type validators, importing them
    # here allows the conversion functions to be imported without the package context.
    from .argparse_types import file_path, file_path_endswith_md_in, dir_path

    if argv is None:  # script was invoked from commandline
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(
        description="Convert knowledge-base protocols file from csv to md and link using URL."
    )
    parser.add_argument(
        "md_template_file",
        type=file_path_endswith_md_in,
        help='Path to template markdown file which contains the string "{protocols_table}".',
    )
    parser.add_argument(
        "csv_file", type=file_path, help="Path to the protocols.csv file."
    )
    parser.add_argument(
        "output_dir",
        type=dir_path,
        help="Path to the output directory (the protocols.md file is written to this directory).",
    )
    args = parser.parse_args(argv)

    # Top level boundary of the script: report any failure on stderr and signal it via
    # the exit code instead of a traceback.
    try:
        protocols_csv_to_md(
            template_file_path=args.md_template_file,
            csv_file_path=args.csv_file,
            output_dir=args.output_dir,
        )
    except Exception as e:
        print(
            f"{e}",
            file=sys.stderr,
        )
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
Finally, we present a software solution for image alignment that can be executed by individuals without programming experience using open-source software and freeware. In summary, IBEX is a noncommercial method that can be readily implemented by academic laboratories and scaled to achieve high-content mapping of diverse tissues in support of a Human Reference Atlas or other such applications." \ No newline at end of file diff --git a/tests/data/protocols.md.in b/tests/data/protocols.md.in new file mode 100644 index 0000000..ebfe419 --- /dev/null +++ b/tests/data/protocols.md.in @@ -0,0 +1,13 @@ +--- +layout: default +--- + +# Protocols + + + +As part of the IBEX imaging community's commitment to excellence we strive to make research easily reproducible. We encourage the publication of detailed protocols covering all aspects of the experimental work, from hardware construction, sample preparation, imaging settings to computational analysis of the final images. Ideally, all protocols would be hosted on a single platform (e.g. [protocols.io](https://www.protocols.io/)). + +While that is the ideal, in practice we know there are multiple protocol repositories and we do not want to limit your choices. This page serves as a central location, referencing protocols across the various hosting platforms. 
class TestProtocolsCSV2MD(BaseTest):
    """Regression test for the protocols csv to markdown conversion."""

    @pytest.mark.parametrize(
        "md_template_file_name, csv_file_name, result_md5hash",
        [("protocols.md.in", "protocols.csv", "b48e11fb1917376b9998c14608bfef7d")],
    )
    def test_protocols_csv_to_md(
        self, md_template_file_name, csv_file_name, result_md5hash, tmp_path
    ):
        """
        Run the conversion on the test data and compare the hash of the generated file,
        as computed by self.files_md5, to the expected md5 hash.
        """
        protocols_csv_to_md(
            template_file_path=self.data_path / md_template_file_name,
            csv_file_path=self.data_path / csv_file_name,
            output_dir=tmp_path,
        )
        # The output file is named after the template file, without its last suffix.
        generated_file = tmp_path / pathlib.Path(md_template_file_name).stem
        assert self.files_md5([generated_file]) == result_md5hash