Merge pull request #4 from zivy/updateReagentResourcesScript

Updating the reagent resources table creation code.
IBEXImagingCommunity · Feb 2, 2023 · 049e3f9 · 049e3f9
2 parents 5e024e4 + 097d088
commit 049e3f9
Show file tree

Hide file tree

Showing 5 changed files with 52 additions and 14 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,12 +15,18 @@ Each release should describe the changes using the following subsection types:
 When working on the package, add information under the "Unreleased" heading. In this manner the release notes are
 created incrementally, and do not require a concerted effort prior to a release.
 
-Using a manual approach to create the release notes instead of automatically deriving them from the 
+Using a manual approach to create the release notes instead of automatically deriving them from the
 commits allows us to provide a high level description of the features and issues, yet provide details when those are
 needed. This is equivalent to summarizing all activity on a feature branch versus reporting all commits on that branch.
 
 ## Unreleased
 
+## v0.3.2
+
+### Changed
+* reagent_resources_csv_2_md_url - Update the automatic creation of paths to supporting material. The paths cannot include parentheses, so those are replaced with underscores. Additionally, the insertion of the table into the input markdown file is done via the string `replace` method and not the `format` method, because the use of `format` precludes the presence of curly braces in the template file. We now need to use curly braces in the input markdown file so that the table has an id value when the markdown is converted to html.
+
+
 ## v0.3.0
 
 ### Added

diff --git a/pyproject.toml b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ibex_imaging_knowledge_base_utilities"
-version = "0.3.0"
+version = "0.3.2"
 authors = [{ name="Ziv Yaniv", email="[email protected]" },
 ]
 description = "Utility scripts used for maintaining the IBEX Imaging Community Knowledge-Base"

diff --git a/src/ibex_imaging_knowledge_base_utilities/argparse_types.py b/src/ibex_imaging_knowledge_base_utilities/argparse_types.py
@@ -22,6 +22,16 @@
 # definitions of argparse types, enables argparse to validate the command line parameters
 
 
+def file_path_endswith_md_in(path):
+    p = pathlib.Path(path)
+    if p.is_file() and path.endswith(".md.in"):
+        return p
+    else:
+        raise argparse.ArgumentTypeError(
+            f'Invalid argument ({path}), not a file path, file does not exist, or path does not end with ".md.in".'
+        )
+
+
 def file_path(path):
     p = pathlib.Path(path)
     if p.is_file():

diff --git a/src/ibex_imaging_knowledge_base_utilities/reagent_resources_csv_2_md_url.py b/src/ibex_imaging_knowledge_base_utilities/reagent_resources_csv_2_md_url.py
@@ -20,7 +20,7 @@
 import argparse
 import sys
 import pathlib
-from .argparse_types import file_path, dir_path
+from .argparse_types import file_path, file_path_endswith_md_in, dir_path
 import requests
 import json
 from itertools import chain
@@ -71,9 +71,9 @@ def short_circuit_requests_get(url, params=None, **kwargs):
 
 def json_to_md_str_dict(json_file_path):
     with open(json_file_path) as fp:
-        json_dict = json.load(fp)
+        data_dict = json.load(fp)
     md_str_dict = {}
-    for raw_text, url_target in json_dict.items():
+    for raw_text, url_target in data_dict.items():
         try:
             res = requests.get(
                 url_target,
@@ -95,6 +95,13 @@ def json_to_md_str_dict(json_file_path):
     return md_str_dict
 
 
+def replace_char_list(input_str, change_chars_list, replacement_char):
+    for c in change_chars_list:
+        if c in input_str:
+            input_str = input_str.replace(c, replacement_char)
+    return input_str
+
+
 def data_to_md_str(data, supporting_material_root_dir):
     """
     The data parameter is a series with three entries:
@@ -110,11 +117,19 @@ def data_to_md_str(data, supporting_material_root_dir):
         urls_str = ""
         txt = [v.strip() for v in data[0].split(";") if v.strip() != ""]
         for v in txt[0:-1]:
-            # Replace all spaces and slashes with underscores so that the path
-            # matches the expected supporting material path
-            tc_subpath = f"{data[1]}_{data[2]}".replace(" ", "_").replace("/", "_")
+            # Replace spaces, slashes and brackets with underscores. Assume that the
+            # file exists; data validation happens prior to conversion of data to markdown.
+            tc_subpath = replace_char_list(
+                input_str=f"{data[1]}_{data[2]}",
+                change_chars_list=[" ", "\t", "/", "\\", "{", "}", "[", "]", "(", ")"],
+                replacement_char="_",
+            )
             urls_str += f"[{v}]({supporting_material_root_dir}/{tc_subpath}/{v}.md), "
-        tc_subpath = f"{data[1]}_{data[2]}".replace(" ", "_").replace("/", "_")
+        tc_subpath = replace_char_list(
+            input_str=f"{data[1]}_{data[2]}",
+            change_chars_list=[" ", "\t", "/", "\\", "{", "}", "[", "]", "(", ")"],
+            replacement_char="_",
+        )
         urls_str += (
             f"[{txt[-1]}]({supporting_material_root_dir}/{tc_subpath}/{txt[-1]}.md)"
         )
@@ -172,7 +187,12 @@ def csv_to_md_with_url(
     """
     # Read the dataframe and keep entries that are "NA", don't convert to nan
     df = pd.read_csv(csv_file_path, dtype=str, keep_default_na=False)
-    df.sort_values(by=["Target Name / Protein Biomarker"], inplace=True)
+    # Sort dataframe according to target, ignoring case.
+    df.sort_values(
+        by=["Target Name / Protein Biomarker"],
+        inplace=True,
+        key=lambda x: x.str.lower(),
+    )
     supporting_material_path = pathlib.PurePath(supporting_material_root_dir).name
     if not df.empty:
         print("Start linking to supporting material...")
@@ -216,9 +236,11 @@ def csv_to_md_with_url(
 
     with open(template_file_path, "r") as fp:
         input_md_str = fp.read()
-    with open(supporting_material_root_dir.parent / "reagent_resources.md", "w") as fp:
+    with open(supporting_material_root_dir.parent / template_file_path.stem, "w") as fp:
         fp.write(
-            input_md_str.format(reagent_resources_table=df.to_markdown(index=False))
+            input_md_str.replace(
+                "{reagent_resources_table}", df.to_markdown(index=False)
+            )
         )
     return 0
 
@@ -231,7 +253,7 @@ def main(argv=None):
     )
     parser.add_argument(
         "md_template_file",
-        type=file_path,
+        type=file_path_endswith_md_in,
         help='Path to template markdown file which contains the string "{reagent_resources_table}".',
     )
     parser.add_argument(

diff --git a/tests/test_scripts.py b/tests/test_scripts.py
@@ -94,7 +94,7 @@ def test_fluorescent_probe_csv_to_md(
 class TestBib2MD(BaseTest):
     @pytest.mark.parametrize(
         "bib_file_name, csl_file_name, result_md5hash",
-        [("publications.bib", "ibex.csl", "61f01467fe88de1f686afcbbd4abaed7")],
+        [("publications.bib", "ibex.csl", "b95a58740183fb04079027610e3d06c1")],
     )
     def test_bib_2_md(self, bib_file_name, csl_file_name, result_md5hash, tmp_path):
         # Write the output using the tmp_path fixture