Skip to content

Commit

Permalink
Merge pull request #4 from zivy/updateReagentResourcesScript
Browse files Browse the repository at this point in the history
Updating the reagent resources table creation code.
  • Loading branch information
zivy authored Feb 2, 2023
2 parents 5e024e4 + 097d088 commit 049e3f9
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 14 deletions.
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,18 @@ Each release should describe the changes using the following subsection types:
When working on the package, add information under the "Unreleased" heading. In this manner the release notes are
created incrementally, and do not require a concerted effort prior to a release.

Using a manual approach to create the release notes instead of automatically deriving them from the
Using a manual approach to create the release notes instead of automatically deriving them from the
commits allows us to provide a high level description of the features and issues, yet provide details when those are
needed. This is equivalent to summarizing all activity on a feature branch versus reporting all commits on that branch.

## Unreleased

## v0.3.2

### Changed
* reagent_resources_csv_2_md_url - Update the automatic creation of paths to supporting material. The paths cannot include parentheses, so those are replaced with underscores. Additionally, the table is inserted into the input markdown file using the string `replace` method and not the `format` method, because the use of `format` precludes the presence of curly braces in the template file. We now need to use curly braces in the input markdown file so that the table has an id value when the markdown is converted to html.


## v0.3.0

### Added
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "ibex_imaging_knowledge_base_utilities"
version = "0.3.0"
version = "0.3.2"
authors = [{ name="Ziv Yaniv", email="[email protected]" },
]
description = "Utility scripts used for maintaining the IBEX Imaging Community Knowledge-Base"
Expand Down
10 changes: 10 additions & 0 deletions src/ibex_imaging_knowledge_base_utilities/argparse_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,16 @@
# definitions of argparse types, enables argparse to validate the command line parameters


def file_path_endswith_md_in(path):
    """
    argparse type for an existing markdown template file.

    The given path is accepted only if it refers to an existing file and the
    path string ends with ".md.in". On success the path is returned as a
    pathlib.Path, otherwise an argparse.ArgumentTypeError is raised so that
    argparse reports the invalid command line parameter.
    """
    candidate = pathlib.Path(path)
    # Check for existence first, the suffix is only inspected for existing files.
    if not candidate.is_file() or not path.endswith(".md.in"):
        raise argparse.ArgumentTypeError(
            f'Invalid argument ({path}), not a file path, file does not exist, or path does not end with ".md.in".'
        )
    return candidate


def file_path(path):
p = pathlib.Path(path)
if p.is_file():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import argparse
import sys
import pathlib
from .argparse_types import file_path, dir_path
from .argparse_types import file_path, file_path_endswith_md_in, dir_path
import requests
import json
from itertools import chain
Expand Down Expand Up @@ -71,9 +71,9 @@ def short_circuit_requests_get(url, params=None, **kwargs):

def json_to_md_str_dict(json_file_path):
with open(json_file_path) as fp:
json_dict = json.load(fp)
data_dict = json.load(fp)
md_str_dict = {}
for raw_text, url_target in json_dict.items():
for raw_text, url_target in data_dict.items():
try:
res = requests.get(
url_target,
Expand All @@ -95,6 +95,13 @@ def json_to_md_str_dict(json_file_path):
return md_str_dict


def replace_char_list(input_str, change_chars_list, replacement_char):
    """
    Replace every occurrence of each entry in change_chars_list with
    replacement_char and return the resulting string.

    The replacements are applied one after the other, in list order, each
    operating on the result of the previous replacement.
    """
    result = input_str
    for target in change_chars_list:
        # str.replace returns the string unchanged when target is absent.
        result = result.replace(target, replacement_char)
    return result


def data_to_md_str(data, supporting_material_root_dir):
"""
The data parameter is a series with three entries:
Expand All @@ -110,11 +117,19 @@ def data_to_md_str(data, supporting_material_root_dir):
urls_str = ""
txt = [v.strip() for v in data[0].split(";") if v.strip() != ""]
for v in txt[0:-1]:
# Replace all spaces and slashes with underscores so that the path
# matches the expected supporting material path
tc_subpath = f"{data[1]}_{data[2]}".replace(" ", "_").replace("/", "_")
# Replace spaces, slashes and brackets with underscores. Assume that the
# file exists; data validation happens prior to conversion of data to markdown.
tc_subpath = replace_char_list(
input_str=f"{data[1]}_{data[2]}",
change_chars_list=[" ", "\t", "/", "\\", "{", "}", "[", "]", "(", ")"],
replacement_char="_",
)
urls_str += f"[{v}]({supporting_material_root_dir}/{tc_subpath}/{v}.md), "
tc_subpath = f"{data[1]}_{data[2]}".replace(" ", "_").replace("/", "_")
tc_subpath = replace_char_list(
input_str=f"{data[1]}_{data[2]}",
change_chars_list=[" ", "\t", "/", "\\", "{", "}", "[", "]", "(", ")"],
replacement_char="_",
)
urls_str += (
f"[{txt[-1]}]({supporting_material_root_dir}/{tc_subpath}/{txt[-1]}.md)"
)
Expand Down Expand Up @@ -172,7 +187,12 @@ def csv_to_md_with_url(
"""
# Read the dataframe and keep entries that are "NA", don't convert to nan
df = pd.read_csv(csv_file_path, dtype=str, keep_default_na=False)
df.sort_values(by=["Target Name / Protein Biomarker"], inplace=True)
# Sort dataframe according to target, ignoring case.
df.sort_values(
by=["Target Name / Protein Biomarker"],
inplace=True,
key=lambda x: x.str.lower(),
)
supporting_material_path = pathlib.PurePath(supporting_material_root_dir).name
if not df.empty:
print("Start linking to supporting material...")
Expand Down Expand Up @@ -216,9 +236,11 @@ def csv_to_md_with_url(

with open(template_file_path, "r") as fp:
input_md_str = fp.read()
with open(supporting_material_root_dir.parent / "reagent_resources.md", "w") as fp:
with open(supporting_material_root_dir.parent / template_file_path.stem, "w") as fp:
fp.write(
input_md_str.format(reagent_resources_table=df.to_markdown(index=False))
input_md_str.replace(
"{reagent_resources_table}", df.to_markdown(index=False)
)
)
return 0

Expand All @@ -231,7 +253,7 @@ def main(argv=None):
)
parser.add_argument(
"md_template_file",
type=file_path,
type=file_path_endswith_md_in,
help='Path to template markdown file which contains the string "{reagent_resources_table}".',
)
parser.add_argument(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test_fluorescent_probe_csv_to_md(
class TestBib2MD(BaseTest):
@pytest.mark.parametrize(
"bib_file_name, csl_file_name, result_md5hash",
[("publications.bib", "ibex.csl", "61f01467fe88de1f686afcbbd4abaed7")],
[("publications.bib", "ibex.csl", "b95a58740183fb04079027610e3d06c1")],
)
def test_bib_2_md(self, bib_file_name, csl_file_name, result_md5hash, tmp_path):
# Write the output using the tmp_path fixture
Expand Down

0 comments on commit 049e3f9

Please sign in to comment.