Skip to content

Commit

Permalink
Merge pull request #3 from zivy/updateUtilities
Browse files Browse the repository at this point in the history
Update utilities
  • Loading branch information
zivy authored Jan 20, 2023
2 parents 670ceb4 + 3045065 commit 5e024e4
Show file tree
Hide file tree
Showing 10 changed files with 312 additions and 46 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,19 @@ needed. This is equivalent to summarizing all activity on a feature branch versu

## Unreleased

## v0.3.0

### Added
* fluorescent_probes_csv_2_md - script for creating the knowledge-base fluorescent_probes markdown page from the fluorescent_probes.csv.

### Changed
* reagent_resources_csv_2_md_url - In addition to the reagent_resources.csv we now use a template file into which the table is written. Allows us to modify the descriptive text without modifying code. Additionally, the table is sorted on the "Target Name / Protein Biomarker" column.
* update_index_md_stats - Change the computed statistics to:
1. number_of_contributors - count both original contributors and folks that replicated the work.
1. number_of_validated_reagents - count rows in the reagent_resources.csv.
1. number_of_fluorescent_probes - count number of unique entries in conjugate column of the reagent_resources.csv (ignoring NA, Unconjugated, Biotin, HRP, UT014, UT015, UT016, UT019).
1. number_of_tissues - count unique combinations of Target_Species-Target_Tissue-Tissue_State.

## v0.2.0

### Added
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "ibex_imaging_knowledge_base_utilities"
version = "0.2.0"
version = "0.3.0"
authors = [{ name="Ziv Yaniv", email="[email protected]" },
]
description = "Utility scripts used for maintaining the IBEX Imaging Community Knowledge-Base"
Expand All @@ -30,5 +30,6 @@ dependencies = [
[project.scripts]
bib2md = "ibex_imaging_knowledge_base_utilities.bib2md:main"
reagent_resources_csv_2_md_url = "ibex_imaging_knowledge_base_utilities.reagent_resources_csv_2_md_url:main"
fluorescent_probes_csv_2_md = "ibex_imaging_knowledge_base_utilities.fluorescent_probes_csv_2_md:main"
update_index_md_stats = "ibex_imaging_knowledge_base_utilities.update_index_md_stats:main"
validate_zenodo_json = "ibex_imaging_knowledge_base_utilities.validate_zenodo_json:main"
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# =========================================================================
#
# Copyright Ziv Yaniv
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# =========================================================================

import pandas as pd
import argparse
import sys
from .argparse_types import file_path, dir_path


"""
This script converts the IBEX knowledge-base fluorescent_probes.csv file to markdown.
This script is automatically run when modifications to the fluorescent_probes.csv file are merged
into the main branch of the ibex_knowledge_base repository (see .github/workflows/data2md.yml).
Assumption: The fluorescent_probes.csv file is valid. It conforms to the expected format (empty entries denoted
by the string "NA").
"""


def fluorescent_probe_csv_to_md(template_file_path, csv_file_path, output_dir):
    """
    Convert the IBEX knowledge-base fluorescent probe csv file to markdown.

    The table rows are ordered by excitation maximum and then by emission maximum,
    both compared as numbers. Output is written to a file named fluorescent_probes.md
    in the output directory.

    template_file_path: path to the markdown template. The file is expected to contain
        the string {probe_table} which is replaced with the contents of the actual
        table. The template is expanded with str.format, so any other literal curly
        braces in it have to be doubled.
    csv_file_path: path to the fluorescent_probes.csv file.
    output_dir: directory into which fluorescent_probes.md is written. It is combined
        with the file name using the / operator (e.g. a pathlib.Path).
    """
    # Read the dataframe and keep entries that are "NA", don't convert to nan
    df = pd.read_csv(csv_file_path, dtype=str, keep_default_na=False)
    # Every column is read as a string, so a plain sort compares the wavelengths
    # lexicographically ("1000" would be placed before "350"). Sort on the numeric
    # values instead; entries that are not numbers (e.g. "NA") become nan and are
    # placed last. The key only affects the ordering, the table still shows the
    # original strings.
    df = df.sort_values(
        by=["Excitation Max (nm)", "Emission Max (nm)"],
        key=lambda column: pd.to_numeric(column, errors="coerce"),
    )
    # read_csv decodes the csv as UTF-8, use the same encoding for the template and
    # the generated markdown instead of the platform dependent default.
    with open(template_file_path, "r", encoding="utf-8") as fp:
        input_md_str = fp.read()
    with open(output_dir / "fluorescent_probes.md", "w", encoding="utf-8") as fp:
        fp.write(input_md_str.format(probe_table=df.to_markdown(index=False)))


def main(argv=None):
    """
    Command line entry point for converting the fluorescent probes csv file to markdown.

    argv: list of command line arguments (template markdown file, csv file, output
        directory). If None, the arguments are taken from sys.argv[1:].

    Returns 0 if the markdown file was written and 1 if the conversion raised an
    exception, in which case the exception message is printed to stderr.
    """
    if argv is None:  # script was invoked from commandline
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(
        description="Convert knowledge-base fluorescent probes file from csv to md and sort according to excitation and emission."  # noqa E501
    )
    parser.add_argument(
        "md_template_file",
        type=file_path,
        help='Path to template markdown file which contains the string "{probe_table}".',
    )
    parser.add_argument(
        "csv_file", type=file_path, help="Path to the fluorescent_probes.csv file."
    )
    parser.add_argument(
        "output_dir",
        type=dir_path,
        help="Path to the output directory (the fluorescent_probes.md file is written to this directory).",
    )
    args = parser.parse_args(argv)

    try:
        # The conversion function has no return value, so the exit status is
        # decided here: reaching the end of the try block means success.
        fluorescent_probe_csv_to_md(
            template_file_path=args.md_template_file,
            csv_file_path=args.csv_file,
            output_dir=args.output_dir,
        )
    except Exception as e:
        print(
            f"{e}",
            file=sys.stderr,
        )
        return 1
    return 0


if __name__ == "__main__":
    # Run the command line entry point and hand its return value to sys.exit.
    sys.exit(main())
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@
by the string "NA").
"""

md_header = "<!-- Do NOT edit this file. It is automatically generated from reagents_resources.csv -->\n\n"


def short_circuit_requests_get(url, params=None, **kwargs):
res = requests.Response()
Expand Down Expand Up @@ -161,15 +159,20 @@ def uniprots_to_md(uniprots_str, uniprot_md_str):


def csv_to_md_with_url(
csv_file_path, supporting_material_root_dir, vendor_to_website_json_file_path
template_file_path,
csv_file_path,
supporting_material_root_dir,
vendor_to_website_json_file_path,
):
"""
Convert the IBEX knowledge-base csv file to markdown and add links to the supporting
Convert the IBEX knowledge-base reagent resources csv file to markdown and add links to the supporting
material files. Output is written to a file named reagent_resources.md in the parent directory
of the supporting_material_root_dir.
of the supporting_material_root_dir. The template_file_path file is expected to contain the
string {reagent_resources_table} which is replaced with the contents of the actual table.
"""
# Read the dataframe and keep entries that are "NA", don't convert to nan
df = pd.read_csv(csv_file_path, dtype=str, keep_default_na=False)
df.sort_values(by=["Target Name / Protein Biomarker"], inplace=True)
supporting_material_path = pathlib.PurePath(supporting_material_root_dir).name
if not df.empty:
print("Start linking to supporting material...")
Expand Down Expand Up @@ -210,8 +213,13 @@ def csv_to_md_with_url(
print(f"Vendor ({k}) not found in {vendor_to_website_json_file_path}.")
return 1
print("Finished linking to vendor websites...")

with open(template_file_path, "r") as fp:
input_md_str = fp.read()
with open(supporting_material_root_dir.parent / "reagent_resources.md", "w") as fp:
fp.write("# Reagent Resources\n\n" + md_header + df.to_markdown(index=False))
fp.write(
input_md_str.format(reagent_resources_table=df.to_markdown(index=False))
)
return 0


Expand All @@ -222,18 +230,23 @@ def main(argv=None):
description="Convert knowledge-base reagent resources file from csv to md and add hyperlinks."
)
parser.add_argument(
"csv_file", type=file_path, help="Path to the reagent_resources.csv file."
"md_template_file",
type=file_path,
help='Path to template markdown file which contains the string "{reagent_resources_table}".',
)
parser.add_argument(
"supporting_material_root_dir",
type=dir_path,
help="Path to the directory containing the supporting materials files.",
"csv_file", type=file_path, help="Path to the reagent_resources.csv file."
)
parser.add_argument(
"vendor_to_website",
type=file_path,
help="JSON file containing the mapping between vendor name and website",
)
parser.add_argument(
"supporting_material_root_dir",
type=dir_path,
help="Path to the directory containing the supporting materials files.",
)
parser.add_argument(
"--skip_url_validation",
action="store_true",
Expand All @@ -245,7 +258,10 @@ def main(argv=None):
if args.skip_url_validation:
requests.get = short_circuit_requests_get
return csv_to_md_with_url(
args.csv_file, args.supporting_material_root_dir, args.vendor_to_website
template_file_path=args.md_template_file,
csv_file_path=args.csv_file,
supporting_material_root_dir=args.supporting_material_root_dir,
vendor_to_website_json_file_path=args.vendor_to_website,
)
except Exception as e:
print(
Expand Down
55 changes: 39 additions & 16 deletions src/ibex_imaging_knowledge_base_utilities/update_index_md_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,36 +46,59 @@ def update_index_stats(input_md, input_csv, output_file):
with open(input_md, "r") as fp:
input_md_str = fp.read()
stats_dictionary = compute_stats_dictionary(input_csv)
stats_dictionary[
"do_not_edit_message"
] = "<!-- Do NOT edit this file. It is automatically generated from the reagent_resources.csv file -->\n\n"
with open(output_file, "w") as fp:
fp.write(input_md_str.format(**stats_dictionary))


def entry2list(entry):
    """
    Convert a semicolon separated string entry into the set of its values.

    If the entry is nan, a null string (empty or only whitespace) or "NA",
    return an empty set. Otherwise, the string is split using the semicolon as
    the separator character, leading and trailing whitespace is
    removed from the substrings and empty substrings are dropped.

    Note that, despite the function name, the returned container is a set.

    Raises ValueError if the same value appears more than once in the entry.
    """
    # pd.isna is checked first so that strip() is only called on non-missing entries.
    if pd.isna(entry) or entry.strip() in ("", "NA"):
        return set()
    res_list = [v.strip() for v in entry.split(";") if v.strip() != ""]
    res = set(res_list)
    # A size difference between the list and the set means a value was repeated.
    if len(res_list) != len(res):
        raise ValueError(f"entry with duplicate values - {entry}")
    return res


def compute_stats_dictionary(input_csv):
stats_dict = {}
df = pd.read_csv(input_csv, dtype=str, keep_default_na=False)
stats_dict["number_of_contributors"] = df["Contributor"].nunique()
stats_dict["number_of_recommended_antibodies"] = df["Catalog Number"][
df["Recommend"] == "Yes"
].nunique()
stats_dict["number_of_not_recommended_antibodies"] = df["Catalog Number"][
df["Recommend"] == "No"
].nunique()
stats_dict["number_of_fluorophores"] = len(
# Compute number of contributors, both original and folks that
# replicated the validation and either agree or disagree with the
# original contribution. The original contributor added the ORCID
# to the "Agree" column and the "Contributor" column, so no need to
# look at the "Contributor" column.
all_contributions = df["Agree"].tolist() + df["Disagree"].tolist()
all_unique_contributors = set(
[
v.strip()
for x in all_contributions
for v in x.split(";")
if v.strip() != "NA"
]
)
stats_dict["number_of_contributors"] = len(all_unique_contributors)
stats_dict["number_of_validated_reagents"] = len(df)
stats_dict["number_of_fluorescent_probes"] = len(
df["Conjugate"][
~df["Conjugate"].isin(
[
"NA",
"AF594",
"eF615",
"Hoechst",
"JOJO-1",
"Unconjugated",
"PE/Dazzle AF594",
"Biotin",
"HRP",
"UT014",
"UT015",
"UT016",
"UT019",
]
)
].unique()
Expand Down
67 changes: 67 additions & 0 deletions tests/data/fluorescent_probes.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
Fluorescent Probe,Excitation Max (nm),Emission Max (nm),Signal Inactivation Conditions IBEX2D Manual
Hoechst,350,461,Does not bleach
VioGreen,388,520,1 mg/ml LiBH4 15 minutes
VioBlue,400,452,1 mg/ml LiBH4 15 minutes
Spark Violet 538,400,538,1 mg/ml LiBH4 15 minutes
StarBright Violet 670,401,667,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
StarBright Violet 710,402,713,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
BV421,405,421,1 mg/ml LiBH4 15 minutes + Light
eF450,405,450,1 mg/ml LiBH4 15 minutes
BV510,405,510,1 mg/ml LiBH4 15 minutes + Light
BV570,405,570,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
Pacific Blue,410,455,1 mg/ml LiBH4 15 minutes
Cyan Fluorescent Protein (CFP),435,485,Does not bleach
StarBright Blue 700,473,703,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
PerCP-Vio 700,482,704,Not tested
Green Fluorescent Protein (GFP),488,510,Does not bleach
iF488,488,530,1 mg/ml LiBH4 15 minutes
AF488,490,525,1 mg/ml LiBH4 15 minutes
AF488 (Plus),490,525,1 mg/ml LiBH4 15 minutes
FITC,490,525,1 mg/ml LiBH4 30 minutes
CL490,491,515,1 mg/ml LiBH4 15 minutes
Spark Blue 574,506,574,1 mg/ml LiBH4 15 minutes
iF514,511,527,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
Yellow Fluorescent Protein (YFP),513,527,Does not bleach
JOJO-1,530,544,Does not bleach
AF532,532,554,1 mg/ml LiBH4 15 minutes
iF532,537,560,1 mg/ml LiBH4 15 minutes
CL550,550,575,1 mg/ml LiBH4 15 minutes
AF555,555,580,1 mg/ml LiBH4 15 minutes
AF555 (Plus),555,580,1 mg/ml LiBH4 15 minutes
Spark YG 570,555,570,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
Red Fluorescent Protein (RFP),555,584,Does not bleach
AF546,556,573,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
eF570,556,569,1 mg/ml LiBH4 15 minutes
iF555,557,570,1 mg/ml LiBH4 15 minutes
PE,565,578,1 mg/ml LiBH4 15 minutes
RY586,565,586,Not tested
PE/iF594,565,606,1 mg/ml LiBH4 15 minutes
PE/Dazzle AF594,565,610,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
PE-Vio 615,565,619,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
PE-Vio 770,565,775,1 mg/ml LiBH4 15 minutes
AF568,578,603,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
iF594,588,604,1 mg/ml LiBH4 15 minutes
AF594,590,617,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment.
CL594,593,614,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
CF594,593,615,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
eF615,595,615,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
Texas Red,596,615,Does not bleach within 15 minutes of 1 mg/ml LiBH4 treatment
AF633,631,650,1 mg/ml LiBH4 15 minutes
eF660,633,669,1 mg/ml LiBH4 15 minutes
AF647,650,665,1 mg/ml LiBH4 15 minutes
AF647 (Plus),650,665,1 mg/ml LiBH4 15 minutes
APC-Vio 770,652,775,Not tested
CL650,655,676,1 mg/ml LiBH4 15 minutes
iF647,656,670,1 mg/ml LiBH4 15 minutes
AF660,662,690,1 mg/ml LiBH4 15 minutes
AF680,679,702,1 mg/ml LiBH4 15 minutes
iF680,684,701,1 mg/ml LiBH4 15 minutes
Spark Red 718,687,718,1 mg/ml LiBH4 15 minutes
Vio Bright R720,695,720,1 mg/ml LiBH4 15 minutes
AF700,702,723,1 mg/ml LiBH4 15 minutes
AF750,749,775,1 mg/ml LiBH4 15 minutes
iF750,757,779,1 mg/ml LiBH4 15 minutes
BL759/780,759,780,1 mg/ml LiBH4 15 minutes
DL755,776,754,1 mg/ml LiBH4 15 minutes
AF790,784,814,1 mg/ml LiBH4 15 minutes
AF800 (Plus),786,790,1 mg/ml LiBH4 15 minutes
10 changes: 10 additions & 0 deletions tests/data/fluorescent_probes.md.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Fluorescent Probes Tested by the IBEX Imaging Community

<!-- Do NOT edit this file. It is automatically generated from the fluorescent_probes.md.in and fluorescent_probes.csv files. -->

Summary of fluorescent probes tested by the IBEX Imaging Community. Inactivation conditions are method specific.

The conditions listed below are for the original IBEX2D manual method, which uses 1 mg/ml of LiBH4. The time, concentration of LiBH4, and method (continuous exchange, bleaching in the presence of light) may vary by user.


{probe_table}
Loading

0 comments on commit 5e024e4

Please sign in to comment.