Skip to content

Commit

Permalink
Merge branch 'main' into 2242-add-inter-class-diagram-generated-by-re…
Browse files Browse the repository at this point in the history
…fscan-to-schema-documentation
  • Loading branch information
eecavanna authored Dec 4, 2024
2 parents 906ca10 + 80c3c02 commit b21cacb
Show file tree
Hide file tree
Showing 45 changed files with 22,721 additions and 1,464 deletions.
31 changes: 24 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -84,19 +84,29 @@ update-linkml:
# Run the `data-harmonizer` npm initializer, passing it the path of the source schema.
create-data-harmonizer:
npm init data-harmonizer $(SOURCE_SCHEMA_PATH)

# Generate the JSON prefix map from the materialized-patterns schema.
# `prefixmaps` names a task, not a file, so it is declared phony; otherwise a file or
# directory called `prefixmaps` would make this target look up to date.
# The generator writes to a temporary file that is renamed only on success, so a failed
# run cannot leave a truncated `nmdc-prefix-map.json` behind.
.PHONY: prefixmaps
prefixmaps:
	@mkdir -p $(DEST)/prefixmap
	$(RUN) gen-prefix-map nmdc_schema/nmdc_materialized_patterns.yaml > $(DEST)/prefixmap/nmdc-prefix-map.json.tmp
	mv -f $(DEST)/prefixmap/nmdc-prefix-map.json.tmp $(DEST)/prefixmap/nmdc-prefix-map.json

# Generate the Pydantic model module from the materialized-patterns schema.
# `pydantic` names a task, not a file, so it is declared phony; otherwise a file or
# directory called `pydantic` would make this target look up to date.
# The generator writes to a `.tmp` file that is renamed only on success, so a failed run
# cannot leave a truncated `nmdc-pydantic.py` behind (the `.tmp` suffix also keeps a
# leftover partial file from matching a `*.py` glob).
.PHONY: pydantic
pydantic:
	@mkdir -p $(DEST)/pydantic
	$(RUN) gen-pydantic nmdc_schema/nmdc_materialized_patterns.yaml > $(DEST)/pydantic/nmdc-pydantic.py.tmp
	mv -f $(DEST)/pydantic/nmdc-pydantic.py.tmp $(DEST)/pydantic/nmdc-pydantic.py

# Note: `all` is an alias for `site`.
all: site
site: clean site-clean gen-project gendoc \
nmdc_schema/gold-to-mixs.sssom.tsv \
nmdc_schema/nmdc_materialized_patterns.schema.json nmdc_schema/nmdc_materialized_patterns.yaml \
migration-doctests
migration-doctests \
prefixmaps \
pydantic

# NOTE(review): a pattern rule with no recipe does not make every `.yaml` file depend on
# `gen-project`; in GNU Make it only cancels an existing implicit rule that has this exact
# target and prerequisite pattern. Confirm that this line has the intended effect.
%.yaml: gen-project

# was deploy: all mkd-gh-deploy
# `deploy` no longer requires `all`; its only prerequisites are `gendoc` and `mkd-gh-deploy`.
deploy: gendoc mkd-gh-deploy

gen-project: $(PYMODEL) # depends on src/schema/mixs.yaml # can be nuked with mixs-yaml-clean
gen-project: $(PYMODEL) prefixmaps pydantic # depends on src/schema/mixs.yaml # can be nuked with mixs-yaml-clean
$(RUN) gen-project \
--exclude excel \
--exclude graphql \
Expand All @@ -112,9 +122,10 @@ gen-project: $(PYMODEL) # depends on src/schema/mixs.yaml # can be nuked with mi
--include python \
--include rdf \
--config-file gen-project-config.yaml \
-d $(DEST) $(SOURCE_SCHEMA_PATH) && mv $(DEST)/*.py $(PYMODEL)
-d $(DEST) $(SOURCE_SCHEMA_PATH) && mv $(DEST)/*.py $(PYMODEL) && cp $(DEST)/pydantic/*.py $(PYMODEL)/nmdc-pydantic.py
cp project/jsonschema/nmdc.schema.json $(PYMODEL)


# `test` and `only-test` share the same prerequisites, except that `test` also requires `site`.
# NOTE(review): `examples-clean` is listed first, presumably so that it runs before
# `examples/output` is rebuilt; prerequisite order is not guaranteed under `make -j` — confirm.
test: examples-clean site test-python migration-doctests examples/output
only-test: examples-clean test-python migration-doctests examples/output

Expand Down Expand Up @@ -160,11 +171,15 @@ $(DOCDIR):
# Then, use `refgraph` (part of `refscan`) to generate a pair of diagrams within the website's file tree.
# One of the diagrams is a graph showing all the _inter-collection_ relationships the schema says can exist,
# and the other diagram is a graph showing all the _inter-class_ relationships the schema says can exist.
gendoc: $(DOCDIR)
# added copying of images and renaming of TEMP.md
cp $(SRC)/docs/*md $(DOCDIR) ; \
cp -r $(SRC)/docs/images $(DOCDIR) ; \
gendoc: $(DOCDIR) prefixmaps
# Copy all documentation files to the documentation directory
cp -rf $(SRC)/docs/* $(DOCDIR)
# Added copying of images and renaming of TEMP.md
cp $(SRC)/docs/*md $(DOCDIR)
cp -r $(SRC)/docs/images $(DOCDIR)
# Generate documentation using the gen-doc command
$(RUN) gen-doc -d $(DOCDIR) --template-directory $(SRC)/$(TEMPLATEDIR) --include src/schema/deprecated.yaml $(SOURCE_SCHEMA_PATH)
# Create directory for JavaScript files and copy them
mkdir -p $(DOCDIR)/javascripts
$(RUN) cp $(SRC)/scripts/*.js $(DOCDIR)/javascripts/
# Use `refgraph` (part of `refscan`) to generate diagrams within the website's file tree.
Expand Down Expand Up @@ -237,6 +252,8 @@ site-clean: clean


# Most thorough clean-up: runs the listed clean targets, then removes the generated Python
# model files and a local report, and makes sure the `project` directory exists afterwards.
# `mkdir -p` is used so that the recipe does not fail when `project` is already present.
squeaky-clean: clean examples-clean rdf-clean shuttle-clean site-clean # does not include mixs-yaml-clean
	rm -rf $(PYMODEL)/nmdc.py
	rm -rf $(PYMODEL)/nmdc-pydantic.py
	mkdir -p project
	rm -rf local/biosample_slots_ranges_report.tsv

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<p align="center">
<img src="https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/images/nmdc_logo_long.jpeg" width="119" height="40"/>
</p>
<div style="text-align: center;">
<img src="https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/images/nmdc_logo_long.jpeg" width="119" height="40" alt="Long NMDC logo"/>
</div>

# National Microbiome Data Collaborative Schema

Expand Down
1 change: 1 addition & 0 deletions assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Illumina HiSeq nmdc:inst-14-79zxap02
Illumina HiSeq 2500 nmdc:inst-14-nn4b6k72
Illumina HiSeq 2500-1TB nmdc:inst-14-nn4b6k72
Illumina HiSeq 2500-Rapid nmdc:inst-14-nn4b6k72
Illumina NextSeq nmdc:inst-11-6zvyep02
Illumina NextSeq 550 nmdc:inst-14-xz5tb342
Illumina NovaSeq nmdc:inst-14-xx07be40
Illumina NovaSeq 6000 nmdc:inst-14-mr4r2w09
Expand Down
27 changes: 27 additions & 0 deletions assets/other_mixs_yaml_files/TargetGeneEnum.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Standalone schema fragment that defines the `TargetGeneEnum` enumeration.
# Each permissible value is a ribosomal RNA gene, with a description and its common aliases.
id: http://example.com/TargetGeneEnum_schema
name: TargetGeneEnum_schema
enums:
  TargetGeneEnum:
    permissible_values:
      16S_rRNA:
        description: the small subunit of the bacterial/archaeal ribosome
        aliases:
          - 16S rRNA
          - 16S ribosomal RNA
        narrow_mappings:
          - OBI:0002763
      23S_rRNA:
        aliases:
          - 23S rRNA
          - 23S ribosomal RNA
        description: the large subunit of the bacterial/archaeal ribosome
      18S_rRNA:
        aliases:
          - 18S rRNA
          - 18S ribosomal RNA
        description: the small subunit of the eukaryotic ribosome
      28S_rRNA:
        aliases:
          # A space is required after the dash for this line to be a sequence entry.
          - 28S rRNA
          - 28S ribosomal RNA
        description: the large subunit of the eukaryotic ribosome
27 changes: 26 additions & 1 deletion assets/yq-for-mixs_subset_modified.txt
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@
'del(.classes)'
'del(.enums.[].name)'
'del(.enums.[].permissible_values.[].text)'
'del(.slots.[].name)'
'del(.slots.[].name)'
'del(.slots.[].domain_of)'
'del(.slots.add_recov_method.pattern)'
'del(.subsets.[].name)'

Expand All @@ -208,6 +209,30 @@
# add "M horizon" to soil_horizon_enum
'.enums.soil_horizon_enum.permissible_values.["M horizon"] = {}'

# replace host_sex_enum permissible values
'del(.enums.host_sex_enum.permissible_values)'
'.enums.host_sex_enum.permissible_values.["female"] = {}'
'.enums.host_sex_enum.permissible_values.["hermaphrodite"] = {}'
'.enums.host_sex_enum.permissible_values.["non-binary"] = {}'
'.enums.host_sex_enum.permissible_values.["male"] = {}'
'.enums.host_sex_enum.permissible_values.["transgender"] = {}'
'.enums.host_sex_enum.permissible_values.["transgender (female to male)"] = {}'
'.enums.host_sex_enum.permissible_values.["transgender (male to female)"] = {}'
'.enums.host_sex_enum.permissible_values.["undeclared"] = {}'

# Fix a few examples so that they are validatable
'del(.slots.env_broad_scale.examples)'
'.slots.env_broad_scale.examples.[0].value = "oceanic epipelagic zone biome [ENVO:01000035]"'

'del(.slots.env_local_scale.examples)'
'.slots.env_local_scale.examples.[0].value = "litter layer [ENVO:01000338]"'

'del(.slots.env_medium.examples)'
'.slots.env_medium.examples.[0].value = "soil [ENVO:00001998]"'

'del(.slots.host_body_product.examples)'
'.slots.host_body_product.examples.[0].value = "mucus [UBERON:0000912]"'

# as of 2024-01-30:
# ValueError: Conflicting URIs (https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/src/schema/mixs.yaml, https://w3id.org/linkml/types) for item: date
make: *** [Makefile:102: gen-project] Error 1
Expand Down
24 changes: 24 additions & 0 deletions nmdc_schema/migrators/adapters/adapter_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,17 @@ def set_field_of_each_document(
"""
pass

@abstractmethod
def remove_field_from_each_document(
    self,
    collection_name: str,
    field_name: str,
) -> None:
    r"""
    Deletes the field named `field_name` from every document in the collection
    named `collection_name`.

    This is an abstract declaration; it performs no work itself.
    """
    ...

@abstractmethod
def do_for_each_document(
self, collection_name: str, action: Callable[[dict], None]
Expand All @@ -126,3 +137,16 @@ def do_for_each_document(
to facilitate iterating over all documents in a collection without actually modifying them.
"""
pass

@abstractmethod
def copy_value_from_field_to_field_in_each_document(
    self,
    collection_name: str,
    source_field_name: str,
    destination_field_name: str,
) -> None:
    r"""
    Copies the value of the source field into the destination field, for each document
    in the collection that has the source field. The destination field is created if
    the document does not already have it.

    This is an abstract declaration; it performs no work itself.
    """
    ...
70 changes: 70 additions & 0 deletions nmdc_schema/migrators/adapters/dictionary_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,37 @@ def set_field_of_each_document(
for document in self._db[collection_name]:
document[field_name] = value

def remove_field_from_each_document(
    self,
    collection_name: str,
    field_name: str,
) -> None:
    r"""
    Deletes the field named `field_name` from every document in the specified collection.

    Documents that lack the field are left untouched, and nothing happens if the
    collection is not present in the database.

    >>> database = {
    ...   "thing_set": [
    ...     {"id": "1", "x": "a"},
    ...     {"id": "2"},
    ...     {"id": "3", "x": None},
    ...   ]
    ... }
    >>> da = DictionaryAdapter(database)
    >>> da.remove_field_from_each_document("thing_set", "x")
    >>> database["thing_set"][0]
    {'id': '1'}
    >>> database["thing_set"][1]
    {'id': '2'}
    >>> database["thing_set"][2]
    {'id': '3'}
    """

    # A collection that is absent from the database has no documents to modify.
    if collection_name not in self._db:
        return

    for doc in self._db[collection_name]:
        # `pop` with a default removes the key when present and is a no-op otherwise.
        doc.pop(field_name, None)

def do_for_each_document(
self, collection_name: str, action: Callable[[dict], None]
) -> None:
Expand Down Expand Up @@ -364,3 +395,42 @@ def do_for_each_document(
if collection_name in self._db:
for document in self._db[collection_name]:
action(document)

def copy_value_from_field_to_field_in_each_document(
    self,
    collection_name: str,
    source_field_name: str,
    destination_field_name: str,
) -> None:
    r"""
    For each document in the collection that has the source field, copy the value of the source field
    into the destination field, creating the destination field if it doesn't already exist.

    The destination receives an independent (deep) copy of the value, so that later changes to a
    mutable value (e.g. a list or dictionary) in one field do not show up in the other field.
    Documents that lack the source field are left untouched, and nothing happens if the collection
    is not present in the database.

    >>> database = {
    ...   "thing_set": [
    ...     {"id": "1", "color": "blue"},
    ...     {"id": "2", "color": None},
    ...     {"id": "3"},
    ...     {"id": "4", "color": "blue", "hue": "yellow"},
    ...     {"id": "5", "color": "blue", "hue": None},
    ...   ]
    ... }
    >>> da = DictionaryAdapter(database)
    >>> da.copy_value_from_field_to_field_in_each_document("thing_set", "color", "hue")
    >>> database["thing_set"][0]  # source field exists and is not empty
    {'id': '1', 'color': 'blue', 'hue': 'blue'}
    >>> database["thing_set"][1]  # source field is empty
    {'id': '2', 'color': None, 'hue': None}
    >>> database["thing_set"][2]  # source field does not exist
    {'id': '3'}
    >>> database["thing_set"][3]  # destination field exists and is not empty
    {'id': '4', 'color': 'blue', 'hue': 'blue'}
    >>> database["thing_set"][4]  # destination field exists and is empty
    {'id': '5', 'color': 'blue', 'hue': 'blue'}
    """
    # Imported locally so that this method does not depend on a module-level import.
    from copy import deepcopy

    # Iterate over every document in the collection, if the collection exists.
    if collection_name in self._db:
        for document in self._db[collection_name]:
            if source_field_name in document:
                # Copy the value instead of sharing the same object between the two fields.
                document[destination_field_name] = deepcopy(document[source_field_name])
41 changes: 41 additions & 0 deletions nmdc_schema/migrators/adapters/mongo_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,23 @@ def set_field_of_each_document(
collection = self._db.get_collection(name=collection_name)
collection.update_many({}, {"$set": {field_name: value}})

def remove_field_from_each_document(
    self,
    collection_name: str,
    field_name: str,
) -> None:
    r"""
    Deletes the field named `field_name` from every document in the specified collection,
    using a single `update_many` call with the `$unset` operator.

    Nothing happens if the database has no collection with the specified name.

    References:
    - https://www.mongodb.com/docs/manual/reference/operator/update/unset/
    """

    # Skip collections that the database does not report as existing.
    if collection_name not in self._db.list_collection_names():
        return

    # The value paired with the field name is arbitrary (e.g. 0).
    unset_operation = {"$unset": {field_name: 0}}
    self._db.get_collection(name=collection_name).update_many({}, unset_operation)

def do_for_each_document(
self, collection_name: str, action: Callable[[dict], None]
) -> None:
Expand All @@ -212,3 +229,27 @@ def do_for_each_document(
collection = self._db.get_collection(name=collection_name)
for document in collection.find():
action(document)

def copy_value_from_field_to_field_in_each_document(
    self,
    collection_name: str,
    source_field_name: str,
    destination_field_name: str,
) -> None:
    r"""
    Copies the value of the source field into the destination field, for each document in the
    collection that has the source field. The destination field is created if it is missing.

    Nothing happens if the database has no collection with the specified name.

    References:
    - https://www.mongodb.com/docs/manual/reference/method/db.collection.updateMany
    - https://www.mongodb.com/docs/manual/reference/operator/update/set/
    - https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.update_many
    """

    # Skip collections that the database does not report as existing.
    if collection_name not in self._db.list_collection_names():
        return

    # Select only the documents that have the source field.
    has_source_field = {source_field_name: {"$exists": True}}

    # The update is passed as a one-stage pipeline (a list) so that the `$`-prefixed
    # string refers to the source field of the document being updated.
    copy_stage = {"$set": {destination_field_name: f"${source_field_name}"}}

    self._db.get_collection(name=collection_name).update_many(has_source_field, [copy_stage])
Loading

0 comments on commit b21cacb

Please sign in to comment.