Skip to content

Commit

Permalink
Merge branch 'main' into joyce/fix-more-cols
Browse files Browse the repository at this point in the history
  • Loading branch information
joyceyan authored Jan 31, 2025
2 parents f0bc5af + 9c1f6af commit c49aa66
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 6 deletions.
41 changes: 41 additions & 0 deletions cellxgene_schema_cli/tests/test_schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -3352,3 +3352,44 @@ def test_sex_ontology_term_id__invalid(self, validator_with_roundworm_adata):
"'sex_ontology_term_id' MUST be 'PATO:0000384' for male, 'PATO:0001340' for hermaphrodite, or 'unknown'."
)
assert error_message in validator.errors


class TestMultiSpecies:
    """
    Guards against regressions in human / mouse validation caused by adding support
    for additional species: terms from the species-specific ontologies must still be
    reported as errors when validating the default fixture data.
    """

    @staticmethod
    def _assert_obs_value_rejected(validator, column, value):
        """
        Overwrite ``column`` in the first ``obs`` row with ``value``, run validation,
        and require that at least one error was recorded.
        """
        obs_frame = validator.adata.obs
        first_label = obs_frame.index[0]
        obs_frame.loc[first_label, column] = value
        validator.validate_adata()
        assert len(validator.errors) > 0

    @pytest.mark.parametrize(
        "cell_type_ontology_term_id",
        [
            "UBERON:0000001",  # Wrong ontology
            "ZFA:0000003",  # Valid for zebrafish, not valid for human or mouse data
            "FBbt:00049192",  # Valid for fruit fly, not valid for human or mouse data
            "WBbt:0008611",  # Valid for roundworm, not valid for human or mouse data
            "na",  # Expected to be rejected for this fixture's data
        ],
    )
    def test_cell_type_ontology_term_id__invalid(self, validator_with_adata, cell_type_ontology_term_id):
        """Each listed cell type term must produce at least one validation error."""
        self._assert_obs_value_rejected(
            validator_with_adata,
            "cell_type_ontology_term_id",
            cell_type_ontology_term_id,
        )

    @pytest.mark.parametrize(
        "tissue_ontology_term_id",
        [
            "CL:0000001",  # Wrong ontology
            "ZFA:0001262",  # Valid for zebrafish, not valid for human or mouse data
            "FBbt:00007337",  # Valid for fruit fly, not valid for human or mouse data
            "WBbt:0006749",  # Valid for roundworm, not valid for human or mouse data
            "na",
            "unknown",
        ],
    )
    def test_tissue_ontology_term_id__invalid(self, validator_with_adata, tissue_ontology_term_id):
        """Each listed tissue term must produce at least one validation error."""
        self._assert_obs_value_rejected(
            validator_with_adata,
            "tissue_ontology_term_id",
            tissue_ontology_term_id,
        )
12 changes: 6 additions & 6 deletions schema/drafts/5.3.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -259,29 +259,29 @@ ENSEMBL identifiers are required for genes and [External RNA Controls Consortium

The following gene annotation dependencies are *pinned* for this version of the schema. ~~For multi-organism experiments, cells from any Metazoan organism are allowed as long as orthologs from the following organism annotations are used.~~

| Organism | Source | Required version | Download |
| NCBITaxon | Source | Required version | Download |
|:--|:--|:--|:--|
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A6239"><code>NCBITaxon:6239</code></a><br>for <i>Caenorhabditis elegans</i> | [ENSEMBL](https://www.ensembl.org/Caenorhabditis_elegans/Info/Index) | WBcel235 (GCA_000002985.3)<br>Ensembl 113 | [Caenorhabditis_elegans.WBcel235.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/caenorhabditis_elegans/Caenorhabditis_elegans.WBcel235.113.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A6239"><code>NCBITaxon:6239</code></a><br>for <i>Caenorhabditis elegans</i> | [ENSEMBL](https://www.ensembl.org/Caenorhabditis_elegans/Info/Index) | WBcel235 (GCA_000002985.3)<br>Ensembl 113 | [Caenorhabditis_elegans.WBcel235.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/caenorhabditis_elegans/Caenorhabditis_elegans.WBcel235.113.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9483"><code>NCBITaxon:9483</code></a><br>for <i>Callithrix jacchus</i> | [ENSEMBL](https://www.ensembl.org/Callithrix_jacchus/Info/Index) | mCalJac1.pat.X<br>(GCA_011100555.1)<br>Ensembl 113 | [Callithrix_jacchus.mCalJac1.pat.X.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/callithrix_jacchus/Callithrix_jacchus.mCalJac1.pat.X.113.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7955"><code>NCBITaxon:7955</code></a><br>for <i>Danio rerio</i> | [ENSEMBL](https://www.ensembl.org/Danio_rerio/Info/Index) | GRCz11 (GCA_000002035.4)<br>Ensembl 113 | [Danio_rerio.GRCz11.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/danio_rerio/Danio_rerio.GRCz11.113.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7227"><code>NCBITaxon:7227</code></a><br>for <i>Drosophila melanogaster</i> | [ENSEMBL](https://www.ensembl.org/Drosophila_melanogaster/Info/Index) | BDGP6.46 (GCA_000001215.4)<br>Ensembl 113 | [Drosophila_melanogaster.BDGP6.46.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP6.46.113.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9595"><code>NCBITaxon:9595</code></a><br>for <i>Gorilla gorilla gorilla</i> | [ENSEMBL](https://www.ensembl.org/Gorilla_gorilla/Info/Index) | gorGor4<br>(GCA_000151905.3)<br>Ensembl 113 | [Gorilla_gorilla.gorGor4.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/gorilla_gorilla/Gorilla_gorilla.gorGor4.113.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9606"><code>NCBITaxon:9606</code></a><br>for <i>Homo sapiens</i> | [GENCODE](https://www.gencodegenes.org/human/) | Human reference GRCh38.p14<br>(GENCODE v44/Ensembl 110) | [gencode.v44.primary_assembly.annotation.gtf](https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/gencode.v44.primary_assembly.annotation.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A10090"><code>NCBITaxon:10090</code></a><br>for <i>Mus musculus</i> | [GENCODE](https://www.gencodegenes.org/mouse/) | Mouse reference GRCm39<br>(GENCODE vM33/Ensembl 110) | [gencode.vM33.primary_assembly.annotation.gtf](https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M33/gencode.vM33.primary_assembly.annotation.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9541"><code>NCBITaxon:9541</code></a><br>for <i>Macaca fascicularis</i> | [ENSEMBL](https://www.ensembl.org/Macaca_fascicularis/Info/Index) | Macaca_fascicularis_6.0<br>(GCA_011100615.1)<br>Ensembl 113 | [Macaca_fascicularis.Macaca_fascicularis_6.0.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/macaca_fascicularis/Macaca_fascicularis.Macaca_fascicularis_6.0.113.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9544"><code>NCBITaxon:9544</code></a><br>for <i>Macaca mulatta</i> | [ENSEMBL](https://www.ensembl.org/Macaca_mulatta/Info/Index) | Mmul_10<br>(GCA_003339765.3)<br>Ensembl 113 | [Macaca_mulatta.Mmul_10.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/macaca_mulatta/Macaca_mulatta.Mmul_10.113.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A30608"><code>NCBITaxon:30608</code></a><br>for <i>Microcebus murinus</i> | [ENSEMBL](https://www.ensembl.org/Microcebus_murinus/Info/Index) | Mmur_3.0<br>(GCA_000165445.3)<br>Ensembl 113| [Microcebus_murinus.Mmur_3.0.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/microcebus_murinus/Microcebus_murinus.Mmur_3.0.113.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A30608"><code>NCBITaxon:30608</code></a><br>for <i>Microcebus murinus</i> | [ENSEMBL](https://www.ensembl.org/Microcebus_murinus/Info/Index) | Mmur_3.0<br>(GCA_000165445.3)<br>Ensembl 113| [Microcebus_murinus.Mmur_3.0.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/microcebus_murinus/Microcebus_murinus.Mmur_3.0.113.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A10090"><code>NCBITaxon:10090</code></a><br>for <i>Mus musculus</i> | [GENCODE](https://www.gencodegenes.org/mouse/) | Mouse reference GRCm39<br>(GENCODE vM33/Ensembl 110) | [gencode.vM33.primary_assembly.annotation.gtf](https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M33/gencode.vM33.primary_assembly.annotation.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9986"><code>NCBITaxon:9986</code></a><br>for <i>Oryctolagus cuniculus</i> | [ENSEMBL](https://www.ensembl.org/Oryctolagus_cuniculus/Info/Index) | OryCun2.0<br>(GCA_000003625.1)<br>Ensembl 113 | [Oryctolagus_cuniculus.OryCun2.0.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/oryctolagus_cuniculus/Oryctolagus_cuniculus.OryCun2.0.113.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9598"><code>NCBITaxon:9598</code></a><br>for <i>Pan troglodytes</i> | [ENSEMBL](https://www.ensembl.org/Pan_troglodytes/Info/Index) | Pan_tro_3.0<br>(GCA_000001515.5)<br>Ensembl 113 | [Pan_troglodytes.Pan_tro_3.0.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/pan_troglodytes/Pan_troglodytes.Pan_tro_3.0.113.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A10116"><code>NCBITaxon:10116</code></a><br>for <i>Rattus norvegicus</i> | [ENSEMBL](https://www.ensembl.org/Rattus_norvegicus/Info/Index) | mRatBN7.2<br>(GCA_015227675.2)<br>Ensembl 113| [Rattus_norvegicus.mRatBN7.2.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/rattus_norvegicus/Rattus_norvegicus.mRatBN7.2.113.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A2697049"><code>NCBITaxon:2697049</code></a><br>for <i>SARS-CoV-2</i> | [ENSEMBL](https://covid-19.ensembl.org/index.html) | SARS-CoV-2 reference (ASM985889v3) | [Sars\_cov\_2.ASM985889v3.101.gtf](https://ftp.ensemblgenomes.org/pub/viruses/gtf/sars_cov_2/Sars_cov_2.ASM985889v3.101.gtf.gz) |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9823"><code>NCBITaxon:9823</code></a><br>for <i>Sus scrofa</i> | [ENSEMBL](https://www.ensembl.org/Sus_scrofa/Info/Index) | Sscrofa11.1<br>(GCA_000003025.6)<br>Ensembl 113 | [Sus_scrofa.Sscrofa11.1.113.gtf](https://ftp.ensembl.org/pub/release-113/gtf/sus_scrofa/Sus_scrofa.Sscrofa11.1.113.gtf.gz) |
| | [ThermoFisher ERCC Spike-Ins] | ThermoFisher ERCC RNA Spike-In Control Mixes (Cat # 4456740, 4456739) | [cms_095047.txt] |
| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A32630"><code>NCBITaxon:32630</code></a><br>for <i>synthetic construct</i> | [ThermoFisher ERCC<br>Spike-Ins] | ThermoFisher ERCC RNA Spike-In Control Mixes (Cat # 4456740, 4456739) | [cms_095047.txt] |
|||||

[RNA Spike-In Control Mixes]: https://www.thermofisher.com/document-connect/document-connect.html?url=https%3A%2F%2Fassets.thermofisher.com%2FTFS-Assets%2FLSG%2Fmanuals%2Fcms_086340.pdf&title=VXNlciBHdWlkZTogRVJDQyBSTkEgU3Bpa2UtSW4gQ29udHJvbCBNaXhlcyAoRW5nbGlzaCAp

[ThermoFisher ERCC Spike-Ins]: https://www.thermofisher.com/order/catalog/product/4456740#/4456740
[ThermoFisher ERCC<br>Spike-Ins]: https://www.thermofisher.com/order/catalog/product/4456740#/4456740
[cms_095047.txt]: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/cms_095047.txt


Expand Down

0 comments on commit c49aa66

Please sign in to comment.