diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml new file mode 100644 index 00000000..f87e1328 --- /dev/null +++ b/.github/workflows/deploy.yaml @@ -0,0 +1,58 @@ +name: Deploy Jekyll site to Pages + +on: + # Allow the workflow to be triggered manually. In particular, this allows it to be triggered + # from a workflow in the go-site repository. + workflow_dispatch: + push: + branches: ["master"] + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + # Build job: installs the gems, downloads the GO_REF data, builds the site with Jekyll and uploads './_site' as the Pages artifact + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: '2.7' + bundler-cache: true # runs 'bundle install' and caches installed gems automatically + - name: Setup Pages + id: pages + uses: actions/configure-pages@v4 + - name: Fetch GO_REFs + run: make _data/gorefs.yaml # downloads _data/gorefs.yaml (see Makefile); page_gen in _config.yml reads it during the Jekyll build + - name: Build with Jekyll + # Outputs to the './_site' directory by default + run: bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}" + env: + JEKYLL_ENV: production + - name: Upload artifact + # Automatically uploads an artifact from the './_site' directory by default + uses: actions/upload-pages-artifact@v3 + + # Deployment job: runs after 'build' and publishes its Pages artifact to the github-pages environment + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index 3e39da0d..f50b6199 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,10 @@ tramp .org-id-locations *_archive +## Other IDEs 
+.vscode +.idea + ### ### From upstream jekyll theme. ### @@ -42,3 +46,6 @@ _site vendor/bundle _algolia_api_key + +## Transient data (downloaded at build time by `make _data/gorefs.yaml`) +_data/gorefs.yaml diff --git a/Gemfile b/Gemfile index f7d2f808..bf4c7806 100644 --- a/Gemfile +++ b/Gemfile @@ -14,7 +14,10 @@ group :jekyll_plugins do gem "jekyll-redirect-from" gem "jekyll-seo-tag" gem 'jekyll-algolia', '~> 1.0' + gem 'jekyll-datapage-generator' end # Windows does not include zoneinfo files, so bundle the tzinfo-data gem gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] + +gem "rinku", "~> 2.0" diff --git a/Gemfile.lock b/Gemfile.lock index 68818cfb..1c528f63 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -34,6 +34,7 @@ GEM nokogiri (~> 1.6) progressbar (~> 1.9) verbal_expressions (~> 0.1.5) + jekyll-datapage-generator (1.4.0) jekyll-feed (0.11.0) jekyll (~> 3.3) jekyll-redirect-from (0.16.0) @@ -65,6 +66,7 @@ GEM rb-fsevent (0.11.0) rb-inotify (0.10.1) ffi (~> 1.0) + rinku (2.0.6) rouge (1.11.1) safe_yaml (1.0.5) sass (3.7.4) @@ -81,10 +83,12 @@ PLATFORMS DEPENDENCIES jekyll (= 3.4.3) jekyll-algolia (~> 1.0) + jekyll-datapage-generator jekyll-feed jekyll-redirect-from jekyll-seo-tag jekyll-sitemap + rinku (~> 2.0) tzinfo-data webrick diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..459b2f61 --- /dev/null +++ b/Makefile @@ -0,0 +1,4 @@ +.PHONY: _data/gorefs.yaml # phony, so the file is re-downloaded on every invocation even if it already exists + +_data/gorefs.yaml: + wget -O $@ https://raw.githubusercontent.com/geneontology/go-site/master/metadata/gorefs.yaml diff --git a/_config.yml b/_config.yml index 926026ef..df692334 100644 --- a/_config.yml +++ b/_config.yml @@ -23,6 +23,7 @@ gems: - jekyll-redirect-from - jekyll-seo-tag - jekyll-sitemap + - jekyll-datapage-generator exclude: - Gemfile @@ -30,6 +31,7 @@ exclude: - .idea/ - .gitignore - README.md + - vendor # In GitHub workflows, the ruby/setup-ruby action will install gems here timezone: America/Los_Angeles defaults: @@ -60,9 +62,6 @@ collections: permalink: /blog/:year/:month/:day/:title/ output: 
true -plugins_dir: -- jekyll-redirect-from - sass: sass_dir: _sass @@ -81,3 +80,9 @@ algolia: - covid-19.html # nodes_to_index: 'article' # elements to be indexed nodes_to_index: 'p,blockquote,li,div,paragraph,td,span,h1,h2,h3' + +page_gen: + - data: gorefs + template: goref + dir: GO_REF + name_expr: "record['id'].sub('GO_REF:', '')" diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md index 624f7d9b..2cdac214 100644 --- a/_docs/download-go-annotations.md +++ b/_docs/download-go-annotations.md @@ -9,23 +9,69 @@ redirect_from: # Download annotations -## Current GO annotation downloads -The [GAF download page](http://current.geneontology.org/products/pages/downloads.html) has GAF files for selected species. +### Getting annotations for a selected organism -GAF & GPAD+GPI files are also available from the [/annotations/](http://current.geneontology.org/annotations/index.html){:target="blank"} directory of the current release: [http://current.geneontology.org](http://current.geneontology.org){:target="blank"} +This page has instructions for getting GO annotations for almost any organism. If your organism is not available in the [official GO products](http://current.geneontology.org/products/pages/downloads.html), [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}, or [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}, we recommend using the latest version of [InterProScan](https://interproscan-docs.readthedocs.io/en/latest/){:target="blank"} for unannotated organisms. -### Other species -If your organism is not available in the above links, you can use [AmiGO's annotation search](https://amigo.geneontology.org/amigo/search/annotation) feature to view or download annotations. 
[See our FAQ](https://geneontology.org/docs/faq/#where-can-i-view-or-download-the-complete-sets-of-go-annotations) for further information as well as how to retrieve annotations for species that are not available in AmiGO. +Jump to a section: +- [Commonly studied organisms](/docs/download-go-annotations/#1-commonly-studied-organisms) +- [All other organisms](/docs/download-go-annotations/#2-all-other-organisms) -## About GO annotation formats -+ Released monthly -+ Files are taxon-specific, with a few exceptions including the Reactome and *Candida* Genome Database files +#### Required Files +Most tools that use GO annotations take two input files: +1. a file with the **annotations** (in Gene Annotation Format, or GAF) +2. a file with the GO **ontology** structure (in Open Biomedical Ontology Format, or OBO) + +Because the ontology and annotations are constantly being improved over time, we recommend downloading the latest version of the annotations for your organism and the corresponding ontology file for that GO version. The version should be specified in the header of the annotation file. + +#### Citing GO +To ensure reproducibility for any publication where GO was used at any point in the research, please include: +* [appropriate GO publication(s)- refer to the full GO citation policy](/docs/go-citation-policy/) +* the URL where the files were obtained +* the date on the header of the GAF file +* the ontology version number + +### [1. Commonly studied organisms](http://current.geneontology.org/products/pages/downloads.html) +[This GAF download page has annotations for selected commonly-studied species](http://current.geneontology.org/products/pages/downloads.html). + +For organisms with many expert-curated GO annotations (those with MODs, dedicated databases, etc.), we recommend downloading annotations from the links in the above-linked table. 
These organisms often have a large number of manual annotations supported by direct experimental evidence as well as annotations based on other evidence types. + +* These annotations should be used with the [latest version of the GO ontology](http://current.geneontology.org/ontology/index.html). +* Annotations for these organisms are also available as GPAD/GPI companion files; see the [/annotations/](http://current.geneontology.org/annotations/index.html){:target="blank"} directory of the current release [http://current.geneontology.org](http://current.geneontology.org){:target="blank"}. For more information on these infrequently used filetypes see the format pages for [GPAD](/docs/gene-product-association-data-gpad-format/)+[GPI](/docs/gene-product-information-gpi-format/). + +### 2. All other organisms +For all other organisms we recommend downloading annotations from one of the following sources: UniProt or NCBI RefSeq. Both of these provide highly accurate computational methods. The header of the annotation file specifies the version of the ontology you should use to accompany the annotation file. Older versions of the [GO ontology can be downloaded from the GO download archives](http://release.geneontology.org/). + +* [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}: Annotation files are available for about 20,000 complete proteomes (one protein sequence per protein-coding gene). Use these files if you want to use **UniProtKB identifiers**. + * Go to [https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"} + * Navigate to your organism & download the `.goa` file, e.g. 
[`22426.A_gambiae.goa`](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/22426.A_gambiae.goa){:target="blank"} + *Tip: use your browser's in-page search to find the species name.* + +* [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}: If your organism has a reference genome assembly in NCBI, GO annotations are available in GAF format through NCBI Gene identifiers. Annotation files are available for all eukaryotic genomes available at NCBI RefSeq. Note that GO annotations are not currently available for archaea, bacteria or viruses. + * Go to [NCBI](https://www.ncbi.nlm.nih.gov/){:target="blank"} + * Navigate to your organism, e.g. [Anopheles gambiae](https://www.ncbi.nlm.nih.gov/search/all/?term=Anopheles%20gambiae){:target="blank"} + * Follow the ["Genomes" link](https://www.ncbi.nlm.nih.gov/datasets/genome/?taxon=7165){:target="blank"} + * Select the [reference assembly](https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_943734735.2/) at the top of the list; this entry is indicated with a green "reference genome" icon and a GCF identifier listed in the RefSeq column + * Click on the [FTP link](https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/943/734/735/GCF_943734735.2_idAnoGambNW_F1_1/){:target="blank"} + * Download the file with the suffix `gene_ontology.gaf.gz`, e.g. `GCF_943734735.2-RS_2023_12_gene_ontology.gaf.gz` + +### 3. If you cannot find annotations for your organism for download as described above +[Get help from the GO helpdesk](https://help.geneontology.org/). + +### 4. If your organism’s genome sequence is not yet publicly available +For example, if you have a set of new (protein) sequences that you want to annotate with GO terms, we recommend that you generate annotations using the latest version of InterProScan. +For most genomic analyses, your input file should have one protein sequence per protein-coding gene, though any set of protein sequences can be used. 
+Download InterProScan at [https://www.ebi.ac.uk/interpro/about/interproscan](https://www.ebi.ac.uk/interpro/about/interproscan/){:target="blank"}. + +## More information on GO annotation formats ++ GO has monthly releases ++ Annotation files are taxon-specific, with a few exceptions including the Reactome and *Candida* Genome Database files + Current format guides: - + [GAF format](/docs/go-annotation-file-gaf-format-2.2/) + + [GAF format 2.2](/docs/go-annotation-file-gaf-format-2.2/) + [GPAD](/docs/gene-product-association-data-gpad-format/) + [GPI](/docs/gene-product-information-gpi-format/) companion files ## Programmatic access to GO annotations As for any resource in GO, GO annotations are accessible through the DOI-versioned release stored in [Zenodo](https://doi.org/10.5281/zenodo.1205159){:target="blank"}. -## Error or omission ? -Any errors or omissions in annotations should be reported by writing to the [GO helpdesk](http://help.geneontology.org/){:target="blank"} +## Error or omission? +Any errors or omissions in annotations should be reported by writing to the [GO helpdesk](http://help.geneontology.org/){:target="blank"}. diff --git a/_docs/download-go-cams.md b/_docs/download-go-cams.md index f3feafd3..52c8ddfc 100644 --- a/_docs/download-go-cams.md +++ b/_docs/download-go-cams.md @@ -26,4 +26,4 @@ permalink: /docs/download-go-cams/ As for any resource in GO, GO-CAMs are accessible through the DOI-versioned release stored in [Zenodo](https://doi.org/10.5281/zenodo.1205159){:target="blank"}. ## Error or omission ? -Any errors or omissions in annotations should be reported by writing to the [GO helpdesk](https://help.geneontology.org/){:target="blank"} +Any errors or omissions in annotations should be reported by writing to the [GO helpdesk](https://help.geneontology.org/){:target="blank"}. 
diff --git a/_docs/download-ontology.md b/_docs/download-ontology.md index c992de7c..c4257005 100644 --- a/_docs/download-ontology.md +++ b/_docs/download-ontology.md @@ -30,7 +30,7 @@ Files are available in the following formats: |**Subset name**|**Maintainer**|**File name**|**OBO format**|**OWL format**|**json format**| |------------------|-------------|-------------|-------------|------------|-------------| -|**GO slim AGR subset**|Developed by GO Consortium for the [Alliance of Genomes Resources](https://www.alliancegenome.org/){:target="blank"} |goslim_agr |[obo](https://current.geneontology.org/ontology/subsets/goslim_agr.obo) |[owl](https://current.geneontology.org/ontology/subsets/goslim_agr.owl){:target="blank"} |[json](https://current.geneontology.org/ontology/subsets/goslim_agr.json){:target="blank"} | +|***A*lliance of *G*enome *R*esources subset**|Developed by GO Consortium for the [Alliance of Genome Resources](https://www.alliancegenome.org/){:target="blank"} |goslim_agr |[obo](https://current.geneontology.org/ontology/subsets/goslim_agr.obo) |[owl](https://current.geneontology.org/ontology/subsets/goslim_agr.owl){:target="blank"} |[json](https://current.geneontology.org/ontology/subsets/goslim_agr.json){:target="blank"} | |**Generic GO subset**|[GO Consortium](https://help.geneontology.org/){:target="blank"} |goslim_generic|[obo](https://current.geneontology.org/ontology/subsets/goslim_generic.obo)| [owl](https://current.geneontology.org/ontology/subsets/goslim_generic.owl){:target="blank"} |[json](https://current.geneontology.org/ontology/subsets/goslim_generic.json){:target="blank"} | |*__Aspergillus__* **subset**|[_Aspergillus_ Genome Data](http://www.aspgd.org/){:target="blank"} |goslim_aspergillus|[obo](https://current.geneontology.org/ontology/subsets/goslim_aspergillus.obo) |[owl](https://current.geneontology.org/ontology/subsets/goslim_aspergillus.owl){:target="blank"} 
|[json](https://current.geneontology.org/ontology/subsets/goslim_aspergillus.json){:target="blank"} | |*__Candida albicans__* **subset**|[_Candida_ Genome Database](http://www.candidagenome.org/){:target="blank"} |goslim_candida|[obo](https://current.geneontology.org/ontology/subsets/goslim_candida.obo)|[owl](https://current.geneontology.org/ontology/subsets/goslim_candida.owl){:target="blank"} |[json](https://current.geneontology.org/ontology/subsets/goslim_candida.json){:target="blank"} | @@ -52,8 +52,6 @@ For internal checking purposes, GO maintains two "anti-slims", terms to which an |**Subset name**|**Usage** |**File name** |**OBO format** |**OWL format** |**json format** | |------------------|----------|----------|----------|----------|----------| |**Do not annotate**|The set of high level terms that are useful for grouping, but should have no direct annotations| gocheck_do_not_annotate |[obo](https://current.geneontology.org/ontology/subsets/gocheck_do_not_annotate.obo)| [owl](https://current.geneontology.org/ontology/subsets/gocheck_do_not_annotate.owl){:target="blank"} |[json](https://current.geneontology.org/ontology/subsets/gocheck_do_not_annotate.json){:target="blank"} | -|**Do not manually annotate**|The set of high level terms that are useful for grouping, but should have no direct annotations except from automated tools| gocheck_do_not_manually_annotate|[obo](https://current.geneontology.org/ontology/subsets/gocheck_do_not_manually_annotate.obo)|[owl](https://current.geneontology.org/ontology/subsets/gocheck_do_not_manually_annotate.owl){:target="blank"} |[json](https://current.geneontology.org/ontology/subsets/gocheck_do_not_manually_annotate.json){:target="blank"} | - ## Cross-references of GO to other classification systems diff --git a/_docs/faq.md b/_docs/faq.md index 85b91673..34615323 100644 --- a/_docs/faq.md +++ b/_docs/faq.md @@ -822,19 +822,22 @@ No - the term will always have the same children wherever, and however many time {::comment} 
-FAQ tags:  +FAQ tags: [mappings](/faq-tags/mappings) [ontology](/faq-tags/ontology) {:/comment} -You can use the YeastMine Analyze tool available at SGD! This tool will return a table of GO ID, GO term name, GO term namespace (cellular component, molecular function, or biological process) and GO term description for each valid GO ID you supply. This will work for any organism, as the GO is the same! +You can use the AllianceMine's Upload List tool available at the Alliance website! This tool will return a table of GO ID, GO term name, and GO term description for each valid GO ID you supply. This will work for any organism, as the GO is the same! + +1. Go to the [Upload List tool on AllianceMine](https://www.alliancegenome.org/bluegenes/alliancemine/upload/input){:target="blank"} +2. In the List Type pull down, select `GO Term` +3. Enter your GO ids or upload a file, making sure GO IDs have the correct format (GO:0016020, GO:0016301...) +4. Click on `Continue`, and then on the next page use the `Save List` button. +5. You can use the `Save list` button on the next page to use this list in AllianceMine, or use the `Export` button to see download options. -1. Go to the [Analyze tool on YeastMine](http://yeastmine.yeastgenome.org/yeastmine/bag.do){:target="blank"} -2. In the Select Type pull down, select `GO Term` -3. Enter your GO ids or upload a list in the full format (GO:0016020, GO:0016301...) -4. Click on `Create List`. The tool offers several options to download the list when you use the `Save a list of...` button. +If you need the aspect (cellular component, molecular function, or biological process) for each term, you can add this to the results before saving. Use the `Add Columns`, click `Namespace` to highlight that option, then click the `Add 1 columns` button in the lower right. You can also use the AllianceMine features to filter your list, for example to select only molecular_function terms in your list. 
If you have a list of GO terms and wish to retrieve GO IDs and/or definitions, you can use the steps above. Make sure multi-word GO terms are in double quotes (sporulation,"lactase activity","codeine metabolic process") as the tool will otherwise recognise spaces as delimiters. @@ -843,7 +846,7 @@ If you have a list of GO terms and wish to retrieve GO IDs and/or definitions, y {::comment} -FAQ tags:  +FAQ tags: [ontology](/faq-tags/ontology) {:/comment} diff --git a/_docs/gene-product-association-data-gpad-format-20.md b/_docs/gene-product-association-data-gpad-format-20.md index 71f53abc..8bfc36d0 100644 --- a/_docs/gene-product-association-data-gpad-format-20.md +++ b/_docs/gene-product-association-data-gpad-format-20.md @@ -37,10 +37,6 @@ Submitting groups may choose to include optional additional information, for exa ### Annotation file fields The GPAD format comprises 12 tab-delimited fields. Some fields are optional, some fields are mandatory and cardinality varies by field and other conditions. For fields that permit multiple values, values should be separated by pipes (\|) for `OR` statements and commas (,) for `AND` statements. 
-GPAD 2.0 sample line: - - SGD:S000002164 NOT RO:0002331 GO:0043409 PMID:26546002 ECO:0000316 SGD:S000003631 2018-01-19 SGD RO:0002233(UniProtKB:Q00772),BFO:0000050(GO:0071852) noctua-model-id=gomodel:6086f4f200000223|model-state=production|contributor=orcid:0000-0003-3212-6364 - | **Column** | **Content** | **Required?** | **Cardinality** | **Example** | |----------|---------|-------------|---------|--------| |1 | [DB:DB_Object_ID ](#1-db-db-object-id "Definition and requirements for DB:DB Object ID (column 1)") | required | 1 | SGD:S000002164 | @@ -56,6 +52,12 @@ GPAD 2.0 sample line: |11 | [Annotation Extension](#11-annotation-extension "Definition and requirements for Annotation Extension (column 11)") | optional | 0 or greater | RO:0002233(UniProtKB:Q00772),BFO:0000050(GO:0071852)| |12 | [Annotation Properties](#12-annotation-properties "Definition and requirements for Annotation Properties (column 12)") | optional | 0 or greater | noctua-model-id=gomodel:6086f4f200000223\|model-state=production\|contributor=orcid:0000-0003-3212-6364| +### GPAD 2.0 examples + + SGD:S000002164 NOT RO:0002331 GO:0043409 PMID:26546002 ECO:0000316 SGD:S000003631 2018-01-19 SGD RO:0002233(UniProtKB:Q00772),BFO:0000050(GO:0071852) noctua-model-id=gomodel:6086f4f200000223|model-state=production|contributor=orcid:0000-0003-3212-6364 + UniProtKB:A0AA85ABI6 RO:0002327 GO:0017128 GO_REF:0000118 ECO:0007826 PANTHER:PTHR23248:SF9 2024-04-08 TreeGrafter id=GOA:8034655976|comment=go_evidence:IEA + + ### Definitions and requirements for field contents #### 1. DB:DB Object ID @@ -119,7 +121,7 @@ One of the codes from the [Evidence & Conclusion Ontology](http://www.evidenceon This field is mandatory, cardinality 1. #### 7. With [or] From -Also referred to as **With, From** or the **With/From** column +Also referred to as **With, From** or the **With/From** column. 
This field is used to hold an identifier for annotations using certain evidence codes: ECO:0000305 ([IC](https://wiki.geneontology.org/index.php/Inferred_by_Curator_(IC))); ECO:0000203, ECO:0000256, and ECO:0000265 ([IEA & child terms](https://wiki.geneontology.org/index.php/Inferred_from_Electronic_Annotation_(IEA))); ECO:00000316 ([IGI](https://wiki.geneontology.org/Inferred_from_Genetic_Interaction_(IGI))); ECO:0000021 ([IPI](https://wiki.geneontology.org/Inferred_from_Physical_Interaction_(IPI))); ECO:0000031, ECO:0000250 and ECO:0000255 ([ISS & child terms](https://wiki.geneontology.org/Inferred_from_Sequence_or_structural_Similarity_(ISS))). diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md new file mode 100644 index 00000000..aa948e5e --- /dev/null +++ b/_docs/gene-product-information-gpi-format-20.md @@ -0,0 +1,144 @@ +--- +title: Gene Product Information (GPI) format 2.0 +permalink: /docs/gene-product-information-gpi-format-2.0/ + +--- +# This page describes the Gene Product Information (GPI) 2.0 format. This format has not yet been implemented in GO but is provided to help with the changeover from previous GPAD/GPI versions. +## Currently under construction + + +# Gene Product Information (GPI) files + +The Gene Ontology Consortium stores annotation data, the representation of gene product attributes using GO terms, in tab-delimited text files. Each non-header line in an annotation file represents a single association between a gene product and a GO term with a certain evidence code and the reference to support the link. + +This guide lays out the format specifications for the *G*ene *P*roduct *I*nformation (GPI) 2.0 format. +**Note that the GPI file is the companion file for the [GPAD file](/docs/gene-product-association-data-gpad-format/). +Both files should be submitted together using the same version.** + +GPAD/GPI is intended for internal GO use. 
GO also provides annotations as [GAF files](/docs/go-annotation-file-gaf-format-2.2/) and recommends use of the GAF format for most use cases. + +For more general information on annotation, please see the [Introduction to GO annotation](/docs/go-annotations/). + +# Changes from the GPI 1.2 to GPI 2.0 +**Header** +* **The `gpi-version` header must read `2.0` for this format.** + +**Columns** +* **Columns 1 & 2 from the GPI 1.2 are now combined in a single column containing an ID in CURIE syntax, e.g. `UniProtKB:P56704`.** +* **NCBI taxon IDs are to be prefixed with `NCBITaxon:` to indicate the source of the ID, e.g. `NCBITaxon:6239`** +* **Dates must now follow the ISO-8601 format YYYY-MM-DD; time may be included as YYYY-MM-DDTHH:MM:SS** + + +# Gene Product Information (GPI) 2.0 format + +## GPI Header +### Required information to provide in the header: +All annotation files must start with a single line denoting the file format. The database/group generating the file as listed in dbxrefs.yaml and the ISO-8601 formatted date the file was generated must be included in the header. Example for GPI 2.0: + + !gpi-version: 2.0 + !generated-by: SGD + !date-generated: 2024-05-01 + +Other information, such as contact details for the submitter or database group, database URLs, etc. can be included in an association file header by prefixing the line with an exclamation mark (`!`); such lines will be ignored by parsers. + +## GPI fields + +The file format comprises 11 tab-delimited fields. Fields with multiple values (for example, gene product synonyms) should separate values by pipes. 
+ +| **Column** | **Content** | **Required?** | **Cardinality** | **Example**| +|----------|---------|-------------|---------|--------| +| 1 | [DB:DB_Object_ID](#dbdb-object-id "Definition and requirements for DB:DB Object ID (column 1)") | required | 1 | UniProtKB:Q4VCS5-1| +| 2 | [DB_Object_Symbol](#db-object-symbol "Definition and requirements for DB Object Symbol (column 2)") | required | 1 | AMOT| +| 3 | [DB_Object_Name](#db-object-name "Definition and requirements for DB Object Name (column 3)") | optional | 0 or greater | Angiomotin| +| 4 | [DB_Object_Synonym(s)](#db-object-synonym "Definition and requirements for DB Object Synonym(s) (column 4)") | optional | 0 or greater | KIAA1071| +| 5 | [DB_Object_Type](#db-object-type "Definition and requirements for DB Object Type (column 5)") | required | 1 | PR:000000001| +| 6 | [DB_Object_Taxon](#db-object-taxon "Definition and requirements for DB Object Taxon (column 6)") | required | 1 | NCBITaxon:9606| +| 7 | [Encoded_by](#encoded-by "Definition and requirements for Encoded by (column 7)") | optional | 0 or greater | HGNC:17810 | +| 8 | [Parent_Protein](#parent-protein "Definition and requirements for Parent Protein (column 8)") | optional | 0 or 1 | UniProtKB:Q4VCS5| +| 9 | [Protein_Containing_Complex_Members](#protein-containing-complex-members "Definition and requirements for Protein Containing Complex Members (column 9)") | optional | 0 or greater | SGD:S000003821,SGD:S000001456,SGD:S000005047| +| 10 | [DB_Xref(s)](#db-xrefs "Definition and requirements for DB_Xref(s) (column 10)") | optional | 0 or greater | NCBIGene:154796\|ENSEMBL:ENSG00000126016 | +| 11 | [Gene_Product_Properties](#gene-product-properties "Definition and requirements for Gene Product Properties (column 11)") | optional | 0 or greater | db_subset=Swiss-Prot| + + +### GPI 2.0 example content + +> SGD:S000005027 Sal1 ADP/ATP transporter YNL083W PR:000000001 NCBITaxon:559292 UniProtKB:D6W196 + +Complex: +> SGD:S000217643 
CBF1:MET4:MET28 CBF1-MET4-MET28 sulfur metabolism transcription factor complex GO:0032991 NCBITaxon:559292 SGD:S000003821,SGD:S000001456,SGD:S000005047 ComplexPortal:CPX-1016 + +ncRNA: +> RNAcentral:URS0000527F89_9606 Homo sapiens (human) hsa-miR-145-5p SO:0000276 NCBITaxon:9606 HGNC:31532 NCBIGene:406937|ENSEMBL:ENSG00000276365 + +### Definitions and requirements for field contents + +#### DB:DB Object ID +The **DB** prefix is the database abbreviation (namespace) from which the unique identifier **DB Object ID** is drawn and must be one of the values from the set of GO database cross-references. The **DB:DB Object ID** is the combined identifier for the database object. + +This field is mandatory, cardinality 1. + + +#### DB Object Symbol +A (unique and valid) symbol to which the **DB:DB_Object_ID** is matched. No white spaces allowed. + +The text entered in the **DB_Object_Symbol** should refer to the entity in **DB:DB_Object_ID**. The **DB_Object_Symbol** field should contain a symbol that is recognizable to a biologist wherever possible (gene product symbol, abbreviation widely used in the literature, ORF name, etc.). It is not a unique identifier or an accession number (unlike the **DB:DB_Object_ID**), although IDs can be used as a **DB_Object_Symbol** if there is no more biologically meaningful symbol available (e.g., when an unnamed gene is annotated). For example, several alternative transcripts from one gene may be annotated separately, each with specific gene product identifiers in **DB:DB_Object_ID**, but with the same gene symbol in the **DB_Object_Symbol** column. + +This field is mandatory, cardinality 1. + +#### DB Object Name +The name of the gene or gene product in **DB:DB_Object_ID**. The text entered in the **DB_Object_Name** should refer to the entity in **DB:DB_Object_ID**. White spaces are allowed in this field. + +This field is not mandatory, cardinality 0, 1. + +#### DB Object Synonym +Alternative names for the entity in **DB:DB_Object_ID**. 
These entries may be a gene symbol or other text. Note that we strongly recommend that synonyms are included in the GPI file, as this aids the searching of GO. + +This field is not mandatory, cardinality 0, 1, >1 [white space allowed]; for cardinality >1 use a pipe to separate entries (e.g. YFL039C\|ABY1\|END7\|actin gene). + +#### DB Object Type +An ontology identifier for the biological entity in **DB:DB_Object_ID** which is annotated with GO. This field uses Sequence Ontology, Protein Ontology, and GO IDs and must correspond to one of the [permitted GPI entity types](https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#gpi-entity-types) or a more granular child term. Common entries include: + +* protein PR:000000001 +* protein-coding gene SO:0001217 +* gene SO:0000704 +* ncRNA SO:0000655 +    * any subtype of ncRNA in the Sequence Ontology, including ncRNA-coding gene SO:0001263 +* protein-containing complex GO:0032991 + +The object type listed in the **DB_Object_Type** field must match the database entry identified by the **DB:DB_Object_ID**. + + +This field is mandatory, cardinality 1. + +#### DB Object Taxon +The NCBI taxon ID of the species encoding the **DB:DB_Object_ID**, including the prefix `NCBITaxon:`. + +This field is mandatory, cardinality 1. + +#### Encoded by +For proteins and transcripts, **Encoded by** refers to the gene ID that encodes those entities, e.g. ENSG00000197153. + +This field is not mandatory, cardinality 0, 1, >1; for cardinality >1 use a pipe to separate entries. + +#### Parent Protein +When column 1 refers to a protein isoform or modified protein, this column refers to the gene-centric reference protein accession of the column 1 entry. + +This field is optional, cardinality 0+; multiple identifiers should be pipe-separated. + +#### Protein Containing Complex Members +When column 1 references a protein-containing complex, this column contains the gene-centric reference protein accessions. 
+ +This field is optional, cardinality 0+; multiple identifiers should be pipe-separated. + +#### DB Xrefs +Identifiers for the object in **DB:DB_Object_ID** found in other databases. Identifiers used must be standard 2-part global identifiers, e.g. UniProtKB:Q60FP0. For gene products in model organism databases, **DB_Xrefs** must include the UniProtKB ID, and may also include NCBI gene or protein IDs, etc. + +This field is optional, cardinality 0+; multiple identifiers should be pipe-separated. + +#### Gene Product Properties +The Properties column can be filled with a pipe-separated list of values in the format "property_name = property_value". There is a fixed vocabulary for the property names and this list can be extended when necessary. Supported properties will include: 'GO annotation complete', 'Phenotype annotation complete' (the value for these two properties would be a date), 'Target set' (e.g. Reference Genome, kidney, etc.), 'Database subset' (e.g. Swiss-Prot, TrEMBL). + +This field is optional, cardinality 0+; multiple properties should be pipe-separated. diff --git a/_docs/go-annotation-file-gaf-format-22.md b/_docs/go-annotation-file-gaf-format-22.md index 5fb18816..2991ee28 100644 --- a/_docs/go-annotation-file-gaf-format-22.md +++ b/_docs/go-annotation-file-gaf-format-22.md @@ -87,7 +87,7 @@ The annotation flat file format is comprised of 17 tab-delimited fields. #### DB (column 1) Refers to the database from which the identifier in **DB object ID** (column 2) is drawn. This is not necessarily the group submitting the file. If a UniProtKB ID is the **DB object ID** (column 2), **DB** (column 1) should be UniProtKB. -Must be one of the values from the set of [GO database cross-references]([http://amigo.geneontology.org/xrefs](https://github.com/geneontology/go-site/blob/master/metadata/db-xrefs.yaml)). +Must be one of the values from the set of [GO database cross-references](http://amigo.geneontology.org/xrefs). 
This field is mandatory, cardinality 1. diff --git a/_docs/go-archives.md b/_docs/go-archives.md index 9a137f35..822d4827 100644 --- a/_docs/go-archives.md +++ b/_docs/go-archives.md @@ -51,6 +51,8 @@ _If you are looking for current, actively maintained GO slims, please [see the g |Rice (Syngenta) | J. Yu et al. [PMID:11935018](http://www.ncbi.nlm.nih.gov/pubmed/11935018){:target="blank"} Apr 2002 |[old GO format](http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/GO_slims/archived_GO_slims/goslim_Rice_Syngenta.0204){:target="blank"}| |UniProtKB-GOA | N. Mulder, M. Pruess [PMID:12230037](http://www.ncbi.nlm.nih.gov/pubmed/12230037){:target="blank"} Nov 2002 |[old GO format](http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/GO_slims/archived_GO_slims/goslim_goa.2002){:target="blank"}| |Yeast | SGD curators Aug 2003 |[old GO format](http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/GO_slims/archived_GO_slims/goslim_yeast.2003){:target="blank"}| +|Do not manually annotate|The set of high-level terms that are useful for grouping, but should have no direct annotations except from automated tools| [obo](http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.obo) [owl](http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.owl){:target="blank"} [json](http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.json){:target="blank"} [tsv](http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.tsv) | + ## How the GO Archive was built The archive was generated using the data scattered across 3 legacy systems, namely the GO CVS, the GO SVN and the old product archive. Each of those systems was created at different times to serve different purposes and they were partially redundant, both in terms of the types of data they contained and in time frames (e.g.
SVN was maintained from 2011 to 2018 while CVS was maintained from 2002 to 2018). The project is hosted on [GitHub](https://github.com/geneontology/archive-reconstruction){:target="blank"}. diff --git a/_docs/go-citation-policy.md b/_docs/go-citation-policy.md index dc065f5e..5f124689 100644 --- a/_docs/go-citation-policy.md +++ b/_docs/go-citation-policy.md @@ -22,33 +22,33 @@ If you used a specific software/analysis tool in your research, in addition to t + **GO-CAMs**: Thomas PD, Hill DP, Mi H, Osumi-Sutherland D, Van Auken K, Carbon S, Balhoff JP, Albou LP, Good B, Gaudet P, Lewis SE, Mungall CJ. Gene Ontology Causal Activity Modeling (GO-CAM) moves beyond GO annotations to structured descriptions of biological functions and systems. Nat Genet. 2019 Oct;51(10):1429-1433. DOI: [10.1038/s41588-019-0500-1](https://doi.org/10.1038/s41588-019-0500-1){:target="blank"} \[[abstract](https://pubmed.ncbi.nlm.nih.gov/31548717/){:target="blank"} \| [full text](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7012280/pdf/nihms-1067180.pdf){:target="blank"}\] -If you produce tools and/or services that use GO data, the user should be provided with the GO release date and DOI along with the results (e.g. "2023-01-01" and "DOI: [10.5281/zenodo.7504797](https://dx.doi.org/10.5281/zenodo.7504797){:target="blank"}"). +If you produce tools and/or services that use GO data, the user should be provided with the GO release date and DOI along with the results (e.g. "2024-01-17" and "DOI: [10.5281/zenodo.10536401](https://doi.org/10.5281/zenodo.10536401){:target="blank"}"). ## Citing data from downloads -If you wish to cite data provided by the Gene Ontology knowledgebase, either from AmiGO or the files downloaded from the GO website, please state the release date and/or version number of the data, preferably both; e.g. "2023-01-01" and "[10.5281/zenodo.7504797](https://dx.doi.org/10.5281/zenodo.7504797){:target="blank"}". 
Both annotation and ontology data may change over time, and to reproduce the results of an analysis, it is important that the same initial GO data is used. +If you wish to cite data provided by the Gene Ontology knowledgebase, either from AmiGO or the files downloaded from the GO website, please state the release date and/or version number of the data, preferably both; e.g. "2024-01-17" and "[10.5281/zenodo.10536401](https://doi.org/10.5281/zenodo.10536401){:target="blank"}". Both annotation and ontology data may change over time, and to reproduce the results of an analysis, it is important that the same initial GO data is used. ## Citing data from AmiGO The data release can be found in the [AmiGO](https://amigo.geneontology.org/amigo){:target="blank"} page footer on the right-hand side. To cite the annotations of individual database groups please see the GO publications list. -## GO logo - -If you intend to use the logo on your website, please include a link to the GO home page, https://geneontology.org. Please [contact the GO Helpdesk](http://help.geneontology.org) if you need a larger or higher resolution version. The logo is subject to our use and license. - -The GO logo is available in four sizes. For reuse, please do not hotlink images, but download them instead (e.g. 
right click the appropriate image and size, then select "Save Link As"): - -| **Mini (and favicon)** | **Small** | **Regular** | **Large** | -|------|-------|---------|-------| -| [![mini logo](/assets/go-logo.mini.png){:width="100"}](/assets/go-logo.mini.png){:target="blank"} | [![small logo](/assets/go-logo.small.png){:width="100"}](/assets/go-logo.small.png){:target="blank"} | [![regular logo](/assets/go-logo.png){:width="100"}](/assets/go-logo.png){:target="blank"} | [![full logo](/assets/go-logo.large.png){:width="100"}](/assets/go-logo.large.png){:target="blank"} | -| [![fav icon](/assets/go-logo-favicon.ico){:width="50"}](/assets/go-logo-favicon.ico){:target="blank"} | [![mini icon](/assets/go-logo-icon.mini.png){:width="50"}](/assets/go-logo-icon.mini.png){:target="blank"} | [![small icon](/assets/go-logo-icon.small.png){:width="50"}](/assets/go-logo-icon.small.png){:target="blank"} | [![regular icon](/assets/go-logo-icon.png){:width="50"}](/assets/go-logo-icon.png){:target="blank"} | +## Best practices for linking to GO entities +GO uses persistent uniform resource locators (PURLs) for all the objects it describes. If you use or provide links to the following entities, please ensure you are using PURLs. Examples: +* GO terms: https://purl.obolibrary.org/obo/GO_0022008 +* GO_REFs: https://purl.obolibrary.org/obo/go/references/0000015 +* GO-CAM models: https://model.geneontology.org/65c57c3400001018 +* GO ontology (versions) + * current: https://purl.obolibrary.org/obo/go/go.owl + * snapshot: https://purl.obolibrary.org/obo/go/snapshot/go.owl + * dated release: https://purl.obolibrary.org/obo/go/releases/2024-01-17/go.owl +Technical documentation can be found on the [OBOFoundry GitHub site](https://github.com/OBOFoundry/purl.obolibrary.org/blob/master/README.md){:target="blank"}.
## License Gene Ontology Consortium data and data products are licensed under the [Creative Commons Attribution 4.0 Unported License](https://creativecommons.org/licenses/by/4.0/legalcode){:target="blank"}. A human-readable version and explanation is available at the [Creative Commons website](https://creativecommons.org/licenses/by/4.0/){:target="blank"}. For information about how to properly credit data use, please review the [Creative Commons FAQ](http://wiki.creativecommons.org/Frequently_Asked_Questions){:target="blank"} or contact the GO Helpdesk. -We ask that when using or citing GO data that the particular release is mentioned. For example, we'd ask that the date (e.g. "2023-01-01") is included where applicable, and optionally the Zenodo DOI (e.g. "10.5281/zenodo.7504797"). Links, where applicable, would be a useful addition for end-users. +We ask that, when using or citing GO data, the particular release is mentioned. For example, we'd ask that the date (e.g. "2024-01-17") is included where applicable, and optionally the Zenodo DOI (e.g. "10.5281/zenodo.10536401"). Links, where applicable, would be a useful addition for end-users. ### Attribution @@ -64,7 +64,7 @@ According to the terms of GO's [CC BY 4.0 license](https://creativecommons.org/l For example, if you are offering downloads containing GO data, have a data licensing page in your application, or refer to licensed data in your documentation, an appropriate notice may be: -> [Gene Ontology](https://geneontology.org) data from the [2023-01-01 release](http://release.geneontology.org/2023-01-01) ([DOI:10.5281/zenodo.7504797](https://doi.org/10.5281/zenodo.7504797){:target="blank"}) is made available under the terms of the [CC BY 4.0 license](https://creativecommons.org/licenses/by/4.0/legalcode).
+> [Gene Ontology](https://geneontology.org) data from the [2024-01-17 release](http://release.geneontology.org/2024-01-17) ([DOI:10.5281/zenodo.10536401](https://doi.org/10.5281/zenodo.10536401){:target="blank"}) is made available under the terms of the [CC BY 4.0 license](https://creativecommons.org/licenses/by/4.0/legalcode). For further reading, suggest: @@ -75,6 +75,17 @@ For further reading, suggest: GOC software and tools are under their own licenses; please see their respective homepages for further details. +## GO logo + +If you intend to use the logo on your website, please include a link to the GO home page, https://geneontology.org. Please [contact the GO Helpdesk](http://help.geneontology.org) if you need a larger or higher resolution version. The logo is subject to our use and license. + +The GO logo is available in four sizes. For reuse, please do not hotlink images, but download them instead (e.g. right click the appropriate image and size, then select "Save Link As"): + +| **Mini (and favicon)** | **Small** | **Regular** | **Large** | +|------|-------|---------|-------| +| [![mini logo](/assets/go-logo.mini.png){:width="100"}](/assets/go-logo.mini.png){:target="blank"} | [![small logo](/assets/go-logo.small.png){:width="100"}](/assets/go-logo.small.png){:target="blank"} | [![regular logo](/assets/go-logo.png){:width="100"}](/assets/go-logo.png){:target="blank"} | [![full logo](/assets/go-logo.large.png){:width="100"}](/assets/go-logo.large.png){:target="blank"} | +| [![fav icon](/assets/go-logo-favicon.ico){:width="50"}](/assets/go-logo-favicon.ico){:target="blank"} | [![mini icon](/assets/go-logo-icon.mini.png){:width="50"}](/assets/go-logo-icon.mini.png){:target="blank"} | [![small icon](/assets/go-logo-icon.small.png){:width="50"}](/assets/go-logo-icon.small.png){:target="blank"} | [![regular icon](/assets/go-logo-icon.png){:width="50"}](/assets/go-logo-icon.png){:target="blank"} | + ## Website disclaimer -All information on this website is 
copyright © 1999–2023 Gene Ontology Consortium. Permission to use the information contained in this database was given by the researchers and institutes who contributed or published the information. Users of the data are solely responsible for compliance with any copyright restrictions. Documents from this server are provided "AS-IS" without any warranty, expressed or implied. +All information on this website is copyright © 1999–2024 Gene Ontology Consortium. Permission to use the information contained in this database was given by the researchers and institutes who contributed or published the information. Users of the data are solely responsible for compliance with any copyright restrictions. Documents from this server are provided "AS-IS" without any warranty, expressed or implied. diff --git a/_docs/taxon-constraints.md b/_docs/taxon-constraints.md index 3a01ce27..2c8b2044 100644 --- a/_docs/taxon-constraints.md +++ b/_docs/taxon-constraints.md @@ -2,7 +2,7 @@ title: Taxon constraints in the Gene Ontology permalink: /docs/taxon-constraints/ redirect_from: -- /cgi-bin/references.cgi#GO_REF:0000056 +- /cgi-bin/references.cgi --- # Taxon constraints in the Gene Ontology diff --git a/_includes/goref.html b/_includes/goref.html new file mode 100644 index 00000000..9337fe9c --- /dev/null +++ b/_includes/goref.html @@ -0,0 +1,42 @@ +
+ {{ include.goref.id }} + {% if include.goref.is_obsolete %} + obsolete + {% endif %} +
+

{{ include.goref.title }}

+

{{ include.goref.authors }}; {{ include.goref.year }}

+

{{ include.goref.description | markdownify | autolinkify }}

+ +{% if include.goref.comments %} +

Comments

+ +{% endif %} + +{% if include.goref.citation %} +
+
Citation
+
+ + {{ include.goref.citation }} + +
+
+{% endif %} + +{% if include.goref.external_accession %} +
+
External xrefs
+
+ +
+
+{% endif %} \ No newline at end of file diff --git a/_includes/goref_toc_list_item.html b/_includes/goref_toc_list_item.html new file mode 100644 index 00000000..4ac86e7f --- /dev/null +++ b/_includes/goref_toc_list_item.html @@ -0,0 +1,9 @@ +
  • + {% comment %} This href must be kept in sync with page_gen settings in _config.yml {% endcomment %} + + [{{ include.goref.id }}] {{ include.goref.title }} + +
  • diff --git a/_includes/topnav.html b/_includes/topnav.html index 62f56c7e..fbadda2e 100644 --- a/_includes/topnav.html +++ b/_includes/topnav.html @@ -61,13 +61,13 @@