From 16e6e8f67b6128c39d1c5d27a61dc03a75344435 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Fri, 1 Mar 2024 14:12:40 -0700
Subject: [PATCH 01/52] Update topnav.html

just changing the order to hide the gap where there's a missing icon
---
 _includes/topnav.html | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/_includes/topnav.html b/_includes/topnav.html
index 62f56c7e..fbadda2e 100644
--- a/_includes/topnav.html
+++ b/_includes/topnav.html
@@ -61,13 +61,13 @@
             <div class="navbar-right">
                 <ul class="nav navbar-nav">
                     <li>
-                        <a href="https://github.com/geneontology" target="blank" style="padding: 8px 3px; font-size: 15px;">
-                            <i class="fa fa-github" aria-hidden="true"></i>
+                        <a rel="me" href="https://genomic.social/@go" target="blank" style="padding: 8px 3px; font-size: 15px;">    
+                            <i class="fa-brands fa-mastodon" aria-hidden="true"></i>
                         </a>
                     </li>
                     <li>
-                        <a rel="me" href="https://genomic.social/@go" target="blank" style="padding: 8px 3px; font-size: 15px;">    
-                            <i class="fa-brands fa-mastodon" aria-hidden="true"></i>
+                        <a href="https://github.com/geneontology" target="blank" style="padding: 8px 3px; font-size: 15px;">
+                            <i class="fa fa-github" aria-hidden="true"></i>
                         </a>
                     </li>
                     <li>

From f51e870ce5852a21464323fc6503f36d2a6c933f Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Wed, 6 Mar 2024 15:41:00 -0800
Subject: [PATCH 02/52] Add Makefile with target to fetch gorefs.yaml from
 go-site repo

---
 .gitignore | 7 +++++++
 Makefile   | 4 ++++
 2 files changed, 11 insertions(+)
 create mode 100644 Makefile

diff --git a/.gitignore b/.gitignore
index 3e39da0d..f50b6199 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,6 +30,10 @@ tramp
 .org-id-locations
 *_archive
 
+## Other IDEs
+.vscode
+.idea
+
 ###
 ### From upstream jekyll theme.
 ###
@@ -42,3 +46,6 @@ _site
 vendor/bundle
 
 _algolia_api_key
+
+## Transient data
+_data/gorefs.yaml
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..459b2f61
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,4 @@
+.PHONY: _data/gorefs.yaml
+# Declared phony above so the file is re-downloaded on every invocation, even when a local copy already exists.
+_data/gorefs.yaml:
+	wget -O $@ https://raw.githubusercontent.com/geneontology/go-site/master/metadata/gorefs.yaml

From ccc8238c0d005594cb239c8db811cf3f27650ecd Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Wed, 6 Mar 2024 15:56:08 -0800
Subject: [PATCH 03/52] Add GO REFs page

---
 _includes/goref.html          | 43 ++++++++++++++++++++++++
 _sass/custom/_typography.scss | 62 ++++++++++++++++++++++++-----------
 gorefs.html                   | 14 ++++++++
 3 files changed, 99 insertions(+), 20 deletions(-)
 create mode 100644 _includes/goref.html
 create mode 100644 gorefs.html

diff --git a/_includes/goref.html b/_includes/goref.html
new file mode 100644
index 00000000..a73d358e
--- /dev/null
+++ b/_includes/goref.html
@@ -0,0 +1,43 @@
+<a class="permalink-anchor" id="{{ include.goref.id }}"></a>
+<div>
+    <span class="label label-primary">{{ include.goref.id }}</span>
+    {% if include.goref.is_obsolete %}
+    <span class="label label-danger text-uppercase">obsolete</span>
+    {% endif %}
+</div>
+<h2><a class="permalink" href="#{{ include.goref.id }}">{{ include.goref.title }}</a></h2>
+<p>{{ include.goref.authors }}; {{ include.goref.year }}</p>
+<p>{{ include.goref.description | markdownify }}</p>
+
+{% if include.goref.comments %}
+<h3>Comments</h3>
+<ul>
+{% for comment in include.goref.comments %}
+    <li>{{ comment | markdownify }}</li>
+{% endfor %}
+</ul>
+{% endif %}
+
+{% if include.goref.citation %}
+<div class="row">
+    <div class="col-sm-2"><b>Citation</b></div>
+    <div class="col-sm-10">
+        <a href="http://www.ncbi.nlm.nih.gov/pubmed/{{ include.goref.citation | remove_first: 'PMID:' }}" target="_blank">
+            {{ include.goref.citation }}
+        </a>
+    </div>
+</div>
+{% endif %}
+
+{% if include.goref.external_accession %}
+<div class="row">
+    <div class="col-sm-2"><b>External xrefs</b></div>
+    <div class="col-sm-10">
+        <ul class="list-unstyled">
+        {% for xref in include.goref.external_accession %}
+            <li>{{ xref }}</li>
+        {% endfor %}
+        </ul>
+    </div>
+</div>
+{% endif %}
\ No newline at end of file
diff --git a/_sass/custom/_typography.scss b/_sass/custom/_typography.scss
index 26ab1333..d15c5f6a 100644
--- a/_sass/custom/_typography.scss
+++ b/_sass/custom/_typography.scss
@@ -37,29 +37,51 @@ a {
     text-decoration: none;
 }
 
-a:not(.btn):not(.yasr_btn):not(.list-group-item):not(.dropdown-menu__item__link):not(.go-link):not(.basic-link) {
-    color: $color-primary;
-    text-decoration: none !important;
-    display: inline-block;
+.permalink {
+    font-family: inherit;
     position: relative;
+
+    &:hover::before {
+        content: '#';
+        position: absolute;
+        left: -0.8em;
+        top: 0.2em;
+        font-size: 0.8em;
+        opacity: 0.7;
+    }
 }
 
-a:not(.btn):not(.yasr_btn):not(.list-group-item):not(.dropdown-menu__item__link):not(.go-link):not(.basic-link):after {
-    background: none repeat scroll 0 0 transparent;
-    bottom: 0;
-    content: "";
+.permalink-anchor {
+    // Account for the fixed header
     display: block;
-    height: 2px;
-    left: 50%;
-    position: absolute;
-    background: $color-primary;
-    transition: width 0.3s ease 0s, left 0.3s ease 0s;
-    width: 0;
+    position: relative;
+    top: -65px;
+    visibility: hidden;
 }
 
-a:not(.btn):not(.list-group-item):not(.dropdown-menu__item__link):not(.go-link):not(.basic-link):hover:after {
-    width: 100%;
-    left: 0;
+a:not(.btn):not(.yasr_btn):not(.list-group-item):not(.dropdown-menu__item__link):not(.go-link):not(.basic-link):not(.permalink) {
+    color: $color-primary;
+    text-decoration: none !important;
+    display: inline-block;
+    position: relative;
+
+    &::after {
+        background: none repeat scroll 0 0 transparent;
+        bottom: 0;
+        content: "";
+        display: block;
+        height: 2px;
+        left: 50%;
+        position: absolute;
+        background: $color-primary;
+        transition: width 0.3s ease 0s, left 0.3s ease 0s;
+        width: 0;
+    }
+
+    &:hover::after {
+        width: 100%;
+        left: 0;
+    }
 }
 
 .btn {
@@ -107,10 +129,10 @@ a:not(.btn):not(.list-group-item):not(.dropdown-menu__item__link):not(.go-link):
         padding: 8px 6px;
         text-decoration: none;
         font-weight: 400 !important;
-        width: 100%; 
+        width: 100%;
         transition: padding 0.2s;
         // transition:  0.15s text-align ease;
-        
+
         &:hover {
             text-decoration: none;
             font-weight: 400 !important;
@@ -207,7 +229,7 @@ a:not(.btn):not(.list-group-item):not(.dropdown-menu__item__link):not(.go-link):
 }
 
 .fa {
-    text-shadow: rgba(0, 0, 0, 0.25) 1.5px 1.5px 5px;    
+    text-shadow: rgba(0, 0, 0, 0.25) 1.5px 1.5px 5px;
 }
 
 .input_hint {
diff --git a/gorefs.html b/gorefs.html
new file mode 100644
index 00000000..37f033dd
--- /dev/null
+++ b/gorefs.html
@@ -0,0 +1,14 @@
+---
+layout: default
+---
+
+<div class="container">
+    <h1>GO REFs</h1>
+    <p>This is a collection of references used by the GO Consortium</p>
+
+    {% for goref in site.data.gorefs %}
+    <hr>
+    {% include goref.html goref=goref %}
+    {% endfor %}
+</div>
+

From 2f8cd4de88b7df43514ea2681ef58754618174a0 Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Wed, 6 Mar 2024 16:10:20 -0800
Subject: [PATCH 04/52] Add warning about missing gorefs file in dev mode

---
 gorefs.html | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/gorefs.html b/gorefs.html
index 37f033dd..01978de0 100644
--- a/gorefs.html
+++ b/gorefs.html
@@ -6,6 +6,14 @@
     <h1>GO REFs</h1>
     <p>This is a collection of references used by the GO Consortium</p>
 
+    {% if jekyll.environment == "development" %}
+        {% unless site.data.gorefs %}
+            <div class="alert alert-warning">
+                <strong>Warning!</strong> No GO_REFs found. Did you run <code>make _data/gorefs.yaml</code>?
+            </div>
+        {% endunless %}
+    {% endif %}
+
     {% for goref in site.data.gorefs %}
     <hr>
     {% include goref.html goref=goref %}

From bdef0cf486b4bd1bbf23e9dd7ad415e17fe695b5 Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Thu, 7 Mar 2024 09:57:25 -0800
Subject: [PATCH 05/52] Add custom filter for automatically linking plain URLs

---
 Gemfile                 |  2 ++
 Gemfile.lock            |  2 ++
 _config.yml             |  3 ---
 _includes/goref.html    |  4 ++--
 _plugins/autolinkify.rb | 11 +++++++++++
 5 files changed, 17 insertions(+), 5 deletions(-)
 create mode 100644 _plugins/autolinkify.rb

diff --git a/Gemfile b/Gemfile
index f7d2f808..cd029436 100644
--- a/Gemfile
+++ b/Gemfile
@@ -18,3 +18,5 @@ end
 
 # Windows does not include zoneinfo files, so bundle the tzinfo-data gem
 gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby]
+
+gem "rinku", "~> 2.0"
diff --git a/Gemfile.lock b/Gemfile.lock
index 68818cfb..86a44730 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -65,6 +65,7 @@ GEM
     rb-fsevent (0.11.0)
     rb-inotify (0.10.1)
       ffi (~> 1.0)
+    rinku (2.0.6)
     rouge (1.11.1)
     safe_yaml (1.0.5)
     sass (3.7.4)
@@ -85,6 +86,7 @@ DEPENDENCIES
   jekyll-redirect-from
   jekyll-seo-tag
   jekyll-sitemap
+  rinku (~> 2.0)
   tzinfo-data
   webrick
 
diff --git a/_config.yml b/_config.yml
index 926026ef..67bac968 100644
--- a/_config.yml
+++ b/_config.yml
@@ -60,9 +60,6 @@ collections:
     permalink: /blog/:year/:month/:day/:title/
     output: true
 
-plugins_dir:
-- jekyll-redirect-from
-
 sass:
   sass_dir: _sass
 
diff --git a/_includes/goref.html b/_includes/goref.html
index a73d358e..8cc74e73 100644
--- a/_includes/goref.html
+++ b/_includes/goref.html
@@ -7,13 +7,13 @@
 </div>
 <h2><a class="permalink" href="#{{ include.goref.id }}">{{ include.goref.title }}</a></h2>
 <p>{{ include.goref.authors }}; {{ include.goref.year }}</p>
-<p>{{ include.goref.description | markdownify }}</p>
+<p>{{ include.goref.description | markdownify | autolinkify }}</p>
 
 {% if include.goref.comments %}
 <h3>Comments</h3>
 <ul>
 {% for comment in include.goref.comments %}
-    <li>{{ comment | markdownify }}</li>
+    <li>{{ comment | markdownify | autolinkify  }}</li>
 {% endfor %}
 </ul>
 {% endif %}
diff --git a/_plugins/autolinkify.rb b/_plugins/autolinkify.rb
new file mode 100644
index 00000000..8b9be0b4
--- /dev/null
+++ b/_plugins/autolinkify.rb
@@ -0,0 +1,11 @@
+require 'rinku'
+
+module Jekyll
+    module AutolinkifyFilter
+        def autolinkify(input)
+            Rinku.auto_link(input, :all, 'target="_blank"')
+        end
+    end
+end
+
+Liquid::Template.register_filter(Jekyll::AutolinkifyFilter)

From c5ea9e7d529f70bb21e54e3c2506cd06b9802eb5 Mon Sep 17 00:00:00 2001
From: pgaudet <pgaudet1@gmail.com>
Date: Wed, 13 Mar 2024 17:14:35 +0100
Subject: [PATCH 06/52] Update download-ontology.md

Removed 'do not manually annotate'
---
 _docs/download-ontology.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/_docs/download-ontology.md b/_docs/download-ontology.md
index c992de7c..e643025b 100644
--- a/_docs/download-ontology.md
+++ b/_docs/download-ontology.md
@@ -52,8 +52,6 @@ For internal checking purposes, GO maintains two "anti-slims", terms to which an
 |**Subset name**|**Usage** |**File name** |**OBO format** |**OWL format** |**json format** |
 |------------------|----------|----------|----------|----------|----------|
 |**Do not annotate**|The set of high level terms that are useful for grouping, but should have no direct annotations| gocheck_do_not_annotate |[obo](https://current.geneontology.org/ontology/subsets/gocheck_do_not_annotate.obo)| [owl](https://current.geneontology.org/ontology/subsets/gocheck_do_not_annotate.owl){:target="blank"}  |[json](https://current.geneontology.org/ontology/subsets/gocheck_do_not_annotate.json){:target="blank"}  |
-|**Do not manually annotate**|The set of high level terms that are useful for grouping, but should have no direct annotations except from automated tools| gocheck_do_not_manually_annotate|[obo](https://current.geneontology.org/ontology/subsets/gocheck_do_not_manually_annotate.obo)|[owl](https://current.geneontology.org/ontology/subsets/gocheck_do_not_manually_annotate.owl){:target="blank"}  |[json](https://current.geneontology.org/ontology/subsets/gocheck_do_not_manually_annotate.json){:target="blank"}  |
-
 
 ## Cross-references of GO to other classification systems
 

From 9d8f161756a907e605c441d5aaf7cb216551834a Mon Sep 17 00:00:00 2001
From: pgaudet <pgaudet1@gmail.com>
Date: Wed, 13 Mar 2024 17:18:32 +0100
Subject: [PATCH 07/52] Update go-archives.md

added do not manually annotate
---
 _docs/go-archives.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/_docs/go-archives.md b/_docs/go-archives.md
index 9a137f35..e3036f57 100644
--- a/_docs/go-archives.md
+++ b/_docs/go-archives.md
@@ -51,6 +51,8 @@ _If you are looking for current, actively maintained GO slims, please [see the g
 |Rice (Syngenta) |	J. Yu et al. [PMID:11935018](http://www.ncbi.nlm.nih.gov/pubmed/11935018){:target="blank"} Apr 2002 |[old GO format](http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/GO_slims/archived_GO_slims/goslim_Rice_Syngenta.0204){:target="blank"}|
 |UniProtKB-GOA |	N. Mulder, M. Pruess [PMID:12230037](http://www.ncbi.nlm.nih.gov/pubmed/12230037){:target="blank"} Nov 2002 |[old GO format](http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/GO_slims/archived_GO_slims/goslim_goa.2002){:target="blank"}|
 |Yeast |	SGD curators Aug 2003 	|[old GO format](http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/GO_slims/archived_GO_slims/goslim_yeast.2003){:target="blank"}|
+|Do not manually annotate|The set of high level terms that are useful for grouping, but should have no direct annotations except from automated tools| gocheck_do_not_manually_annotate|[obo](http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.obo)|[owl](http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.owl){:target="blank"}  |[json]([(http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.json){:target="blank"}  |
+
 
 ## How the GO Archive was built
 The archive was generated using the data scattered across 3 legacy systems, namely the GO CVS, the GO SVN and the old product archive. Each of those systems was created at different times to serve different purposes and they were partially redundant, both in terms of the types of data they contained and in time frames (e.g. SVN was maintained from 2011 to 2018 while CVS was maintained from 2002 to 2018). The project is hosted on [GitHub](https://github.com/geneontology/archive-reconstruction){:target="blank"}.

From 62a84fd1e79fe66d0250598b629017a4ec21764d Mon Sep 17 00:00:00 2001
From: pgaudet <pgaudet1@gmail.com>
Date: Wed, 13 Mar 2024 17:23:01 +0100
Subject: [PATCH 08/52] Update go-archives.md

updated archive
---
 _docs/go-archives.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_docs/go-archives.md b/_docs/go-archives.md
index e3036f57..822d4827 100644
--- a/_docs/go-archives.md
+++ b/_docs/go-archives.md
@@ -51,7 +51,7 @@ _If you are looking for current, actively maintained GO slims, please [see the g
 |Rice (Syngenta) |	J. Yu et al. [PMID:11935018](http://www.ncbi.nlm.nih.gov/pubmed/11935018){:target="blank"} Apr 2002 |[old GO format](http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/GO_slims/archived_GO_slims/goslim_Rice_Syngenta.0204){:target="blank"}|
 |UniProtKB-GOA |	N. Mulder, M. Pruess [PMID:12230037](http://www.ncbi.nlm.nih.gov/pubmed/12230037){:target="blank"} Nov 2002 |[old GO format](http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/GO_slims/archived_GO_slims/goslim_goa.2002){:target="blank"}|
 |Yeast |	SGD curators Aug 2003 	|[old GO format](http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/GO_slims/archived_GO_slims/goslim_yeast.2003){:target="blank"}|
-|Do not manually annotate|The set of high level terms that are useful for grouping, but should have no direct annotations except from automated tools| gocheck_do_not_manually_annotate|[obo](http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.obo)|[owl](http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.owl){:target="blank"}  |[json]([(http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.json){:target="blank"}  |
+|Do not manually annotate|The set of high level terms that are useful for grouping, but should have no direct annotations except from automated tools|[obo](http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.obo) [owl](http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.owl){:target="blank"} [json](http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.json){:target="blank"} [tsv](http://release.geneontology.org/2024-01-17/ontology/subsets/gocheck_do_not_manually_annotate.tsv)|
 
 
 ## How the GO Archive was built

From 6f7bf5711649473967fc67851aa04cc5b8553c1e Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Thu, 21 Mar 2024 17:52:45 -0600
Subject: [PATCH 09/52] Create gene-product-information-gpi-format-20.md

New GPI page. Not currently linked anywhere on the website.
---
 .../gene-product-information-gpi-format-20.md | 106 ++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 _docs/gene-product-information-gpi-format-20.md

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
new file mode 100644
index 00000000..08a6c9c8
--- /dev/null
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -0,0 +1,106 @@
+---
+title: Gene Product Information (GPI) format 2.0
+permalink: /docs/gene-product-information-gpi-format-2.0/
+
+---
+# This page describes the Gene Product Information (GPI) 2.0 format. This format has not yet been implemented in GO but is provided to help with the changeover from previous GPAD/GPI versions.
+## Currently under comstruction
+
+
+# Gene Product Information (GPI) files
+
+This guide lays out the format specifications for the *G*ene *P*roduct *I*nformation (GPI) 2.0 format.
+**Note that the GPI file is the companion file for the [GPAD file](/docs/gene-product-association-data-gpad-format/).**
+
+The Gene Ontology Consortium stores annotation data, the representation of gene product attributes using GO terms, in tab-delimited text files. Each line in the file represents a single association between a gene product and a GO term with a certain evidence code and the reference to support the link.
+
+GO also provides annotations as [GAF files](/docs/go-annotation-file-gaf-format-2.2/). For more general information on annotation, please see the [Introduction to GO annotation](/docs/go-annotations/).
+
+# Changes from the GPI 1.2 to GPI 2.0
+**Header**
+* **The `gpi-version` header must read `2.0` for this format.**
+  
+**Columns**
+* **Columns 1 & 2 from the GPI 1.2 are now combined in a single column containing an id in CURIE syntax, e.g. UniProtKB:P56704.**
+* **NCBI taxon ids are to be prefixed with 'NCBITaxon:' to indicate the source of the id, e.g. NCBITaxon:6239**
+* **Dates must now follow the ISO-8601 format, e.g. YYYY-MM-DD; time may be included as YYYY-MM-DDTHH:MM:SS**
+<!-- does col 5 have to be an ontology ID or are ontology labels, entity types ok? -->
+# Gene Product Information (GPI) 2.0 format
+
+## GPI Header
+All annotation files must start with a single line denoting the file format. For GPI it is as follows:
+
+    !gpi-version: 2.0
+
+Other information, such as contact details for the submitter or database group, useful links, etc., can be included in an association file by prefixing the line with an exclamation mark (**!**); such lines will be ignored by parsers.
+
+Required information to provide in the header:
+
+    !generated-by: database listed in dbxrefs.yaml
+    !date-generated: YYYY-MM-DD or YYYY-MM-DDTHH:MM
+
+## GPI fields
+
+The file format comprises 11 tab-delimited fields. Fields with multiple values (for example, gene product synonyms) should separate values by pipes.
+
+| **Column** | **Content** | **Required?**	| **Cardinality** | **Example**|
+|----------|---------|-------------|---------|--------|
+| 1 | [DB:DB_Object_ID](#db-db-object-id "Definition and requirements for DB:DB Object ID (column 1)") |	required |	1 |	UniProtKB:Q4VCS5|
+| 2 | [DB_Object_Symbol](#db-object-symbol "Definition and requirements for DB Object Symbol (column 2)") |	required |	1 |	AMOT|
+| 3 | [DB_Object_Name](#db-object-name "Definition and requirements for DB Object Name (column 3)") |	optional |	0 or greater |	Angiomotin|
+| 4 | [DB_Object_Synonym(s)](#db-object-synonyms "Definition and requirements for DB Object Synonym(s) (column 4)") |	optional |	0 or greater |	E230009N18Rik\|KIAA1071|
+| 5 | [DB_Object_Type](#db-object-type "Definition and requirements for DB Object Type (column 5)") |	required |	1 |	PR:000000001|
+| 6 | [DB_Object_Taxon](#taxon "Definition and requirements for DB Object Taxon (column 6)") |	required |	1 |	NCBITaxon:9606|
+| 7 | [Encoded_by](#encoded-by "Definition and requirements for Encoded by (column 7)") | optional | 0 or greater | ***EXAMPLE NEEDED***|
+| 8 | [Parent_Protein](#parent-protein "Definition and requirements for Parent Protein (column 8)") |	optional |	0 or 1 |	UniProtKB:Q4VCS5|
+| 9 | [Protein_Containing_Complex_Members](#complex-members "Definition and requirements for Protein Containing Complex Members (column 9)") | optional | 0 or greater | ***EXAMPLE NEEDED***|
+| 10 | [DB_Xref(s)](#db-xrefs "Definition and requirements for DB_Xref(s) (column 10)") |	optional |	0 or greater | |
+| 11 | [Gene_Product_Properties](#gene-product-properties "Definition and requirements for Gene Product Properties (column 11)") |	optional |	0 or greater |	db_subset=Swiss-Prot|
+
+### Definitions and requirements for field contents
+
+#### DB:DB_Object_ID
+The **DB** prefix is the database abbreviation (namespace) from which the unique identifier **DB Object ID** is drawn and must be one of the values from the set of GO database cross-references. The **DB:DB Object ID** is the combined identifier for the database object.
+This field is mandatory, cardinality 1.
+
+<!--In GPI 1.0 format, the identifier may reference a top-level primary gene or gene product identifier, or an identified variant of a gene or gene product, for example identifiers that specify distinct proteins produced by differential splicing, alternative translational starts, post-translational cleavage, or post-translational modification. Identifiers for functional RNAs and protein complexes can also be included in this column. 
+    If the gene product is not a top-level gene or gene product identifier, the **Parent_Object_ID** field should contain the canonical form of the gene or gene product. 
+    Note that while the **DB_Object_ID** is the identifier for a database object that may be used for annotation, it may or may not correspond exactly to what is described in a paper. For example, a paper describing functional characterization of a protein may result in annotations to the gene encoding the protein (gene ID in **DB_Object_ID**) or annotations to the protein (protein ID in **DB_Object_ID**), depending on annotation practice of the contributing group. 
+-->
+#### DB_Object_Symbol
+A (unique and valid) symbol to which the **DB:DB_Object_ID** is matched.\
+This field is mandatory, cardinality 1.\
+The **DB_Object_Symbol** field should contain a symbol that is recognizable to a biologist wherever possible (an abbreviation widely used in the literature, for example). It is not a unique identifier or an accession number (unlike the **DB:DB_Object_ID**), although IDs can be used as a **DB_Object_Symbol** if there is no more biologically meaningful symbol available (e.g., when an unnamed gene is annotated). ORF names can be used for otherwise unnamed genes or proteins. If gene products are annotated, the gene product symbol can be used if available. Many gene product annotation entries may share a gene symbol. 
+The text entered in the **DB_Object_Name** and **DB_Object_Symbol** should refer to the entity in **DB:DB_Object_ID**. For example, several alternative transcripts from one gene may be annotated separately, each with specific gene product identifiers in **DB:DB_Object_ID**, but with the same gene symbol in the **DB_Object_Symbol** column. 
+#### DB_Object_Name
+The name of the gene or gene product in **DB:DB_Object_ID**.\
+This field is not mandatory, cardinality 0, 1 [white space allowed]\
+The text entered in the **DB_Object_Name** and **DB_Object_Symbol** should refer to the entity in **DB:DB_Object_ID**. 
+#### DB_Object_Synonym
+These entries may be a gene symbol or other text. Note that we strongly recommend that synonyms are included in the GPI file, as this aids the searching of GO.\
+This field is not mandatory, cardinality 0, 1, >1 [white space allowed]; for cardinality >1 use a pipe to separate entries (e.g. YFL039C|ABY1|END7|actin gene). 
+#### DB_Object_Type
+A description of the type of the gene or gene product being annotated. This field uses Sequence Ontology labels and may correspond to one of the following: gene, protein_complex; protein; transcript; ncRNA; rRNA; tRNA; snRNA; snoRNA; or any subtype of ncRNA in the Sequence Ontology. If the precise product type is unknown, gene_product should be used.\
+This field is mandatory, cardinality 1.\
+The object type (gene, transcript, protein, protein_complex, etc.) listed in the **DB_Object_Type** field must match the database entry identified by the **DB:DB_Object_ID**. Note that **DB_Object_Type** refers to the database entry (i.e. it represents a protein, functional RNA, etc.); this column does not reflect anything about the GO term or the evidence on which the annotation is based. 
+#### DB Object Taxon
+The NCBI taxon ID of the species encoding the gene product.\
+This field is mandatory, cardinality 1.\
+The taxon should be specified as an NCBI taxon ID with the prefix "NCBITaxon:", e.g. NCBITaxon:9606. 
+#### Encoded by
+
+#### Parent Protein CHANGE THIS TEXT
+If the **DB:DB_Object_ID** refers to a variant of a gene product, this column will hold the identifier of the gene product from which it was derived.\
+This field is mandatory, cardinality 1, when variant forms of a gene product (e.g. identifiers that specify distinct proteins produced by differential splicing, alternative translational starts, post-translational cleavage or post-translational modification) are represented in **DB:DB_Object_ID**. If the **DB:DB_Object_ID** refers to the canonical form of a gene product, this column should be blank.\
+The identifier used must be a standard 2-part global identifier, e.g. UniProtKB:OK0206 
+The entity in the **Parent_Object_ID** column may not necessarily be the canonical form of the gene product; the canonical form would be identifiable as an entry for that gene product in the GPI file that would have the **Parent_Object_ID** blank. 
+#### Protein Containing Complex Members
+
+#### DB_Xrefs
+Identifiers for the object in **DB:DB_Object_ID** found in other databases.\
+This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.\
+Identifiers used must be standard 2-part global identifiers, e.g. UniProtKB:OK0206 
+This column should be used to record IDs for this object in other databases; for gene products in model organism databases, this must include the UniProtKB ID, and may also include NCBI gene or protein IDs, etc. 
+#### Gene Product Properties
+This field is optional, cardinality 0+; multiple properties should be pipe-separated.\
+The Properties column can be filled with a pipe separated list of values in the format "property_name = property_value". There is a fixed vocabulary for the property names and this list can be extended when necessary. Supported properties will include: 'GO annotation complete', 'Phenotype annotation complete' (the value for these two properties would be a date), 'Target set' (e.g. Reference Genome, Kidney etc.), 'Database subset' (e.g. Swiss-Prot, TrEMBL). 

From 309b51339ad9f34011a57cdaa5a910feb75c1367 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Tue, 26 Mar 2024 10:11:06 -0600
Subject: [PATCH 10/52] Update gene-product-information-gpi-format-20.md

---
 .../gene-product-information-gpi-format-20.md | 22 ++++++++++++++-----
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
index 08a6c9c8..171186cc 100644
--- a/_docs/gene-product-information-gpi-format-20.md
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -4,13 +4,14 @@ permalink: /docs/gene-product-information-gpi-format-2.0/
 
 ---
 # This page describes the Gene Product Information (GPI) 2.0 format. This format has not yet been implemented in GO but is provided to help with the changeover from previous GPAD/GPI versions.
-## Currently under comstruction
+## Currently under construction
 
 
 # Gene Product Information (GPI) files
 
 This guide lays out the format specifications for the *G*ene *P*roduct *I*nformation (GPI) 2.0 format.
-**Note that the GPI file is the companion file for the [GPAD file](/docs/gene-product-association-data-gpad-format/).**
+**Note that the GPI file is the companion file for the [GPAD file](/docs/gene-product-association-data-gpad-format/).
+Both files should be submitted together using the same version.**
 
 The Gene Ontology Consortium stores annotation data, the representation of gene product attributes using GO terms, in tab-delimited text files. Each line in the file represents a single association between a gene product and a GO term with a certain evidence code and the reference to support the link.
 
@@ -68,10 +69,10 @@ This field is mandatory, cardinality 1.\
     Note that while the **DB_Object_ID** is the identifier for a database object that may be used for annotation, it may or may not correspond exactly to what is described in a paper. For example, a paper describing functional characterization of a protein may result in annotations to the gene encoding the protein (gene ID in **DB_Object_ID**) or annotations to the protein (protein ID in **DB_Object_ID**), depending on annotation practice of the contributing group. 
 -->
 #### DB_Object_Symbol
-A (unique and valid) symbol to which the **DB:DB_Object_ID** is matched.\
+A (unique and valid) symbol to which the **DB:DB_Object_ID** is matched. No white spaces allowed.\
 This field is mandatory, cardinality 1.\
 The **DB_Object_Symbol** field should contain a symbol that is recognizable to a biologist wherever possible (an abbreviation widely used in the literature, for example). It is not a unique identifier or an accession number (unlike the **DB:DB_Object_ID**), although IDs can be used as a **DB_Object_Symbol** if there is no more biologically meaningful symbol available (e.g., when an unnamed gene is annotated). ORF names can be used for otherwise unnamed genes or proteins. If gene products are annotated, the gene product symbol can be used if available. Many gene product annotation entries may share a gene symbol. 
-The text entered in the **DB_Object_Name** and **DB_Object_Symbol** should refer to the entity in **DB:DB_Object_ID**. For example, several alternative transcripts from one gene may be annotated separately, each with specific gene product identifiers in **DB:DB_Object_ID**, but with the same gene symbol in the **DB_Object_Symbol** column. 
+The text entered in the **DB_Object_Symbol** should refer to the entity in **DB:DB_Object_ID**. For example, several alternative transcripts from one gene may be annotated separately, each with specific gene product identifiers in **DB:DB_Object_ID**, but with the same gene symbol in the **DB_Object_Symbol** column. 
 #### DB_Object_Name
 The name of the gene or gene product in **DB:DB_Object_ID**.\
 This field is not mandatory, cardinality 0, 1 [white space allowed]\
@@ -80,7 +81,15 @@ The text entered in the **DB_Object_Name** and **DB_Object_Symbol** should refer
 These entries may be a gene symbol or other text. Note that we strongly recommend that synonyms are included in the GPI file, as this aids the searching of GO.\
 This field is not mandatory, cardinality 0, 1, >1 [white space allowed]; for cardinality >1 use a pipe to separate entries (e.g. YFL039C|ABY1|END7|actin gene). 
 #### DB_Object_Type
-A description of the type of the gene or gene product being annotated. This field uses Sequence Ontology labels and may correspond to one of the following: gene, protein_complex; protein; transcript; ncRNA; rRNA; tRNA; snRNA; snoRNA; or any subtype of ncRNA in the Sequence Ontology. If the precise product type is unknown, gene_product should be used.\
+An ontology identifier for the type of gene or gene product being annotated. This field uses Sequence Ontology, Protein Ontology, and GO labels and must correspond to one of the [permitted GPI entity types](https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#gpi-entity-types) or a more granular child term: SO:0001217 (protein-coding gene)
+ncRNA-coding gene 	ncRNA_gene 	SO:0001263 (ncRNA-coding gene)
+mRNA 	mRNA 	SO:0000234 (mRNA)
+ncRNA 	ncRNA 	SO:0000655 (ncRNA)
+protein 	protein 	PR:000000001 (protein)
+protein-containing complex 	protein-containing complex 	GO:0032991 (protein-containing complex)
+
+
+marker or uncloned locus 	genetic_marker 	SO:0001645; or any subtype of ncRNA in the Sequence Ontology. If the precise product type is unknown, gene_product should be used.\
 This field is mandatory, cardinality 1.\
 The object type (gene, transcript, protein, protein_complex, etc.) listed in the **DB_Object_Type** field must match the database entry identified by the **DB:DB_Object_ID**. Note that **DB_Object_Type** refers to the database entry (i.e. it represents a protein, functional RNA, etc.); this column does not reflect anything about the GO term or the evidence on which the annotation is based. 
 #### DB Object Taxon
@@ -88,7 +97,8 @@ The NCBI taxon ID of the species encoding the gene product.\
 This field is mandatory, cardinality 1.\
 The taxon should be specified as a number with the prefix "taxon". 
 #### Encoded by
-
+For proteins and transcripts, **Encoded by** refers to the gene id that encodes those entities.
+This field is not mandatory, cardinality 0, 1, >1 ; for cardinality >1 use a pipe to separate entries. 
 #### Parent Protein CHANGE THIS TEXT
 If the **DB:DB_Object_ID** refers to a variant of a gene product, this column will hold the identifier of the gene product from which it was derived.\
 This field is mandatory, cardinality 1, when variant forms of a gene product (e.g. identifiers that specify distinct proteins produced by differential splicing, alternative translational starts, post-translational cleavage or post-translational modification) are represented in **DB:DB_Object_ID**. If the **DB:DB_Object_ID** refers to the canonical form of a gene product, this column should be blank.\

From 6231be8e8dbd4b1d76f71ddbbfcc4679fdac7eb7 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Tue, 26 Mar 2024 12:12:14 -0600
Subject: [PATCH 11/52] Update download-go-annotations.md

draft to include info from https://docs.google.com/document/d/1j1zO-JHMaXi4ESrjTm_X1amLKdm3vO4szUaeB5LvxvA/edit
---
 _docs/download-go-annotations.md | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index 8448896b..0da55872 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -9,13 +9,23 @@ redirect_from:
 
 # Download annotations 
 
-## Current GO annotation downloads
-The [GAF download page](http://current.geneontology.org/products/pages/downloads.html) has GAF files for selected species.
+### Getting annotations for a selected organism
+Most tools that use GO annotations take two input files, a file with the annotations (in Gene Annotation Format, or GAF), and a file with the GO ontology structure (in Open Biomedical Ontology Format, or OBO). Because the ontology and annotations are being improved over time, we recommend downloading the latest version of the annotations for your organism, and the corresponding ontology version as specified below. In addition to the file URL, please report in any publications the date on the header of the GAF file, and ontology version number, to ensure reproducibility.
 
-GAF & GPAD+GPI files are also available from the [/annotations/](http://current.geneontology.org/annotations/index.html){:target="blank"} directory of the current release: [http://current.geneontology.org](http://current.geneontology.org){:target="blank"}
+## Commonly studied organisms 
+The [GAF download page](http://current.geneontology.org/products/pages/downloads.html) has annotations for selected species. For organisms with many expert-curated GO annotations (MODs, etc.), we recommend downloading annotations from the links in the above-linked table. These organisms often have a large number of annotations supported by direct experimental evidence, as well as annotations based on other evidence types.
 
-### Other species
-If your organism is not available in the above links, you can use [AmiGO's annotation search](https://amigo.geneontology.org/amigo/search/annotation) feature to view or download annotations.  [See our FAQ](https://geneontology.org/docs/faq/#where-can-i-view-or-download-the-complete-sets-of-go-annotations) for further information as well as how to retrieve annotations for species that are not available in AmiGO.
+[GAF format](/docs/go-annotation-file-gaf-format-2.2/) annotations, as well as the [GPAD](/docs/gene-product-association-data-gpad-format/)+[GPI](/docs/gene-product-information-gpi-format/) companion files, are available from the [/annotations/](http://current.geneontology.org/annotations/index.html){:target="blank"} directory of the current release: [http://current.geneontology.org](http://current.geneontology.org){:target="blank"}
+
+### Other organisms
+For all other organisms we recommend downloading annotations from one of the following sources, which use highly accurate computational methods:
+
++ [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}: for a large number of complete proteomes, existing GO annotations can be found through EBI's FTP server
+
++ [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}: if your organism has a reference sequence in NCBI, GO annotations are available through NCBI's FTP server
+  + Navigate to your organism, e.g. [Anopheles_gambiae](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/invertebrate/Anopheles_gambiae/representative/GCF_000005575.2_AgamP3/){:target="blank"}
+  + Download the file with the suffix gene_ontology.gaf.gz
+These annotations should be used with GO ontology version noted in the header of the GAF 
 
 ## About GO annotation formats
 + Released monthly

From fc84f0afbd9b8a12b39e2a736301bfa8103bff30 Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Wed, 27 Mar 2024 15:34:53 -0700
Subject: [PATCH 12/52] Generate individual page for each GO_REF

---
 Gemfile              |  1 +
 Gemfile.lock         |  2 ++
 _config.yml          |  6 ++++++
 _includes/goref.html |  3 +--
 _layouts/goref.html  | 13 +++++++++++++
 gorefs.html          | 18 +++++++++++++-----
 6 files changed, 36 insertions(+), 7 deletions(-)
 create mode 100644 _layouts/goref.html

diff --git a/Gemfile b/Gemfile
index cd029436..bf4c7806 100644
--- a/Gemfile
+++ b/Gemfile
@@ -14,6 +14,7 @@ group :jekyll_plugins do
    gem "jekyll-redirect-from"
    gem "jekyll-seo-tag"
    gem 'jekyll-algolia', '~> 1.0'
+   gem 'jekyll-datapage-generator'
 end
 
 # Windows does not include zoneinfo files, so bundle the tzinfo-data gem
diff --git a/Gemfile.lock b/Gemfile.lock
index 86a44730..1c528f63 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -34,6 +34,7 @@ GEM
       nokogiri (~> 1.6)
       progressbar (~> 1.9)
       verbal_expressions (~> 0.1.5)
+    jekyll-datapage-generator (1.4.0)
     jekyll-feed (0.11.0)
       jekyll (~> 3.3)
     jekyll-redirect-from (0.16.0)
@@ -82,6 +83,7 @@ PLATFORMS
 DEPENDENCIES
   jekyll (= 3.4.3)
   jekyll-algolia (~> 1.0)
+  jekyll-datapage-generator
   jekyll-feed
   jekyll-redirect-from
   jekyll-seo-tag
diff --git a/_config.yml b/_config.yml
index 67bac968..03c4e370 100644
--- a/_config.yml
+++ b/_config.yml
@@ -23,6 +23,7 @@ gems:
   - jekyll-redirect-from
   - jekyll-seo-tag
   - jekyll-sitemap
+  - jekyll-datapage-generator
 
 exclude:
   - Gemfile
@@ -78,3 +79,8 @@ algolia:
     - covid-19.html
 #   nodes_to_index: 'article' # elements to be indexed 
   nodes_to_index: 'p,blockquote,li,div,paragraph,td,span,h1,h2,h3'
+
+page_gen:
+  - data: gorefs
+    template: goref
+    name: id
diff --git a/_includes/goref.html b/_includes/goref.html
index 8cc74e73..9337fe9c 100644
--- a/_includes/goref.html
+++ b/_includes/goref.html
@@ -1,11 +1,10 @@
-<a class="permalink-anchor" id="{{ include.goref.id }}"></a>
 <div>
     <span class="label label-primary">{{ include.goref.id }}</span>
     {% if include.goref.is_obsolete %}
     <span class="label label-danger text-uppercase">obsolete</span>
     {% endif %}
 </div>
-<h2><a class="permalink" href="#{{ include.goref.id }}">{{ include.goref.title }}</a></h2>
+<h2>{{ include.goref.title }}</h2>
 <p>{{ include.goref.authors }}; {{ include.goref.year }}</p>
 <p>{{ include.goref.description | markdownify | autolinkify }}</p>
 
diff --git a/_layouts/goref.html b/_layouts/goref.html
new file mode 100644
index 00000000..f05a3bfc
--- /dev/null
+++ b/_layouts/goref.html
@@ -0,0 +1,13 @@
+---
+layout: default
+---
+
+<div class="container">
+    <div class="u-margin-bottom-medium">
+        <a href="{{ '/gorefs.html' | relative_url }}">
+            <i class="fa fa-chevron-left" aria-hidden="true"></i>
+            All GO REFs
+        </a>
+    </div>
+    {% include goref.html goref=page %}
+</div>
diff --git a/gorefs.html b/gorefs.html
index 01978de0..1fc50ab5 100644
--- a/gorefs.html
+++ b/gorefs.html
@@ -1,4 +1,10 @@
 ---
+# Note that when developing locally, this page must be reached by visiting /gorefs.html. Without
+# the .html extension (/gorefs), an index page for the gorefs directory generated by the
+# jekyll-datapage-generator plugin will be displayed instead. This is a quirk of the Jekyll
+# development web server. When deployed on GitHub pages, this page will be available at /gorefs
+# without the .html extension.
+
 layout: default
 ---
 
@@ -14,9 +20,11 @@ <h1>GO REFs</h1>
         {% endunless %}
     {% endif %}
 
-    {% for goref in site.data.gorefs %}
-    <hr>
-    {% include goref.html goref=goref %}
-    {% endfor %}
+    <ul>
+        {% for goref in site.data.gorefs %}
+            <li>
+                <a href="{{ goref.id | datapage_url: 'gorefs' }}">[{{ goref.id }}] {{ goref.title }}</a>
+            </li>
+        {% endfor %}
+    </ul>
 </div>
-

From 06f39373a96757e0ed23caed41d7de93d97894a4 Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Wed, 27 Mar 2024 17:13:15 -0700
Subject: [PATCH 13/52] Separate GO REF index list into obsolete and
 non-obsolete lists

---
 _includes/goref_toc_list_item.html |  5 +++++
 _sass/custom/_typography.scss      |  8 ++++++++
 gorefs.html                        | 15 ++++++++++++---
 3 files changed, 25 insertions(+), 3 deletions(-)
 create mode 100644 _includes/goref_toc_list_item.html

diff --git a/_includes/goref_toc_list_item.html b/_includes/goref_toc_list_item.html
new file mode 100644
index 00000000..8f16bf16
--- /dev/null
+++ b/_includes/goref_toc_list_item.html
@@ -0,0 +1,5 @@
+<li>
+    <a class="u-margin-bottom-xsmall" href="{{ include.goref.id | datapage_url: 'gorefs' }}">
+        [{{ include.goref.id }}] {{ include.goref.title }}
+    </a>
+</li>
diff --git a/_sass/custom/_typography.scss b/_sass/custom/_typography.scss
index d15c5f6a..629a5e50 100644
--- a/_sass/custom/_typography.scss
+++ b/_sass/custom/_typography.scss
@@ -82,6 +82,14 @@ a:not(.btn):not(.yasr_btn):not(.list-group-item):not(.dropdown-menu__item__link)
         width: 100%;
         left: 0;
     }
+
+    // Because these links are inline-block elements, in a <li> element the ::marker next to one
+    // would be aligned to the bottom by default if it wraps across multiple lines. This looks
+    // weird. So if the link is a direct child of a list item, set the vertical-align to top.
+    // Re funky syntax: https://github.com/sass/sass/issues/1425
+    @at-root li > #{&} {
+        vertical-align: top;
+    }
 }
 
 .btn {
diff --git a/gorefs.html b/gorefs.html
index 1fc50ab5..97dff685 100644
--- a/gorefs.html
+++ b/gorefs.html
@@ -22,9 +22,18 @@ <h1>GO REFs</h1>
 
     <ul>
         {% for goref in site.data.gorefs %}
-            <li>
-                <a href="{{ goref.id | datapage_url: 'gorefs' }}">[{{ goref.id }}] {{ goref.title }}</a>
-            </li>
+        {% unless goref.is_obsolete %}
+            {% include goref_toc_list_item.html goref=goref %}
+        {% endunless %}
+        {% endfor %}
+    </ul>
+
+    <h2>Obsolete GO REFs</h2>
+    <ul>
+        {% for goref in site.data.gorefs %}
+        {% if goref.is_obsolete %}
+            {% include goref_toc_list_item.html goref=goref %}
+        {% endif %}
         {% endfor %}
     </ul>
 </div>

From 3d2250d052e9edb10c907472d35319ce246d3585 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 1 Apr 2024 16:54:20 -0600
Subject: [PATCH 14/52] Update download-go-annotations.md

nearly done, except the PAN-GO table is not the one linked for a first-choice download
---
 _docs/download-go-annotations.md | 59 ++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index 0da55872..705f6b3e 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -10,28 +10,45 @@ redirect_from:
 # Download annotations 
 
 ### Getting annotations for a selected organism
-Most tools that use GO annotations take two input files, a file with the annotations (in Gene Annotation Format, or GAF), and a file with the GO ontology structure (in Open Biomedical Ontology Format, or OBO). Because the ontology and annotations are being improved over time, we recommend downloading the latest version of the annotations for your organism, and the corresponding ontology version as specified below. In addition to the file URL, please report in any publications the date on the header of the GAF file, and ontology version number, to ensure reproducibility.
-
-## Commonly studied organisms 
-The [GAF download page](http://current.geneontology.org/products/pages/downloads.html) has annotations for selected species. For organisms with many expert-curated GO annotations (MODs, etc.), we recommend downloading annotations from the links in the above-linked table. These organisms often have a large number of annotations supported by direct experimental evidence, as well as annotations based on other evidence types.
-
-[GAF format](/docs/go-annotation-file-gaf-format-2.2/) annotations, as well as the [GPAD](/docs/gene-product-association-data-gpad-format/)+[GPI](/docs/gene-product-information-gpi-format/) companion files, are available from the [/annotations/](http://current.geneontology.org/annotations/index.html){:target="blank"} directory of the current release: [http://current.geneontology.org](http://current.geneontology.org){:target="blank"}
-
-### Other organisms
-For all other organisms we recommend downloading annotations from one of the following sources, which use highly accurate computational methods:
-
-+ [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}: for a large number of complete proteomes, existing GO annotations can be found through EBI's FTP server
-
-+ [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}: if your organism has a reference sequence in NCBI, GO annotations are available through NCBI's FTP server
-  + Navigate to your organism, e.g. [Anopheles_gambiae](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/invertebrate/Anopheles_gambiae/representative/GCF_000005575.2_AgamP3/){:target="blank"}
-  + Download the file with the suffix gene_ontology.gaf.gz
-These annotations should be used with GO ontology version noted in the header of the GAF 
-
-## About GO annotation formats
-+ Released monthly
-+ Files are taxon-specific, with a few exceptions including the Reactome and *Candida* Genome Database files
+Most tools that use GO annotations take two input files: 
+1. a file with the **annotations** (in Gene Annotation Format, or GAF)
+2. a file with the GO **ontology** structure (in Open Biomedical Ontology Format, or OBO)
+   
+Because the ontology and annotations are constantly being improved over time, we recommend downloading the latest version of the annotations for your organism and the corresponding ontology file for that GO version. The version should be specified in the header of the annotation file. 
+
+#### Citing files
+To ensure reproducibility for any publication where GO was used at any point in the research, please include:
+* [appropriate GO publication(s)](/docs/go-citation-policy/)
+* the URL where the files were obtained
+* the date on the header of the GAF file
+* the ontology version number
+
+## [1. Commonly studied organisms](http://current.geneontology.org/products/pages/downloads.html)
+[The GAF download page has annotations for selected commonly-studied species](http://current.geneontology.org/products/pages/downloads.html).
+For organisms with many expert-curated GO annotations (MODs, etc.), we recommend downloading annotations from the links in the above-linked table. These organisms often have a large number of manual annotations supported by direct experimental evidence, as well as annotations based on other evidence types.
+<!-- * Most of these have two downloads available, one with the full set of GO annotations, and one with only the “core” function annotations (PAN-GO) for each organism. /-->
+* These annotations should be used with the [latest version of the GO ontology](http://current.geneontology.org/ontology/index.html).
+* Annotations for these organisms are also available as GPAD/GPI companion files; see the [/annotations/](http://current.geneontology.org/annotations/index.html){:target="blank"} directory of the current release [http://current.geneontology.org](http://current.geneontology.org){:target="blank"}. For more information on these infrequently used filetypes see the format pages for [GPAD](/docs/gene-product-association-data-gpad-format/)+[GPI](/docs/gene-product-information-gpi-format/).
+
+### 2. All other organisms
+For all other organisms we recommend downloading annotations from one of the following sources, UniProt or NCBI RefSeq- both of which use highly accurate computational methods. The header of the annotation file specifies the version of the ontology you should use to accompany the annotation file. Older versions of the [GO ontology can be downloaded from the GO download archives](http://release.geneontology.org/).
+
+* [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}: Annotation files are available for about 20,000 complete proteomes (one protein sequence per protein-coding gene). Use these files if you want to use **UniProtKB identifiers**.
+  * Go to [https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}
+  * Navigate to your organism's file, e.g. [22426.A_gambiae.goa](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/22426.A_gambiae.goa){:target="blank"}.
+    *Tip: use your browser's in-page search to find the species name.*
+
+* [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}: If your organism has a reference sequence in NCBI, GO annotations are available through NCBI's FTP server. Use these files if you want to use **Entrez Gene identifiers**. Annotation files are available for all eukaryotic genomes available at NCBI. Note that GO annotations are not currently available for archaea, bacteria or viruses
+  * Go to [https://ftp.ncbi.nlm.nih.gov/genomes/refseq/](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}
+  * Navigate to your organism, e.g. Anopheles_gambiae/ is in the `/invertebrate` directory
+  * Open the `representative/` directory, and open the directory within that
+  * Download the file with the suffix `gene_ontology.gaf.gz`, e.g. `GCF_943734735.2-RS_2023_12_gene_ontology.gaf.gz`
+
+## More information on GO annotation formats
++ GO has monthly releases
++ Annotation files are taxon-specific, with a few exceptions including the Reactome and *Candida* Genome Database files
 + Current format guides:
-  + [GAF format](/docs/go-annotation-file-gaf-format-2.2/) 
+  + [GAF format 2.2](/docs/go-annotation-file-gaf-format-2.2/) 
   + [GPAD](/docs/gene-product-association-data-gpad-format/) + [GPI](/docs/gene-product-information-gpi-format/) companion files
   
 ## Programmatic access to GO annotations

From 5120521a418f48e62d2a60c44ed5829d60b0c51d Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 1 Apr 2024 17:01:34 -0600
Subject: [PATCH 15/52] Update download-go-annotations.md

format
---
 _docs/download-go-annotations.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index 705f6b3e..b1eca140 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -35,10 +35,10 @@ For all other organisms we recommend downloading annotations from one of the fol
 
 * [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}: Annotation files are available for about 20,000 complete proteomes (one protein sequence per protein-coding gene). Use these files if you want to use **UniProtKB identifiers**.
   * Go to [https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}
-  * Navigate to your organism's file, e.g. [22426.A_gambiae.goa](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/22426.A_gambiae.goa){:target="blank"}.
+  * Navigate to your organism & download the `.goa` file, e.g. [22426.A_gambiae.goa](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/22426.A_gambiae.goa){:target="blank"}
     *Tip: use your browser's in-page search to find the species name.*
 
-* [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}: If your organism has a reference sequence in NCBI, GO annotations are available through NCBI's FTP server. Use these files if you want to use **Entrez Gene identifiers**. Annotation files are available for all eukaryotic genomes available at NCBI. Note that GO annotations are not currently available for archaea, bacteria or viruses
+* [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}: If your organism has a reference sequence in NCBI, GO annotations are available through NCBI's FTP server. Use these files if you want to use **Entrez Gene identifiers**. Annotation files are available for all eukaryotic genomes available at NCBI. Note that GO annotations are not currently available for archaea, bacteria or viruses.
   * Go to [https://ftp.ncbi.nlm.nih.gov/genomes/refseq/](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}
   * Navigate to your organism, e.g. Anopheles_gambiae/ is in the `/invertebrate` directory
   * Open the `representative/` directory, and open the directory within that

From 30a04d62077467e86fff0860a72dc77e3ff0f823 Mon Sep 17 00:00:00 2001
From: Matthias Blum <mat.blum@gmail.com>
Date: Tue, 2 Apr 2024 15:16:17 +0100
Subject: [PATCH 16/52] Fix broken link

Fix a broken link on the documentation of the GO Annotation File v2.2 (https://geneontology.org/docs/go-annotation-file-gaf-format-2.2/)
---
 _docs/go-annotation-file-gaf-format-22.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_docs/go-annotation-file-gaf-format-22.md b/_docs/go-annotation-file-gaf-format-22.md
index 5fb18816..2991ee28 100644
--- a/_docs/go-annotation-file-gaf-format-22.md
+++ b/_docs/go-annotation-file-gaf-format-22.md
@@ -87,7 +87,7 @@ The annotation flat file format is comprised of 17 tab-delimited fields.
 #### DB (column 1)
 Refers to the database from which the identifier in **DB object ID** (column 2) is drawn. This is not necessarily the group submitting the file. If a UniProtKB ID is the **DB object ID** (column 2), **DB** (column 1) should be UniProtKB.
 
-Must be one of the values from the set of [GO database cross-references]([http://amigo.geneontology.org/xrefs](https://github.com/geneontology/go-site/blob/master/metadata/db-xrefs.yaml)).
+Must be one of the values from the set of [GO database cross-references](http://amigo.geneontology.org/xrefs).
 
     This field is mandatory, cardinality 1.
 

From c4487899c8b1106623c85a86cdcba3a400a1d887 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 8 Apr 2024 16:40:44 -0600
Subject: [PATCH 17/52] Update gene-product-information-gpi-format-20.md

fixing links, still needs edits
---
 .../gene-product-information-gpi-format-20.md | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
index 171186cc..490dd045 100644
--- a/_docs/gene-product-information-gpi-format-20.md
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -46,21 +46,21 @@ The file format comprises 10 tab-delimited fields. Fields with multiple values (
 
 | **Column** | **Content** | **Required?**	| **Cardinality** | **Example**|
 |----------|---------|-------------|---------|--------|
-| 1 | [DB:DB_Object_ID](#db-db-object-id "Definition and requirements for DB:DB Object ID (column 1)") |	required |	1 |	UniProtKB:Q4VCS5|
+| 1 | [DB:DB_Object_ID](#dbdb-object-id "Definition and requirements for DB:DB Object ID (column 1)") |	required |	1 |	UniProtKB:Q4VCS5|
 | 2 | [DB_Object_Symbol](#db-object-symbol "Definition and requirements for DB Object Symbol (column 2)") |	required |	1 |	AMOT|
 | 3 | [DB_Object_Name](#db-object-name "Definition and requirements for DB Object Name (column 3)") |	optional |	0 or greater |	Angiomotin|
-| 4 | [DB_Object_Synonym(s)](#db-object-synonyms "Definition and requirements for DB Object Synonym(s) (column 4)") |	optional |	0 or greater |	E230009N18Rik|KIAA1071|
+| 4 | [DB_Object_Synonym(s)](#db-object-synonym "Definition and requirements for DB Object Synonym(s) (column 4)") |	optional |	0 or greater |	E230009N18Rik|KIAA1071|
 | 5 | [DB_Object_Type](#db-object-type "Definition and requirements for DB Object Type (column 5)") |	required |	1 |	PR:000000001|
-| 6 | [DB_Object_Taxon](#taxon "Definition and requirements for DB Object Taxon (column 6)") |	required |	1 |	taxon:9606|
+| 6 | [DB_Object_Taxon](#db-object-taxon "Definition and requirements for DB Object Taxon (column 6)") |	required |	1 |	taxon:9606|
 | 7 | [Encoded_by](#encoded-by "Definition and requirements for Encoded by (column 7)") | optional | 0 or greater | ***EXAMPLE NEEDED***|
 | 8 | [Parent_Protein](#parent-protein "Definition and requirements for Parent Protein (column 8)") |	optional |	0 or 1 |	UniProtKB:Q4VCS5|
-| 9 | [Protein_Containing_Complex_Members](#complex-members "Definition and requirements for Protein Containing Complex Members (column 9)") | optional | 0 or greater | ***EXAMPLE NEEDED***|
-| 10 | [DB_Xref(s)](#db-xrefs "Definition and requirements for DB_Xref(s) (column 10)") |	optional |	0 or greater | |
+| 9 | [Protein_Containing_Complex_Members](#protein-containing-complex-members "Definition and requirements for Protein Containing Complex Members (column 9)") | optional | 0 or greater | ***EXAMPLE NEEDED***|
+| 10 | [DB_Xref(s)](#db_xrefs "Definition and requirements for DB_Xref(s) (column 10)") |	optional |	0 or greater | |
 | 11 | [Gene_Product_Properties](#gene-product-properties "Definition and requirements for Gene Product Properties (column 11)") |	optional |	0 or greater |	db_subset=Swiss-Prot|
 
 ### Definitions and requirements for field contents
 
-#### DB:DB_Object_ID
+#### DB:DB Object ID
 The **DB** prefix is the database abbreviation (namespace) from which the unique identifier **DB Object ID** is drawn and must be one of the values from the set of GO database cross-references. The **DB:DB Object ID** is the combined identifier for the database object.
 This field is mandatory, cardinality 1.\
 
@@ -68,19 +68,19 @@ This field is mandatory, cardinality 1.\
     If the gene product is not a top-level gene or gene product identifier, the **Parent_Object_ID** field should contain the canonical form of the gene or gene product. 
     Note that while the **DB_Object_ID** is the identifier for a database object that may be used for annotation, it may or may not correspond exactly to what is described in a paper. For example, a paper describing functional characterization of a protein may result in annotations to the gene encoding the protein (gene ID in **DB_Object_ID**) or annotations to the protein (protein ID in **DB_Object_ID**), depending on annotation practice of the contributing group. 
 -->
-#### DB_Object_Symbol
+#### DB Object Symbol
 A (unique and valid) symbol to which the **DB:DB_Object_ID** is matched. No white spaces allowed.\
 This field is mandatory, cardinality 1.\
 The **DB_Object_Symbol** field should contain a symbol that is recognizable to a biologist wherever possible (an abbreviation widely used in the literature, for example). It is not a unique identifier or an accession number (unlike the **DB:DB_Object_ID**), although IDs can be used as a **DB_Object_Symbol** if there is no more biologically meaningful symbol available (e.g., when an unnamed gene is annotated). ORF names can be used for otherwise unnamed genes or proteins. If gene products are annotated, the gene product symbol can be used if available. Many gene product annotation entries may share a gene symbol. 
 The text entered in the **DB_Object_Symbol** should refer to the entity in **DB:DB_Object_ID**. For example, several alternative transcripts from one gene may be annotated separately, each with specific gene product identifiers in **DB:DB_Object_ID**, but with the same gene symbol in the **DB_Object_Symbol** column. 
-#### DB_Object_Name
+#### DB Object Name
 The name of the gene or gene product in **DB:DB_Object_ID**.\
 This field is not mandatory, cardinality 0, 1 [white space allowed]\
 The text entered in the **DB_Object_Name** and **DB_Object_Symbol** should refer to the entity in **DB:DBB_Object_ID**. 
-#### DB_Object_Synonym
+#### DB Object Synonym
 These entries may be a gene symbol or other text. Note that we strongly recommend that synonyms are included in the GPI file, as this aids the searching of GO.\
 This field is not mandatory, cardinality 0, 1, >1 [white space allowed]; for cardinality >1 use a pipe to separate entries (e.g. YFL039C|ABY1|END7|actin gene). 
-#### DB_Object_Type
+#### DB Object Type
 An ontology identifier for the type of gene or gene product being annotated. This field uses Sequence Ontology, Protein Ontology, and GO labels and must correspond to one of the [permitted GPI entity types](https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#gpi-entity-types) or a more granular child term: SO:0001217 (protein-coding gene)
 ncRNA-coding gene 	ncRNA_gene 	SO:0001263 (ncRNA-coding gene)
 mRNA 	mRNA 	SO:0000234 (mRNA)

From 43df3e4d594169534e22577a9ad0f3c45e7b3e3d Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Tue, 9 Apr 2024 14:24:56 -0700
Subject: [PATCH 18/52] Add GitHub Actions workflow to fetch GO REF data and
 build/deploy Jekyll site

---
 .github/workflows/deploy.yaml | 55 +++++++++++++++++++++++++++++++++++
 _config.yml                   |  1 +
 2 files changed, 56 insertions(+)
 create mode 100644 .github/workflows/deploy.yaml

diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
new file mode 100644
index 00000000..6ee5ffeb
--- /dev/null
+++ b/.github/workflows/deploy.yaml
@@ -0,0 +1,55 @@
+name: Deploy Jekyll site to Pages
+
+on:
+  push:
+    branches: ["master"]
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
+# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  # Build job
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Setup Ruby
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: '2.7'
+          bundler-cache: true # runs 'bundle install' and caches installed gems automatically
+      - name: Setup Pages
+        id: pages
+        uses: actions/configure-pages@v4
+      - name: Fetch GO_REFs
+        run: make _data/gorefs.yaml
+      - name: Build with Jekyll
+        # Outputs to the './_site' directory by default
+        run: bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}"
+        env:
+          JEKYLL_ENV: production
+      - name: Upload artifact
+        # Automatically uploads an artifact from the './_site' directory by default
+        uses: actions/upload-pages-artifact@v3
+
+  # Deployment job
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/_config.yml b/_config.yml
index 03c4e370..423a4966 100644
--- a/_config.yml
+++ b/_config.yml
@@ -31,6 +31,7 @@ exclude:
   - .idea/
   - .gitignore
   - README.md
+  - vendor  # In GitHub workflows, the ruby/setup-ruby action will install gems here
 timezone: America/Los_Angeles
 defaults:
 

From 2152c74578d7ff2ab530340bf30e2f46420c2e12 Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Tue, 9 Apr 2024 14:25:30 -0700
Subject: [PATCH 19/52] Remove illegal redirect

---
 _docs/taxon-constraints.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_docs/taxon-constraints.md b/_docs/taxon-constraints.md
index 3a01ce27..2c8b2044 100644
--- a/_docs/taxon-constraints.md
+++ b/_docs/taxon-constraints.md
@@ -2,7 +2,7 @@
 title: Taxon constraints in the Gene Ontology
 permalink: /docs/taxon-constraints/
 redirect_from: 
-- /cgi-bin/references.cgi#GO_REF:0000056
+- /cgi-bin/references.cgi
 ---
 
 # Taxon constraints in the Gene Ontology

From f6788b164ed9b7e7be70af76594ff68abcd93ba8 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Tue, 9 Apr 2024 15:49:52 -0600
Subject: [PATCH 20/52] Update download-go-annotations.md

formatting
---
 _docs/download-go-annotations.md | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index b1eca140..4f99a945 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -23,19 +23,20 @@ To ensure reproducibility for any publication where GO was used at any point in
 * the date on the header of the GAF file
 * the ontology version number
 
-## [1. Commonly studied organisms](http://current.geneontology.org/products/pages/downloads.html)
-[The GAF download page has annotations for selected commonly-studied species](http://current.geneontology.org/products/pages/downloads.html).
-For organisms with many expert-curated GO annotations (MODs, etc.), we recommend downloading annotations from the links in the above-linked table. These organisms often have a large number of manual annotations supported by direct experimental evidence, as well as annotations based on other evidence types.
+### [1. Commonly studied organisms](http://current.geneontology.org/products/pages/downloads.html)
+[This GAF download page has annotations for selected commonly-studied species](http://current.geneontology.org/products/pages/downloads.html).
+
+For organisms with many expert-curated GO annotations (those with MODs, dedicated databases, etc.), we recommend downloading annotations from the links in the above-linked table. These organisms often have a large number of manual annotations supported by direct experimental evidence as well as annotations based on other evidence types.
 <!-- * Most of these have two downloads available, one with the full set of GO annotations, and one with only the “core” function annotations (PAN-GO) for each organism. /-->
 * These annotations should be used with the [latest version of the GO ontology](http://current.geneontology.org/ontology/index.html).
 * Annotations for these organisms are also available as GPAD/GPI companion files; see the [/annotations/](http://current.geneontology.org/annotations/index.html){:target="blank"} directory of the current release [http://current.geneontology.org](http://current.geneontology.org){:target="blank"}. For more information on these infrequently used filetypes see the format pages for [GPAD](/docs/gene-product-association-data-gpad-format/)+[GPI](/docs/gene-product-information-gpi-format/).
 
 ### 2. All other organisms
-For all other organisms we recommend downloading annotations from one of the following sources, UniProt or NCBI RefSeq- both of which use highly accurate computational methods. The header of the annotation file specifies the version of the ontology you should use to accompany the annotation file. Older versions of the [GO ontology can be downloaded from the GO download archives](http://release.geneontology.org/).
+For all other organisms we recommend downloading annotations from one of the following sources: UniProt or NCBI RefSeq. Both of these provide highly accurate computational methods. The header of the annotation file specifies the version of the ontology you should use to accompany the annotation file. Older versions of the [GO ontology can be downloaded from the GO download archives](http://release.geneontology.org/).
 
 * [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}: Annotation files are available for about 20,000 complete proteomes (one protein sequence per protein-coding gene). Use these files if you want to use **UniProtKB identifiers**.
   * Go to [https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}
-  * Navigate to your organism & download the `.goa` file, e.g. [22426.A_gambiae.goa](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/22426.A_gambiae.goa){:target="blank"}
+  * Navigate to your organism & download the `.goa` file, e.g. [`22426.A_gambiae.goa`](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/22426.A_gambiae.goa){:target="blank"}  
     *Tip: use your browser's in-page search to find the species name.*
 
 * [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}: If your organism has a reference sequence in NCBI, GO annotations are available through NCBI's FTP server. Use these files if you want to use **Entrez Gene identifiers**. Annotation files are available for all eukaryotic genomes available at NCBI. Note that GO annotations are not currently available for archaea, bacteria or viruses.
@@ -55,4 +56,4 @@ For all other organisms we recommend downloading annotations from one of the fol
 As for any resource in GO, GO annotations are accessible through the DOI-versioned release stored in [Zenodo](https://doi.org/10.5281/zenodo.1205159){:target="blank"} and can be retrieved using BDBag. Read more about [programmatic access](/docs/tools-guide/#programmatic-download-bdbag).
 
 ## Error or omission ?
-Any errors or omissions in annotations should be reported by writing to the [GO helpdesk](http://help.geneontology.org/){:target="blank"}
+Any errors or omissions in annotations should be reported by writing to the [GO helpdesk](http://help.geneontology.org/){:target="blank"}.

From a515fe30c1658483a92a06d9b8d72425cb2badb8 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Tue, 9 Apr 2024 16:05:21 -0600
Subject: [PATCH 21/52] Update download-go-annotations.md

awaiting approval that InterProScan is the official "last resort" and that this is an appropriate link
---
 _docs/download-go-annotations.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index 4f99a945..8b6b8635 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -10,15 +10,19 @@ redirect_from:
 # Download annotations 
 
 ### Getting annotations for a selected organism
+
+This page has instructions for getting GO annotations for almost any organism. If your organism is not available in the [official GO products]((http://current.geneontology.org/products/pages/downloads.html)), [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}, or [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}, we recommend using the latest version of [InterProScan](https://interproscan-docs.readthedocs.io/en/latest/){:target="blank"} for unannotated organisms.
+
+#### Required Files
 Most tools that use GO annotations take two input files: 
 1. a file with the **annotations** (in Gene Annotation Format, or GAF)
 2. a file with the GO **ontology** structure (in Open Biomedical Ontology Format, or OBO)
    
 Because the ontology and annotations are constantly being improved over time, we recommend downloading the latest version of the annotations for your organism and the corresponding ontology file for that GO version. The version should be specified in the header of the annotation file. 
 
-#### Citing files
+#### Citing GO
 To ensure reproducibility for any publication where GO was used at any point in the research, please include:
-* [appropriate GO publication(s)](docs/go-citation-policy/)
+* [appropriate GO publication(s)- refer to the full GO citation policy](docs/go-citation-policy/)
 * the URL where the files were obtained
 * the date on the header of the GAF file
 * the ontology version number

From 36e9de933e5c5630667eccfc1e5251541a827533 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Wed, 10 Apr 2024 15:45:40 -0600
Subject: [PATCH 22/52] Update download-ontology.md

tiny change, AGR to alliance of genome resources
---
 _docs/download-ontology.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_docs/download-ontology.md b/_docs/download-ontology.md
index e643025b..c4257005 100644
--- a/_docs/download-ontology.md
+++ b/_docs/download-ontology.md
@@ -30,7 +30,7 @@ Files are available in the following formats:
 
 |**Subset name**|**Maintainer**|**File name**|**OBO format**|**OWL format**|**json format**|
 |------------------|-------------|-------------|-------------|------------|-------------|
-|**GO slim AGR subset**|Developed by GO Consortium for the [Alliance of Genomes Resources](https://www.alliancegenome.org/){:target="blank"} |goslim_agr  |[obo](https://current.geneontology.org/ontology/subsets/goslim_agr.obo) |[owl](https://current.geneontology.org/ontology/subsets/goslim_agr.owl){:target="blank"}  |[json](https://current.geneontology.org/ontology/subsets/goslim_agr.json){:target="blank"}  |
+|***A*lliance of *G*enome *R*esources subset**|Developed by GO Consortium for the [Alliance of Genome Resources](https://www.alliancegenome.org/){:target="blank"} |goslim_agr  |[obo](https://current.geneontology.org/ontology/subsets/goslim_agr.obo) |[owl](https://current.geneontology.org/ontology/subsets/goslim_agr.owl){:target="blank"}  |[json](https://current.geneontology.org/ontology/subsets/goslim_agr.json){:target="blank"}  |
 |**Generic GO subset**|[GO Consortium](https://help.geneontology.org/){:target="blank"} |goslim_generic|[obo](https://current.geneontology.org/ontology/subsets/goslim_generic.obo)| [owl](https://current.geneontology.org/ontology/subsets/goslim_generic.owl){:target="blank"}  |[json](https://current.geneontology.org/ontology/subsets/goslim_generic.json){:target="blank"}  |
 |*__Aspergillus__* **subset**|[_Aspergillus_ Genome Data](http://www.aspgd.org/){:target="blank"} |goslim_aspergillus|[obo](https://current.geneontology.org/ontology/subsets/goslim_aspergillus.obo) |[owl](https://current.geneontology.org/ontology/subsets/goslim_aspergillus.owl){:target="blank"}  |[json](https://current.geneontology.org/ontology/subsets/goslim_aspergillus.json){:target="blank"}  |
 |*__Candida albicans__* **subset**|[_Candida_ Genome Database](http://www.candidagenome.org/){:target="blank"} |goslim_candida|[obo](https://current.geneontology.org/ontology/subsets/goslim_candida.obo)|[owl](https://current.geneontology.org/ontology/subsets/goslim_candida.owl){:target="blank"}  |[json](https://current.geneontology.org/ontology/subsets/goslim_candida.json){:target="blank"}  |

From 1f115b85cf993c5ed98776a4d72676a4ec560874 Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Fri, 12 Apr 2024 15:07:28 -0700
Subject: [PATCH 23/52] Allow deploy.yaml workflow to run on workflow_dispatch
 trigger

---
 .github/workflows/deploy.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
index 6ee5ffeb..f87e1328 100644
--- a/.github/workflows/deploy.yaml
+++ b/.github/workflows/deploy.yaml
@@ -1,6 +1,9 @@
 name: Deploy Jekyll site to Pages
 
 on:
+  # Allow the workflow to be triggered manually. In particular, this allows it to be triggered
+  # from a workflow in the go-site repository.
+  workflow_dispatch:
   push:
     branches: ["master"]
 

From f65be947338edc08a0b4d66a8ac27ce12ad79231 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 15 Apr 2024 17:28:27 -0600
Subject: [PATCH 24/52] Update faq.md

Removing reference to YeastMine, replacing with AllianceMine
---
 _docs/faq.md | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/_docs/faq.md b/_docs/faq.md
index 03cf36c8..c00fe2af 100644
--- a/_docs/faq.md
+++ b/_docs/faq.md
@@ -829,12 +829,15 @@ FAQ tags: 
 [ontology](/faq-tags/ontology)
 {:/comment}
 
-You can use the YeastMine Analyze tool available at SGD! This tool will return a table of GO ID, GO term name, GO term namespace (cellular component, molecular function, or biological process) and GO term description for each valid GO ID you supply. This will work for any organism, as the GO is the same! 
+You can use AllianceMine's Upload List tool, available at the Alliance website! This tool will return a table of GO ID, GO term name, and GO term description for each valid GO ID you supply. This will work for any organism, as the GO is the same! 
 
-1.  Go to the [Analyze tool on YeastMine](http://yeastmine.yeastgenome.org/yeastmine/bag.do){:target="blank"}
-2.  In the Select Type pull down, select `GO Term`
-3.  Enter your GO ids or upload a list in the full format (GO:0016020, GO:0016301...)
-4.  Click on `Create List`. The tool offers several options to download the list when you use the `Save a list of...` button.
+1.  Go to the [Upload List tool on AllianceMine](https://www.alliancegenome.org/bluegenes/alliancemine/upload/input){:target="blank"}
+2.  In the List Type pull down, select `GO Term`
+3.  Enter your GO ids or upload a file, making sure GO IDs have the correct format (GO:0016020, GO:0016301...)
+4.  Click on `Continue`, and then on the next page use the `Save List` button.
+5.  You can use the `Save list` button on the next page to use this list in AllianceMine, or use the `Export` button to see download options.
+
+If you need the aspect (cellular component, molecular function, or biological process) for each term, you can add this to the results before saving. Use the `Add Columns` button, click `Namespace` to highlight that option, then click the `Add 1 columns` button in the lower right. You can also use the AllianceMine features to filter your list, for example to select only molecular_function terms in your list.
 
 If you have a list of GO terms and wish to retrieve GO IDs and/or definitions, you can use the steps above. Make sure multi-word GO terms are in double quotes (sporulation,"lactase activity","codeine metabolic process") as the tool will otherwise recognise spaces as delimiters. 
 

From e73d8d8111162c9988d86c344bfad747a4a84020 Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Fri, 19 Apr 2024 16:14:20 -0700
Subject: [PATCH 25/52] Update file and folder structure of GO REF generated
 pages

---
 _config.yml                        | 3 ++-
 _includes/goref_toc_list_item.html | 6 +++++-
 _layouts/goref.html                | 2 +-
 gorefs.html                        | 6 ------
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/_config.yml b/_config.yml
index 423a4966..df692334 100644
--- a/_config.yml
+++ b/_config.yml
@@ -84,4 +84,5 @@ algolia:
 page_gen:
   - data: gorefs
     template: goref
-    name: id
+    dir: GO_REF
+    name_expr: "record['id'].sub('GO_REF:', '')"
diff --git a/_includes/goref_toc_list_item.html b/_includes/goref_toc_list_item.html
index 8f16bf16..4ac86e7f 100644
--- a/_includes/goref_toc_list_item.html
+++ b/_includes/goref_toc_list_item.html
@@ -1,5 +1,9 @@
 <li>
-    <a class="u-margin-bottom-xsmall" href="{{ include.goref.id | datapage_url: 'gorefs' }}">
+    {% comment %} This href must be kept in sync with page_gen settings in _config.yml {% endcomment %}
+    <a
+        class="u-margin-bottom-xsmall"
+        href="{{ '/GO_REF/' | append: include.goref.id | replace: 'GO_REF:', '' | relative_url }}"
+    >
         [{{ include.goref.id }}] {{ include.goref.title }}
     </a>
 </li>
diff --git a/_layouts/goref.html b/_layouts/goref.html
index f05a3bfc..a5ba5181 100644
--- a/_layouts/goref.html
+++ b/_layouts/goref.html
@@ -4,7 +4,7 @@
 
 <div class="container">
     <div class="u-margin-bottom-medium">
-        <a href="{{ '/gorefs.html' | relative_url }}">
+        <a href="{{ '/gorefs' | relative_url }}">
             <i class="fa fa-chevron-left" aria-hidden="true"></i>
             All GO REFs
         </a>
diff --git a/gorefs.html b/gorefs.html
index 97dff685..0dd5bdf8 100644
--- a/gorefs.html
+++ b/gorefs.html
@@ -1,10 +1,4 @@
 ---
-# Note that when developing locally, this page must be reached by visiting /gorefs.html. Without
-# the .html extension (/gorefs), an index page for the gorefs directory generated by the
-# jekyll-datapage-generator plugin will be displayed instead. This is a quirk of the Jekyll
-# development web server. When deployed on GitHub pages, this page will be available at /gorefs
-# without the .html extension.
-
 layout: default
 ---
 

From 60726e65afb46bb6556d4539772cb4ec95a92ed7 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 22 Apr 2024 15:54:53 -0600
Subject: [PATCH 26/52] Update download-go-annotations.md

Adding ask us/interproscan info
---
 _docs/download-go-annotations.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index 8b6b8635..885b25cf 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -48,6 +48,14 @@ For all other organisms we recommend downloading annotations from one of the fol
   * Navigate to your organism, e.g. Anopheles_gambiae/ is in the `/invertebrate` directory
   * Open the `representative/` directory, and open the directory within that
   * Download the file with the suffix `gene_ontology.gaf.gz`, e.g. `GCF_943734735.2-RS_2023_12_gene_ontology.gaf.gz`
+ 
+### 3. If you cannot find annotations for your organism for download as described above
+[Get help from the GO helpdesk](https://help.geneontology.org/).
+
+### 4. If your organism’s genome sequence is not yet publicly available
+For examole, if you have a set of new (protein) sequences that you want to annotate with GO terms, we recommend that you generate annotations using the latest version of InterProScan.
+For most genomic analyses, your input file should have one protein sequence per protein-coding gene, though any set of protein sequences can be used.
+Download InterProScan at [https://www.ebi.ac.uk/interpro/about/interproscan/](https://www.ebi.ac.uk/interpro/about/interproscan/){:target="blank"}.
 
 ## More information on GO annotation formats
 + GO has monthly releases

From 9bae41c5c8a3519d2d89301f6ed21d6cde6dcc09 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 22 Apr 2024 15:59:41 -0600
Subject: [PATCH 27/52] Update download-go-annotations.md

typo
---
 _docs/download-go-annotations.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index 885b25cf..aa6a5418 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -53,9 +53,9 @@ For all other organisms we recommend downloading annotations from one of the fol
 [Get help from the GO helpdesk](https://help.geneontology.org/).
 
 ### 4. If your organism’s genome sequence is not yet publicly available
-For examole, if you have a set of new (protein) sequences that you want to annotate with GO terms, we recommend that you generate annotations using the latest version of InterProScan.
+For example, if you have a set of new (protein) sequences that you want to annotate with GO terms, we recommend that you generate annotations using the latest version of InterProScan.
 For most genomic analyses, your input file should have one protein sequence per protein-coding gene, though any set of protein sequences can be used.
-Download InterProScan at [https://www.ebi.ac.uk/interpro/about/interproscan/](https://www.ebi.ac.uk/interpro/about/interproscan/){:target="blank"}.
+Download InterProScan at [https://www.ebi.ac.uk/interpro/about/interproscan](https://www.ebi.ac.uk/interpro/about/interproscan/){:target="blank"}.
 
 ## More information on GO annotation formats
 + GO has monthly releases

From 5bf3f1c1e3c8f0d1f680a1f2d439a4c31b0bfa7b Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 22 Apr 2024 18:00:01 -0600
Subject: [PATCH 28/52] Update download-go-annotations.md

typos in URLs
---
 _docs/download-go-annotations.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index aa6a5418..cc90517b 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -11,7 +11,7 @@ redirect_from:
 
 ### Getting annotations for a selected organism
 
-This page has instructions for getting GO annotations for almost any organism. If your organism is not available in the [official GO products]((http://current.geneontology.org/products/pages/downloads.html)), [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}, or [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}, we recommend using the latest version of [InterProScan](https://interproscan-docs.readthedocs.io/en/latest/){:target="blank"} for unannotated organisms.
+This page has instructions for getting GO annotations for almost any organism. If your organism is not available in the [official GO products](http://current.geneontology.org/products/pages/downloads.html), [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}, or [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}, we recommend using the latest version of [InterProScan](https://interproscan-docs.readthedocs.io/en/latest/){:target="blank"} for unannotated organisms.
 
 #### Required Files
 Most tools that use GO annotations take two input files: 
@@ -22,7 +22,7 @@ Because the ontology and annotations are constantly being improved over time, we
 
 #### Citing GO
 To ensure reproducibility for any publication where GO was used at any point in the research, please include:
-* [appropriate GO publication(s)- refer to the full GO citation policy](docs/go-citation-policy/)
+* [appropriate GO publication(s)- refer to the full GO citation policy](/docs/go-citation-policy/)
 * the URL where the files were obtained
 * the date on the header of the GAF file
 * the ontology version number

From 5eb19a25bd99645d68d0af5e96ebcf89535099e1 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Wed, 24 Apr 2024 12:40:13 -0600
Subject: [PATCH 29/52] Update download-go-annotations.md

Quick links to lower sections
---
 _docs/download-go-annotations.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index cc90517b..1ea744f4 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -13,6 +13,10 @@ redirect_from:
 
 This page has instructions for getting GO annotations for almost any organism. If your organism is not available in the [official GO products](http://current.geneontology.org/products/pages/downloads.html), [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}, or [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}, we recommend using the latest version of [InterProScan](https://interproscan-docs.readthedocs.io/en/latest/){:target="blank"} for unannotated organisms.
 
+Jump to a section:
+- [Commonly studued organisms](/docs/download-go/annotations/#1-commonly-studies-organisms)
+- [All other organsims](/docs/download-go/annotations/#2-all-other-organisms)
+
 #### Required Files
 Most tools that use GO annotations take two input files: 
 1. a file with the **annotations** (in Gene Annotation Format, or GAF)

From 26a280e8e194e00ffa4af69701c0f8a3b1f5c6de Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Wed, 24 Apr 2024 12:42:38 -0600
Subject: [PATCH 30/52] Update download-go-annotations.md

typo
---
 _docs/download-go-annotations.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index 1ea744f4..452b3049 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -15,7 +15,7 @@ This page has instructions for getting GO annotations for almost any organism. I
 
 Jump to a section:
 - [Commonly studued organisms](/docs/download-go/annotations/#1-commonly-studies-organisms)
-- [All other organsims](/docs/download-go/annotations/#2-all-other-organisms)
+- [All other organisms](/docs/download-go/annotations/#2-all-other-organisms)
 
 #### Required Files
 Most tools that use GO annotations take two input files: 

From 26c6e3621439f025654223e9f90e9a151a8ff29f Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Wed, 24 Apr 2024 12:43:15 -0600
Subject: [PATCH 31/52] Update download-go-annotations.md

typoo
---
 _docs/download-go-annotations.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index 452b3049..950016c4 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -14,7 +14,7 @@ redirect_from:
 This page has instructions for getting GO annotations for almost any organism. If your organism is not available in the [official GO products](http://current.geneontology.org/products/pages/downloads.html), [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}, or [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}, we recommend using the latest version of [InterProScan](https://interproscan-docs.readthedocs.io/en/latest/){:target="blank"} for unannotated organisms.
 
 Jump to a section:
-- [Commonly studued organisms](/docs/download-go/annotations/#1-commonly-studies-organisms)
+- [Commonly studied organisms](/docs/download-go-annotations/#1-commonly-studied-organisms)
 - [All other organisms](/docs/download-go/annotations/#2-all-other-organisms)
 
 #### Required Files

From 26d03a32ca0500ba6f8a2d01b82bf908d983475e Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 6 May 2024 17:42:00 -0600
Subject: [PATCH 32/52] Update go-citation-policy.md

for https://github.com/geneontology/geneontology.github.io/issues/533
---
 _docs/go-citation-policy.md | 41 +++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/_docs/go-citation-policy.md b/_docs/go-citation-policy.md
index dc065f5e..11f8b894 100644
--- a/_docs/go-citation-policy.md
+++ b/_docs/go-citation-policy.md
@@ -22,33 +22,33 @@ If you used a specific software/analysis tool in your research, in addition to t
 + **GO-CAMs**: Thomas PD, Hill DP, Mi H, Osumi-Sutherland D, Van Auken K, Carbon S, Balhoff JP, Albou LP, Good B, Gaudet P, Lewis SE, Mungall CJ. Gene Ontology Causal Activity Modeling (GO-CAM) moves beyond GO annotations to structured descriptions of biological functions and systems. Nat Genet. 2019 Oct;51(10):1429-1433. DOI: [10.1038/s41588-019-0500-1](https://doi.org/10.1038/s41588-019-0500-1){:target="blank"} \[[abstract](https://pubmed.ncbi.nlm.nih.gov/31548717/){:target="blank"} \| [full text](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7012280/pdf/nihms-1067180.pdf){:target="blank"}\]
 
 
-If you produce tools and/or services that use GO data, the user should be provided with the GO release date and DOI along with the results (e.g. "2023-01-01" and "DOI: [10.5281/zenodo.7504797](https://dx.doi.org/10.5281/zenodo.7504797){:target="blank"}").
+If you produce tools and/or services that use GO data, the user should be provided with the GO release date and DOI along with the results (e.g. "2024-01-17" and "DOI: [10.5281/zenodo.10536401](https://doi.org/10.5281/zenodo.10536401){:target="blank"}").
 
 ## Citing data from downloads
 
-If you wish to cite data provided by the Gene Ontology knowledgebase, either from AmiGO or the files downloaded from the GO website, please state the release date and/or version number of the data, preferably both; e.g. "2023-01-01" and "[10.5281/zenodo.7504797](https://dx.doi.org/10.5281/zenodo.7504797){:target="blank"}". Both annotation and ontology data may change over time, and to reproduce the results of an analysis, it is important that the same initial GO data is used.
+If you wish to cite data provided by the Gene Ontology knowledgebase, either from AmiGO or the files downloaded from the GO website, please state the release date and/or version number of the data, preferably both; e.g. "2024-01-17" and "[10.5281/zenodo.10536401](https://doi.org/10.5281/zenodo.10536401){:target="blank"}". Both annotation and ontology data may change over time, and to reproduce the results of an analysis, it is important that the same initial GO data is used.
 
 ## Citing data from AmiGO
 
 The data release can be found in the [AmiGO](https://amigo.geneontology.org/amigo){:target="blank"} page footer on the right-hand side. To cite the annotations of individual database groups please see the GO publications list.
 
-## GO logo
-
-If you intend to use the logo on your website, please include a link to the GO home page, https://geneontology.org. Please [contact the GO Helpdesk](http://help.geneontology.org) if you need a larger or higher resolution version. The logo is subject to our use and license.
-
-The GO logo is available in four sizes. For reuse, please do not hotlink images, but download them instead (e.g. right click the appropriate image and size, then select "Save Link As"):
-
-| **Mini (and favicon)** | **Small** | **Regular** | **Large** |
-|------|-------|---------|-------|
-| [![mini logo](/assets/go-logo.mini.png){:width="100"}](/assets/go-logo.mini.png){:target="blank"} | [![small logo](/assets/go-logo.small.png){:width="100"}](/assets/go-logo.small.png){:target="blank"} | [![regular logo](/assets/go-logo.png){:width="100"}](/assets/go-logo.png){:target="blank"} | [![full logo](/assets/go-logo.large.png){:width="100"}](/assets/go-logo.large.png){:target="blank"} |
-| [![fav icon](/assets/go-logo-favicon.ico){:width="50"}](/assets/go-logo-favicon.ico){:target="blank"} | [![mini icon](/assets/go-logo-icon.mini.png){:width="50"}](/assets/go-logo-icon.mini.png){:target="blank"} | [![small icon](/assets/go-logo-icon.small.png){:width="50"}](/assets/go-logo-icon.small.png){:target="blank"} | [![regular icon](/assets/go-logo-icon.png){:width="50"}](/assets/go-logo-icon.png){:target="blank"} |
+## Best practices for linking to GO entities
+GO uses persistent uniform resource locators (PURLs) for all the objects it describes. If you use or provide links to the following entities, please ensure you are using PURLs. Examples:
+* GO terms: https://purl.obolibrary.org/obo/GO_0022008
+* GO_REFs: https://purl.obolibrary.org/obo/go/references/0000015
+* GO-CAM models: https://model.geneontology.org/65c57c3400001018
+* GO ontology (versions)
+  * current: https://purl.obolibrary.org/obo/go/go.owl
+  * snapshot: https://purl.obolibrary.org/obo/go/snapshot/go.owl
+  * dated release: https://purl.obolibrary.org/obo/go/releases/2024-01-17/go.owl
 
+Technical documentation can be found on the [OBOFoundry GitHub site](https://github.com/OBOFoundry/purl.obolibrary.org/blob/master/README.md){:target="blank"}.
 
 ## License
 
 Gene Ontology Consortium data and data products are licensed under the [Creative Commons Attribution 4.0 Unported License](https://creativecommons.org/licenses/by/4.0/legalcode){:target="blank"}. A human-readable version and explanation is available at the [Creative Commons website](https://creativecommons.org/licenses/by/4.0/){:target="blank"}. For information about how to properly credit data use, please review the [Creative Commons FAQ](http://wiki.creativecommons.org/Frequently_Asked_Questions){:target="blank"} or contact the GO Helpdesk.
 
-We ask that when using or citing GO data that the particular release is mentioned. For example, we'd ask that the date (e.g. "2023-01-01") is included where applicable, and optionally the Zenodo DOI (e.g. "10.5281/zenodo.7504797"). Links, where applicable, would be a useful addition for end-users.
+We ask that, when using or citing GO data, the particular release is mentioned. For example, we'd ask that the date (e.g. "2024-01-17") is included where applicable, and optionally the Zenodo DOI (e.g. "10.5281/zenodo.10536401"). Links, where applicable, would be a useful addition for end-users.
 
 ### Attribution
 
@@ -64,7 +64,7 @@ According to the terms of GO's [CC BY 4.0 license](https://creativecommons.org/l
 
 For example, if you are offering downloads containing GO data, have a data licensing page in your application, or refer to licensed data in your documentation, an appropriate notice may be:
 
-> [Gene Ontology](https://geneontology.org) data from the [2023-01-01 release](http://release.geneontology.org/2023-01-01) ([DOI:10.5281/zenodo.7504797](https://doi.org/10.5281/zenodo.7504797){:target="blank"}) is made available under the terms of the [CC BY 4.0 license](https://creativecommons.org/licenses/by/4.0/legalcode).
+> [Gene Ontology](https://geneontology.org) data from the [2024-01-17 release](http://release.geneontology.org/2024-01-17) ([DOI:10.5281/zenodo.10536401](https://doi.org/10.5281/zenodo.10536401){:target="blank"}) is made available under the terms of the [CC BY 4.0 license](https://creativecommons.org/licenses/by/4.0/legalcode).
 
 For further reading, suggest:
 
@@ -75,6 +75,17 @@ For further reading, suggest:
 
 GOC software and tools are under their own licenses; please see their respective homepages for further details.
 
+## GO logo
+
+If you intend to use the logo on your website, please include a link to the GO home page, https://geneontology.org. Please [contact the GO Helpdesk](http://help.geneontology.org) if you need a larger or higher resolution version. The logo is subject to our use and license.
+
+The GO logo is available in four sizes. For reuse, please do not hotlink images, but download them instead (e.g. right click the appropriate image and size, then select "Save Link As"):
+
+| **Mini (and favicon)** | **Small** | **Regular** | **Large** |
+|------|-------|---------|-------|
+| [![mini logo](/assets/go-logo.mini.png){:width="100"}](/assets/go-logo.mini.png){:target="blank"} | [![small logo](/assets/go-logo.small.png){:width="100"}](/assets/go-logo.small.png){:target="blank"} | [![regular logo](/assets/go-logo.png){:width="100"}](/assets/go-logo.png){:target="blank"} | [![full logo](/assets/go-logo.large.png){:width="100"}](/assets/go-logo.large.png){:target="blank"} |
+| [![fav icon](/assets/go-logo-favicon.ico){:width="50"}](/assets/go-logo-favicon.ico){:target="blank"} | [![mini icon](/assets/go-logo-icon.mini.png){:width="50"}](/assets/go-logo-icon.mini.png){:target="blank"} | [![small icon](/assets/go-logo-icon.small.png){:width="50"}](/assets/go-logo-icon.small.png){:target="blank"} | [![regular icon](/assets/go-logo-icon.png){:width="50"}](/assets/go-logo-icon.png){:target="blank"} |
+
 ## Website disclaimer
 
-All information on this website is copyright © 1999–2023 Gene Ontology Consortium. Permission to use the information contained in this database was given by the researchers and institutes who contributed or published the information. Users of the data are solely responsible for compliance with any copyright restrictions. Documents from this server are provided "AS-IS" without any warranty, expressed or implied.
+All information on this website is copyright © 1999–2024 Gene Ontology Consortium. Permission to use the information contained in this database was given by the researchers and institutes who contributed or published the information. Users of the data are solely responsible for compliance with any copyright restrictions. Documents from this server are provided "AS-IS" without any warranty, expressed or implied.

From 37f54ce0724080c7637b475605dba61c69439371 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 6 May 2024 17:43:18 -0600
Subject: [PATCH 33/52] Update go-citation-policy.md

---
 _docs/go-citation-policy.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_docs/go-citation-policy.md b/_docs/go-citation-policy.md
index 11f8b894..5f124689 100644
--- a/_docs/go-citation-policy.md
+++ b/_docs/go-citation-policy.md
@@ -22,7 +22,7 @@ If you used a specific software/analysis tool in your research, in addition to t
 + **GO-CAMs**: Thomas PD, Hill DP, Mi H, Osumi-Sutherland D, Van Auken K, Carbon S, Balhoff JP, Albou LP, Good B, Gaudet P, Lewis SE, Mungall CJ. Gene Ontology Causal Activity Modeling (GO-CAM) moves beyond GO annotations to structured descriptions of biological functions and systems. Nat Genet. 2019 Oct;51(10):1429-1433. DOI: [10.1038/s41588-019-0500-1](https://doi.org/10.1038/s41588-019-0500-1){:target="blank"} \[[abstract](https://pubmed.ncbi.nlm.nih.gov/31548717/){:target="blank"} \| [full text](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7012280/pdf/nihms-1067180.pdf){:target="blank"}\]
 
 
-If you produce tools and/or services that use GO data, the user should be provided with the GO release date and DOI along with the results (e.g. "2024-01-17" and "DOI:10.5281/zenodo.10536401](https://doi.org/10.5281/zenodo.10536401){:target="blank"}").
+If you produce tools and/or services that use GO data, the user should be provided with the GO release date and DOI along with the results (e.g. "2024-01-17" and "DOI: [10.5281/zenodo.10536401](https://doi.org/10.5281/zenodo.10536401){:target="blank"}").
 
 ## Citing data from downloads
 

From d1f42f14e4174d3bd96820fe4d19e41197c4492e Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 6 May 2024 23:32:45 -0600
Subject: [PATCH 34/52] Update gene-product-information-gpi-format-20.md

---
 _docs/gene-product-information-gpi-format-20.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
index 490dd045..48644375 100644
--- a/_docs/gene-product-information-gpi-format-20.md
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -58,6 +58,12 @@ The file format comprises 10 tab-delimited fields. Fields with multiple values (
 | 10 | [DB_Xref(s)](#db_xrefs "Definition and requirements for DB_Xref(s) (column 10)") |	optional |	0 or greater | |
 | 11 | [Gene_Product_Properties](#gene-product-properties "Definition and requirements for Gene Product Properties (column 11)") |	optional |	0 or greater |	db_subset=Swiss-Prot|
 
+
+### GPI 2.0 examples
+    UniProtKB:A0AA85ABI6	A0AA85ABI6	Phospholipid scramblase		protein	NCBITaxon:taxon:48269					db_subset=TrEMBL|uniprot_proteome=UP000050790
+    RNAcentral:URS0000C3938B_6185		Schistosoma haematobium Hammerhead ribozyme (type I) ribozyme sequence		hammerhead_ribozyme	NCBITaxon:taxon:6185					
+
+
 ### Definitions and requirements for field contents
 
 #### DB:DB Object ID

From 1bbc8606a6b1a1306b07278539d4aec30fbb2da1 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 6 May 2024 23:49:09 -0600
Subject: [PATCH 35/52] Update gene-product-information-gpi-format-20.md

---
 .../gene-product-information-gpi-format-20.md | 46 +++++++++++--------
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
index 48644375..20591b08 100644
--- a/_docs/gene-product-information-gpi-format-20.md
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -68,23 +68,26 @@ The file format comprises 10 tab-delimited fields. Fields with multiple values (
 
 #### DB:DB Object ID
 The **DB** prefix is the database abbreviation (namespace) from which the unique identifier **DB Object ID** is drawn and must be one of the values from the set of GO database cross-references. The **DB:DB Object ID** is the combined identifier for the database object.
-This field is mandatory, cardinality 1.\
+This field is mandatory, cardinality 1.
 
 <!--In GPI 1.0 format, the identifier may reference a top-level primary gene or gene product identifier, or an identified variant of a gene or gene product, for example identifiers that specify distinct proteins produced by differential splicing, alternative translational starts, post-translational cleavage, or post-translational modification. Identifiers for functional RNAs and protein complexes can also be included in this column. 
     If the gene product is not a top-level gene or gene product identifier, the **Parent_Object_ID** field should contain the canonical form of the gene or gene product. 
     Note that while the **DB_Object_ID** is the identifier for a database object that may be used for annotation, it may or may not correspond exactly to what is described in a paper. For example, a paper describing functional characterization of a protein may result in annotations to the gene encoding the protein (gene ID in **DB_Object_ID**) or annotations to the protein (protein ID in **DB_Object_ID**), depending on annotation practice of the contributing group. 
 -->
 #### DB Object Symbol
-A (unique and valid) symbol to which the **DB:DB_Object_ID** is matched. No white spaces allowed.\
-This field is mandatory, cardinality 1.\
+A (unique and valid) symbol to which the **DB:DB_Object_ID** is matched. No white spaces allowed.
+
+This field is mandatory, cardinality 1.
 The **DB_Object_Symbol** field should contain a symbol that is recognizable to a biologist wherever possible (an abbreviation widely used in the literature, for example). It is not a unique identifier or an accession number (unlike the **DB:DB_Object_ID**), although IDs can be used as a **DB_Object_Symbol** if there is no more biologically meaningful symbol available (e.g., when an unnamed gene is annotated). ORF names can be used for otherwise unnamed genes or proteins. If gene products are annotated, the gene product symbol can be used if available. Many gene product annotation entries may share a gene symbol. 
 The text entered in the **DB_Object_Symbol** should refer to the entity in **DB:DB_Object_ID**. For example, several alternative transcripts from one gene may be annotated separately, each with specific gene product identifiers in **DB:DB_Object_ID**, but with the same gene symbol in the **DB_Object_Symbol** column. 
 #### DB Object Name
-The name of the gene or gene product in **DB:DB_Object_ID**.\
-This field is not mandatory, cardinality 0, 1 [white space allowed]\
+The name of the gene or gene product in **DB:DB_Object_ID**.
+
+This field is not mandatory, cardinality 0, 1 [white space allowed]
 The text entered in the **DB_Object_Name** and **DB_Object_Symbol** should refer to the entity in **DB:DBB_Object_ID**. 
 #### DB Object Synonym
-These entries may be a gene symbol or other text. Note that we strongly recommend that synonyms are included in the GPI file, as this aids the searching of GO.\
+These entries may be a gene symbol or other text. Note that we strongly recommend that synonyms are included in the GPI file, as this aids the searching of GO.
+
 This field is not mandatory, cardinality 0, 1, >1 [white space allowed]; for cardinality >1 use a pipe to separate entries (e.g. YFL039C|ABY1|END7|actin gene). 
 #### DB Object Type
 An ontology identifier for the type of gene or gene product being annotated. This field uses Sequence Ontology, Protein Ontology, and GO labels and must correspond to one of the [permitted GPI entity types](https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#gpi-entity-types) or a more granular child term: SO:0001217 (protein-coding gene)
@@ -95,28 +98,35 @@ protein 	protein 	PR:000000001 (protein)
 protein-containing complex 	protein-containing complex 	GO:0032991 (protein-containing complex)
 
 
-marker or uncloned locus 	genetic_marker 	SO:0001645; or any subtype of ncRNA in the Sequence Ontology. If the precise product type is unknown, gene_product should be used.\
-This field is mandatory, cardinality 1.\
+marker or uncloned locus 	genetic_marker 	SO:0001645; or any subtype of ncRNA in the Sequence Ontology. If the precise product type is unknown, gene_product should be used.
+
+This field is mandatory, cardinality 1.
 The object type (gene, transcript, protein, protein_complex, etc.) listed in the **DB_Object_Type** field must match the database entry identified by the **DB:DB_Object_ID**. Note that **DB_Object_Type** refers to the database entry (i.e. it represents a protein, functional RNA, etc.); this column does not reflect anything about the GO term or the evidence on which the annotation is based. 
 #### DB Object Taxon
-The NCBI taxon ID of the species encoding the gene product.\
-This field is mandatory, cardinality 1.\
+The NCBI taxon ID of the species encoding the gene product.
+
+This field is mandatory, cardinality 1.
 The taxon should be specified as a number with the prefix "taxon". 
 #### Encoded by
 For proteins and transcripts, **Encoded by** refers to the gene id that encodes those entities.
+
 This field is not mandatory, cardinality 0, 1, >1 ; for cardinality >1 use a pipe to separate entries. 
-#### Parent Protein CHANGE THIS TEXT
-If the **DB:DB_Object_ID** refers to a variant of a gene product, this column will hold the identifier of the gene product from which it was derived.\
-This field is mandatory, cardinality 1, when variant forms of a gene product (e.g. identifiers that specify distinct proteins produced by differential splicing, alternative translational starts, post-translational cleavage or post-translational modification) are represented in **DB:DB_Object_ID**. If the **DB:DB_Object_ID** refers to the canonical form of a gene product, this column should be blank.\
-The identifier used must be a standard 2-part global identifier, e.g. UniProtKB:OK0206 
-The entity in the **Parent_Object_ID** column may not necessarily be the canonical form of the gene product; the canonical form would be identifiable as an entry for that gene product in the GPI file that would have the **Parent_Object_ID** blank. 
+#### Parent Protein
+
+
+This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
 #### Protein Containing Complex Members
 
+
+This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
+
 #### DB_Xrefs
-Identifiers for the object in **DB:DB_Object_ID** found in other databases.\
-This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.\
+Identifiers for the object in **DB:DB_Object_ID** found in other databases.
+
+This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
 Identifiers used must be a standard 2-part global identifiers, e.g. UniProtKB:OK0206 
+
 This column should be used to record IDs for this object in other databases; for gene products in model organism databases, this must include the UniProtKB ID, and may also include NCBI gene or protein IDs, etc. 
 #### Gene Product Properties
-This field is optional, cardinality 0+; multiple properties should be pipe-separated.\
+This field is optional, cardinality 0+; multiple properties should be pipe-separated.
 The Properties column can be filled with a pipe separated list of values in the format "property_name = property_value". There is a fixed vocabulary for the property names and this list can be extended when necessary. Supported properties will include: 'GO annotation complete', "Phenotype annotation complete' (the value for these two properties would be a date), 'Target set' (e.g. Reference Genome, Kidney etc.), 'Database subset' (e.g. Swiss-Prot, TrEMBL). 

From 72cd9f44ae9bfe14ae430cb891284237a4e2876e Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 6 May 2024 23:51:13 -0600
Subject: [PATCH 36/52] Update gene-product-association-data-gpad-format-20.md

---
 _docs/gene-product-association-data-gpad-format-20.md | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/_docs/gene-product-association-data-gpad-format-20.md b/_docs/gene-product-association-data-gpad-format-20.md
index 71f53abc..1d4742f3 100644
--- a/_docs/gene-product-association-data-gpad-format-20.md
+++ b/_docs/gene-product-association-data-gpad-format-20.md
@@ -37,10 +37,6 @@ Submitting groups may choose to include optional additional information, for exa
 ### Annotation file fields
 The GPAD format comprises 12 tab-delimited fields.  Some fields are optional, some fields are mandatory and cardinality varies by field and other conditions.  For fields that permit multiple values, values should be separated by pipes (\|) for `OR` statements and commas (,) for `AND` statements.
 
-GPAD 2.0 sample line:
-
-    SGD:S000002164	NOT	RO:0002331	GO:0043409	PMID:26546002	ECO:0000316	SGD:S000003631		2018-01-19	SGD	RO:0002233(UniProtKB:Q00772),BFO:0000050(GO:0071852)	noctua-model-id=gomodel:6086f4f200000223|model-state=production|contributor=orcid:0000-0003-3212-6364
-
 | **Column** 	| **Content** 	| **Required?** 	| **Cardinality** 	| **Example** |
 |----------|---------|-------------|---------|--------|
 |1 |	[DB:DB_Object_ID ](#1-db-db-object-id "Definition and requirements for DB:DB Object ID (column 1)") | 	required | 1 |	SGD:S000002164 | 
@@ -56,6 +52,12 @@ GPAD 2.0 sample line:
 |11 |	[Annotation Extension](#11-annotation-extension "Definition and requirements for Annotation Extension (column 11)") |	optional |	0 or greater |	RO:0002233(UniProtKB:Q00772),BFO:0000050(GO:0071852)|
 |12 |	[Annotation Properties](#12-annotation-properties "Definition and requirements for Annotation Properties (column 12)") |	optional |	0 or greater |	noctua-model-id=gomodel:6086f4f200000223\|model-state=production\|contributor=orcid:0000-0003-3212-6364|
 
+### GPAD 2.0 examples
+
+    SGD:S000002164	NOT	RO:0002331	GO:0043409	PMID:26546002	ECO:0000316	SGD:S000003631		2018-01-19	SGD	RO:0002233(UniProtKB:Q00772),BFO:0000050(GO:0071852)	noctua-model-id=gomodel:6086f4f200000223|model-state=production|contributor=orcid:0000-0003-3212-6364
+    UniProtKB:A0AA85ABI6		RO:0002327	GO:0017128	GO_REF:0000118	ECO:0007826	PANTHER:PTHR23248:SF9		2024-04-08	TreeGrafter		id=GOA:8034655976|comment=go_evidence:IEA
+
+
 ### Definitions and requirements for field contents
 
 #### 1. DB:DB Object ID

From 9efab8bbca5e0681dac2173aa6ce9c8899f06792 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Tue, 7 May 2024 19:08:27 -0600
Subject: [PATCH 37/52] Update gene-product-information-gpi-format-20.md

---
 .../gene-product-information-gpi-format-20.md | 68 +++++++++++--------
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
index 20591b08..3b71cde2 100644
--- a/_docs/gene-product-information-gpi-format-20.md
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -9,36 +9,36 @@ permalink: /docs/gene-product-information-gpi-format-2.0/
 
 # Gene Product Information (GPI) files
 
+The Gene Ontology Consortium stores annotation data, the representation of gene product attributes using GO terms, in tab-delimited text files. Each non-header line in an annotation file represents a single association between a gene product and a GO term with a certain evidence code and the reference to support the link. 
+
 This guide lays out the format specifications for the *G*ene *P*roduct *I*nformation (GPI) 2.0 format.
 **Note that the GPI file is the companion file for the [GPAD file](/docs/gene-product-association-data-gpad-format/).
-Both files should be submitted together using the same version.**
-
-The Gene Ontology Consortium stores annotation data, the representation of gene product attributes using GO terms, in tab-delimited text files. Each line in the file represents a single association between a gene product and a GO term with a certain evidence code and the reference to support the link.
+Both files should be submitted together using the same version.** 
+GPAD/GPI is intended for internal GO use. GO also provides annotations as [GAF files](/docs/go-annotation-file-gaf-format-2.2/) and recommends use of the GAF format for most use cases. 
 
-GO also provides annotations as [GAF files](/docs/go-annotation-file-gaf-format-2.2/). For more general information on annotation, please see the [Introduction to GO annotation](/docs/go-annotations/).
+For more general information on annotation, please see the [Introduction to GO annotation](/docs/go-annotations/).
 
 # Changes from the GPI 1.2 to GPI 2.0
 **Header**
 * **The `gpi-version` header must read `2.0` for this format.**
   
 **Columns**
-* Columns 1 & 2 from the GPI 1.2 are now combined in a single column containing an id in CURIE syntax, e.g. UniProtKB:P56704.**
-* **NCBI taxon ids are to be prefixed with 'NCBITaxon:' to indicate the source of the id, e.g. NCBITaxon:6239**
-* **Dates must now follow the ISO-8601 format, e.g. YYYY-MM-DD; time may be included as YYYY-MM-DDTHH:MM:SS**
+* **Columns 1 & 2 from the GPI 1.2 are now combined in a single column containing an id in CURIE syntax, e.g. `UniProtKB:P56704`.**
+* **NCBI taxon ids are to be prefixed with `NCBITaxon:` to indicate the source of the id, e.g. `NCBITaxon:6239`**
+* **Dates must now follow the ISO-8601 format YYYY-MM-DD; time may be included as YYYY-MM-DDTHH:MM:SS**
 <!-- does col 5 have to be an ontology ID or are ontology labels, entity types ok? -->
+
 # Gene Product Information (GPI) 2.0 format
 
 ## GPI Header
-All annotation files must start with a single line denoting the file format. For GPI it is as follows:
+### Required information to provide in the header:
+All annotation files must start with a single line denoting the file format. The database/group generating the file (as listed in dbxrefs.yaml) & the ISO-8601 formatted date the file was generated must also be included in the header. Example for GPI 2.0:
 
     !gpi-version: 2.0
-
-Other information, such as contact details for the submitter or database group, useful links, etc., can be included in an association file by prefixing the line with an exclamation mark (**!**); such lines will be ignored by parsers.
-
-Required information to provide in the header:
-
-    !generated-by: database listed in dbxrefs.yaml
-    !date-generated: YYYY-MM-DD or YYYY-MM-DDTHH:MM
+    !generated-by: SGD 
+    !date-generated: 2024-05-01
+    
+Other information, such as contact details for the submitter or database group, database URLs, etc. can be included in an association file header by prefixing the line with an exclamation mark (`!`); such lines will be ignored by parsers.
 
 ## GPI fields
 
@@ -51,16 +51,20 @@ The file format comprises 10 tab-delimited fields. Fields with multiple values (
 | 3 | [DB_Object_Name](#db-object-name "Definition and requirements for DB Object Name (column 3)") |	optional |	0 or greater |	Angiomotin|
 | 4 | [DB_Object_Synonym(s)](#db-object-synonym "Definition and requirements for DB Object Synonym(s) (column 4)") |	optional |	0 or greater |	E230009N18Rik|KIAA1071|
 | 5 | [DB_Object_Type](#db-object-type "Definition and requirements for DB Object Type (column 5)") |	required |	1 |	PR:000000001|
-| 6 | [DB_Object_Taxon](#db-object-taxon "Definition and requirements for DB Object Taxon (column 6)") |	required |	1 |	taxon:9606|
+| 6 | [DB_Object_Taxon](#db-object-taxon "Definition and requirements for DB Object Taxon (column 6)") |	required |	1 |	NCBITaxon:9606|
 | 7 | [Encoded_by](#encoded-by "Definition and requirements for Encoded by (column 7)") | optional | 0 or greater | ***EXAMPLE NEEDED***|
 | 8 | [Parent_Protein](#parent-protein "Definition and requirements for Parent Protein (column 8)") |	optional |	0 or 1 |	UniProtKB:Q4VCS5|
-| 9 | [Protein_Containing_Complex_Members](#protein-containing-complex-members "Definition and requirements for Protein Containing Complex Members (column 9)") | optional | 0 or greater | ***EXAMPLE NEEDED***|
-| 10 | [DB_Xref(s)](#db_xrefs "Definition and requirements for DB_Xref(s) (column 10)") |	optional |	0 or greater | |
+| 9 | [Protein_Containing_Complex_Members](#protein-containing-complex-members "Definition and requirements for Protein Containing Complex Members (column 9)") | optional | 0 or greater | SGD:S000003821,SGD:S000001456,SGD:S000005047|
+| 10 | [DB_Xref(s)](#db_xrefs "Definition and requirements for DB_Xref(s) (column 10)") |	optional |	0 or greater | ***EXAMPLE NEEDED*** |
 | 11 | [Gene_Product_Properties](#gene-product-properties "Definition and requirements for Gene Product Properties (column 11)") |	optional |	0 or greater |	db_subset=Swiss-Prot|
 
 
 ### GPI 2.0 examples
-    UniProtKB:A0AA85ABI6	A0AA85ABI6	Phospholipid scramblase		protein	NCBITaxon:taxon:48269					db_subset=TrEMBL|uniprot_proteome=UP000050790
+    SGD:S000005027  Sal1  ADP/ATP transporter  YNL083W  PR:000000001  NCBITaxon:559292  SAL1          
+    SGD:S000217643  CBF1:MET4:MET28  CBF1-MET4-MET28 sulfur metabolism transcription factor complex    GO:0032991  NCBITaxon:559292      SGD:S000003821,SGD:S000001456,SGD:S000005047  CPX-1016  
+    
+    UniProtKB:A0AA85ABI6	A0AA85ABI6	Phospholipid scramblase		protein	NCBITaxon:taxon:48269					db_subset=TrEMBL|uniprot_proteome=UP000050790
+    
     RNAcentral:URS0000C3938B_6185		Schistosoma haematobium Hammerhead ribozyme (type I) ribozyme sequence		hammerhead_ribozyme	NCBITaxon:taxon:6185					
 
 
@@ -80,6 +84,7 @@ A (unique and valid) symbol to which the **DB:DB_Object_ID** is matched. No whit
 This field is mandatory, cardinality 1.
 The **DB_Object_Symbol** field should contain a symbol that is recognizable to a biologist wherever possible (an abbreviation widely used in the literature, for example). It is not a unique identifier or an accession number (unlike the **DB:DB_Object_ID**), although IDs can be used as a **DB_Object_Symbol** if there is no more biologically meaningful symbol available (e.g., when an unnamed gene is annotated). ORF names can be used for otherwise unnamed genes or proteins. If gene products are annotated, the gene product symbol can be used if available. Many gene product annotation entries may share a gene symbol. 
 The text entered in the **DB_Object_Symbol** should refer to the entity in **DB:DB_Object_ID**. For example, several alternative transcripts from one gene may be annotated separately, each with specific gene product identifiers in **DB:DB_Object_ID**, but with the same gene symbol in the **DB_Object_Symbol** column. 
+
 #### DB Object Name
 The name of the gene or gene product in **DB:DB_Object_ID**.
 
@@ -89,19 +94,22 @@ The text entered in the **DB_Object_Name** and **DB_Object_Symbol** should refer
 These entries may be a gene symbol or other text. Note that we strongly recommend that synonyms are included in the GPI file, as this aids the searching of GO.
 
 This field is not mandatory, cardinality 0, 1, >1 [white space allowed]; for cardinality >1 use a pipe to separate entries (e.g. YFL039C|ABY1|END7|actin gene). 
-#### DB Object Type
-An ontology identifier for the type of gene or gene product being annotated. This field uses Sequence Ontology, Protein Ontology, and GO labels and must correspond to one of the [permitted GPI entity types](https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#gpi-entity-types) or a more granular child term: SO:0001217 (protein-coding gene)
-ncRNA-coding gene 	ncRNA_gene 	SO:0001263 (ncRNA-coding gene)
-mRNA 	mRNA 	SO:0000234 (mRNA)
-ncRNA 	ncRNA 	SO:0000655 (ncRNA)
-protein 	protein 	PR:000000001 (protein)
-protein-containing complex 	protein-containing complex 	GO:0032991 (protein-containing complex)
 
+#### DB Object Type
+An ontology identifier for the type of gene or gene product being annotated. This field uses Sequence Ontology, Protein Ontology, and GO labels and must correspond to one of the [permitted GPI entity types](https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#gpi-entity-types) or a more granular child term. Acceptable entries include: 
 
-marker or uncloned locus 	genetic_marker 	SO:0001645; or any subtype of ncRNA in the Sequence Ontology. If the precise product type is unknown, gene_product should be used.
+* protein-coding gene SO:0001217
+* ncRNA-coding gene 	 	SO:0001263
+* mRNA 	 SO:0000234
+* ncRNA  SO:0000655
+* protein  	PR:000000001
+* protein-containing complex 	GO:0032991
+* marker or uncloned locus 	SO:0001645
+* any subtype of ncRNA in the Sequence Ontology
 
 This field is mandatory, cardinality 1.
 The object type (gene, transcript, protein, protein_complex, etc.) listed in the **DB_Object_Type** field must match the database entry identified by the **DB:DB_Object_ID**. Note that **DB_Object_Type** refers to the database entry (i.e. it represents a protein, functional RNA, etc.); this column does not reflect anything about the GO term or the evidence on which the annotation is based. 
+
 #### DB Object Taxon
 The NCBI taxon ID of the species encoding the gene product.
 
@@ -111,12 +119,13 @@ The taxon should be specified as a number with the prefix "taxon".
 For proteins and transcripts, **Encoded by** refers to the gene id that encodes those entities.
 
 This field is not mandatory, cardinality 0, 1, >1 ; for cardinality >1 use a pipe to separate entries. 
-#### Parent Protein
 
+#### Parent Protein
+When column 1 refers to a protein isoform or modified protein, this column refers to the gene-centric reference protein accession of the column 1 entry.
 
 This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
 #### Protein Containing Complex Members
-
+When column 1 references a protein-containing complex, this column contains the gene-centric reference protein accessions of the complex members.
 
 This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
 
@@ -127,6 +136,7 @@ This field is optional, cardinality 0+; multiple identifiers should be pipe-sepa
 Identifiers used must be a standard 2-part global identifiers, e.g. UniProtKB:OK0206 
 
 This column should be used to record IDs for this object in other databases; for gene products in model organism databases, this must include the UniProtKB ID, and may also include NCBI gene or protein IDs, etc. 
+
 #### Gene Product Properties
 This field is optional, cardinality 0+; multiple properties should be pipe-separated.
 The Properties column can be filled with a pipe separated list of values in the format "property_name = property_value". There is a fixed vocabulary for the property names and this list can be extended when necessary. Supported properties will include: 'GO annotation complete', "Phenotype annotation complete' (the value for these two properties would be a date), 'Target set' (e.g. Reference Genome, Kidney etc.), 'Database subset' (e.g. Swiss-Prot, TrEMBL). 

From 5cad4f7c84b3be4567bb5cefeb34d245780982d2 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Tue, 7 May 2024 19:35:33 -0600
Subject: [PATCH 38/52] Update gene-product-information-gpi-format-20.md

---
 .../gene-product-information-gpi-format-20.md | 30 ++++++++++---------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
index 3b71cde2..b040ebd0 100644
--- a/_docs/gene-product-information-gpi-format-20.md
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -72,6 +72,7 @@ The file format comprises 10 tab-delimited fields. Fields with multiple values (
 
 #### DB:DB Object ID
 The **DB** prefix is the database abbreviation (namespace) from which the unique identifier **DB Object ID** is drawn and must be one of the values from the set of GO database cross-references. The **DB:DB Object ID** is the combined identifier for the database object.
+
 This field is mandatory, cardinality 1.
 
 <!--In GPI 1.0 format, the identifier may reference a top-level primary gene or gene product identifier, or an identified variant of a gene or gene product, for example identifiers that specify distinct proteins produced by differential splicing, alternative translational starts, post-translational cleavage, or post-translational modification. Identifiers for functional RNAs and protein complexes can also be included in this column. 
@@ -81,15 +82,15 @@ This field is mandatory, cardinality 1.
 #### DB Object Symbol
 A (unique and valid) symbol to which the **DB:DB_Object_ID** is matched. No white spaces allowed.
 
+The text entered in the **DB_Object_Symbol** should refer to the entity in **DB:DB_Object_ID**. The **DB_Object_Symbol** field should contain a symbol that is recognizable to a biologist wherever possible (gene product symbol, abbreviation widely used in the literature, ORF name, etc.). It is not a unique identifier or an accession number (unlike the **DB:DB_Object_ID**), although IDs can be used as a **DB_Object_Symbol** if there is no more biologically meaningful symbol available (e.g., when an unnamed gene is annotated). For example, several alternative transcripts from one gene may be annotated separately, each with specific gene product identifiers in **DB:DB_Object_ID**, but with the same gene symbol in the **DB_Object_Symbol** column. 
+
 This field is mandatory, cardinality 1.
-The **DB_Object_Symbol** field should contain a symbol that is recognizable to a biologist wherever possible (an abbreviation widely used in the literature, for example). It is not a unique identifier or an accession number (unlike the **DB:DB_Object_ID**), although IDs can be used as a **DB_Object_Symbol** if there is no more biologically meaningful symbol available (e.g., when an unnamed gene is annotated). ORF names can be used for otherwise unnamed genes or proteins. If gene products are annotated, the gene product symbol can be used if available. Many gene product annotation entries may share a gene symbol. 
-The text entered in the **DB_Object_Symbol** should refer to the entity in **DB:DB_Object_ID**. For example, several alternative transcripts from one gene may be annotated separately, each with specific gene product identifiers in **DB:DB_Object_ID**, but with the same gene symbol in the **DB_Object_Symbol** column. 
 
 #### DB Object Name
-The name of the gene or gene product in **DB:DB_Object_ID**.
+The name of the gene or gene product in **DB:DB_Object_ID**. The text entered in the **DB_Object_Name** should refer to the entity in **DB:DBB_Object_ID**. White spaces are allowed in this field. 
+
+This field is not mandatory, cardinality 0, 1.
 
-This field is not mandatory, cardinality 0, 1 [white space allowed]
-The text entered in the **DB_Object_Name** and **DB_Object_Symbol** should refer to the entity in **DB:DBB_Object_ID**. 
 #### DB Object Synonym
 These entries may be a gene symbol or other text. Note that we strongly recommend that synonyms are included in the GPI file, as this aids the searching of GO.
 
@@ -107,16 +108,18 @@ An ontology identifier for the type of gene or gene product being annotated. Thi
 * marker or uncloned locus 	SO:0001645
 * any subtype of ncRNA in the Sequence Ontology
 
-This field is mandatory, cardinality 1.
 The object type (gene, transcript, protein, protein_complex, etc.) listed in the **DB_Object_Type** field must match the database entry identified by the **DB:DB_Object_ID**. Note that **DB_Object_Type** refers to the database entry (i.e. it represents a protein, functional RNA, etc.); this column does not reflect anything about the GO term or the evidence on which the annotation is based. 
 
+
+This field is mandatory, cardinality 1.
+
 #### DB Object Taxon
-The NCBI taxon ID of the species encoding the gene product.
+The NCBI taxon ID of the species encoding the gene product, specified as a number with the prefix `NCBItaxon:`. 
 
 This field is mandatory, cardinality 1.
-The taxon should be specified as a number with the prefix "taxon". 
+
 #### Encoded by
-For proteins and transcripts, **Encoded by** refers to the gene id that encodes those entities.
+For proteins and transcripts, **Encoded by** refers to the gene ID that encodes those entities.
 
 This field is not mandatory, cardinality 0, 1, >1 ; for cardinality >1 use a pipe to separate entries. 
 
@@ -124,19 +127,18 @@ This field is not mandatory, cardinality 0, 1, >1 ; for cardinality >1 use a pip
 When column 1 refers to a protein isoform or modified protein, this column refers to the gene-centric reference protein accession of the column 1 entry.
 
 This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
+
 #### Protein Containing Complex Members
 When column 1 references a protein-containing complex, this column contains the gene-centric reference protein accessions
 
 This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
 
 #### DB_Xrefs
-Identifiers for the object in **DB:DB_Object_ID** found in other databases.
+Identifiers for the object in **DB:DB_Object_ID** found in other databases. Identifiers used must be standard 2-part global identifiers, e.g. UniProtKB:OK0206. For gene products in model organism databases, **DB_Xrefs** must include the UniProtKB ID, and may also include NCBI gene or protein IDs, etc. 
 
 This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
-Identifiers used must be a standard 2-part global identifiers, e.g. UniProtKB:OK0206 
-
-This column should be used to record IDs for this object in other databases; for gene products in model organism databases, this must include the UniProtKB ID, and may also include NCBI gene or protein IDs, etc. 
 
 #### Gene Product Properties
-This field is optional, cardinality 0+; multiple properties should be pipe-separated.
 The Properties column can be filled with a pipe separated list of values in the format "property_name = property_value". There is a fixed vocabulary for the property names and this list can be extended when necessary. Supported properties will include: 'GO annotation complete', "Phenotype annotation complete' (the value for these two properties would be a date), 'Target set' (e.g. Reference Genome, Kidney etc.), 'Database subset' (e.g. Swiss-Prot, TrEMBL). 
+
+This field is optional, cardinality 0+; multiple properties should be pipe-separated.

From 8074d15238acdcd7e2bea594fc0c647c9a416abd Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Tue, 7 May 2024 20:23:00 -0600
Subject: [PATCH 39/52] Update gene-product-information-gpi-format-20.md

small edits
---
 _docs/gene-product-information-gpi-format-20.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
index b040ebd0..b1c4d0fc 100644
--- a/_docs/gene-product-information-gpi-format-20.md
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -14,6 +14,7 @@ The Gene Ontology Consortium stores annotation data, the representation of gene
 This guide lays out the format specifications for the *G*ene *P*roduct *I*nformation (GPI) 2.0 format.
 **Note that the GPI file is the companion file for the [GPAD file](/docs/gene-product-association-data-gpad-format/).
 Both files should be submitted together using the same version.** 
+
 GPAD/GPI is intended for internal GO use. GO also provides annotations as [GAF files](/docs/go-annotation-file-gaf-format-2.2/) and reccommends use of the GAF format for most use cases. 
 
 For more general information on annotation, please see the [Introduction to GO annotation](/docs/go-annotations/).
@@ -32,7 +33,7 @@ For more general information on annotation, please see the [Introduction to GO a
 
 ## GPI Header
 ### Required information to provide in the header:
-All annotation files must start with a single line denoting the file format. The database/group generating the file (as listed in dbxrefs.yaml) & the ISO-8601 formatted date the file was generated must also be included in the header. Example for GPI 2.0:
+All annotation files must start with a single line denoting the file format. The database/group generating the file as listed in dbxrefs.yaml and the ISO-8601 formatted date the file was generated must be included in the header. Example for GPI 2.0:
 
     !gpi-version: 2.0
     !generated-by: SGD 
@@ -42,14 +43,14 @@ Other information, such as contact details for the submitter or database group,
 
 ## GPI fields
 
-The file format comprises 10 tab-delimited fields. Fields with multiple values (for example, gene product synonyms) should separate values by pipes.
+The file format comprises 11 tab-delimited fields. Fields with multiple values (for example, gene product synonyms) should separate values by pipes.
 
 | **Column** | **Content** | **Required?**	| **Cardinality** | **Example**|
 |----------|---------|-------------|---------|--------|
 | 1 | [DB:DB_Object_ID](#dbdb-object-id "Definition and requirements for DB:DB Object ID (column 1)") |	required |	1 |	UniProtKB:Q4VCS5|
 | 2 | [DB_Object_Symbol](#db-object-symbol "Definition and requirements for DB Object Symbol (column 2)") |	required |	1 |	AMOT|
 | 3 | [DB_Object_Name](#db-object-name "Definition and requirements for DB Object Name (column 3)") |	optional |	0 or greater |	Angiomotin|
-| 4 | [DB_Object_Synonym(s)](#db-object-synonym "Definition and requirements for DB Object Synonym(s) (column 4)") |	optional |	0 or greater |	E230009N18Rik|KIAA1071|
+| 4 | [DB_Object_Synonym(s)](#db-object-synonym "Definition and requirements for DB Object Synonym(s) (column 4)") |	optional |	0 or greater |	E230009N18Rik\|KIAA1071|
 | 5 | [DB_Object_Type](#db-object-type "Definition and requirements for DB Object Type (column 5)") |	required |	1 |	PR:000000001|
 | 6 | [DB_Object_Taxon](#db-object-taxon "Definition and requirements for DB Object Taxon (column 6)") |	required |	1 |	NCBItaxon:9606|
 | 7 | [Encoded_by](#encoded-by "Definition and requirements for Encoded by (column 7)") | optional | 0 or greater | ***EXAMPLE NEEDED***|
@@ -60,10 +61,9 @@ The file format comprises 10 tab-delimited fields. Fields with multiple values (
 
 
 ### GPI 2.0 examples
-    SGD:S000005027  Sal1  ADP/ATP transporter  YNL083W  PR:000000001  NCBItaxon:559292  SAL1          
-    SGD:S000217643  CBF1:MET4:MET28CBF1-MET4-MET28 sulfur metabolism transcription factor complex    GO:0032991  NCBItaxon:559292      SGD:S000003821,SGD:S000001456,SGD:S000005047  CPX-1016  
+    SGD:S000005027  Sal1  ADP/ATP transporter  YNL083W  PR:000000001  NCBItaxon:559292    UniProtKB:D6W196          
     
-    A0AA85ABI6	Phospholipid scramblase		protein	NCBITaxon:taxon:48269					db_subset=TrEMBL|uniprot_proteome=UP000050790
+    SGD:S000217643  CBF1:MET4:MET28CBF1-MET4-MET28 sulfur metabolism transcription factor complex    GO:0032991  NCBItaxon:559292      SGD:S000003821,SGD:S000001456,SGD:S000005047  CPX-1016  
     
     RNAcentral:URS0000C3938B_6185		Schistosoma haematobium Hammerhead ribozyme (type I) ribozyme sequence		hammerhead_ribozyme	NCBITaxon:taxon:6185					
 

From a2bbaf423774e73db765bbe6a91384527dcb0d1d Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 13 May 2024 19:06:04 -0600
Subject: [PATCH 40/52] Update gene-product-information-gpi-format-20.md

formatting
---
 _docs/gene-product-information-gpi-format-20.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
index b1c4d0fc..f295e914 100644
--- a/_docs/gene-product-information-gpi-format-20.md
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -94,7 +94,7 @@ This field is not mandatory, cardinality 0, 1.
 #### DB Object Synonym
 These entries may be a gene symbol or other text. Note that we strongly recommend that synonyms are included in the GPI file, as this aids the searching of GO.
 
-This field is not mandatory, cardinality 0, 1, >1 [white space allowed]; for cardinality >1 use a pipe to separate entries (e.g. YFL039C|ABY1|END7|actin gene). 
+This field is not mandatory, cardinality 0, 1, >1 [white space allowed]; for cardinality >1 use a pipe to separate entries (e.g. YFL039C\|ABY1\|END7\|actin gene). 
 
 #### DB Object Type
 An ontology identifier for the type of gene or gene product being annotated. This field uses Sequence Ontology, Protein Ontology, and GO labels and must correspond to one of the [permitted GPI entity types](https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#gpi-entity-types) or a more granular child term. Acceptable entries include: 
@@ -129,7 +129,7 @@ When column 1 refers to a protein isoform or modified protein, this column refer
 This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
 
 #### Protein Containing Complex Members
-When column 1 references a protein-containing complex, this column contains the gene-centric reference protein accessions
+When column 1 references a protein-containing complex, this column contains the gene-centric reference protein accessions.
 
 This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
 

From 7cd4a2a4d6cd09b3cbc262f5311acd0ea4a6d034 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Tue, 21 May 2024 09:24:06 -0600
Subject: [PATCH 41/52] Update download-go-annotations.md

typo broke url
---
 _docs/download-go-annotations.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index 950016c4..31135164 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -14,7 +14,7 @@ redirect_from:
 This page has instructions for getting GO annotations for almost any organism. If your organism is not available in the [official GO products](http://current.geneontology.org/products/pages/downloads.html), [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}, or [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}, we recommend using the latest version of [InterProScan](https://interproscan-docs.readthedocs.io/en/latest/){:target="blank"} for unannotated organisms.
 
 Jump to a section:
-- [Commonly studied organisms](/docs/download-go/annotations/#1-commonly-studies-organisms)
+- [Commonly studied organisms](/docs/download-go/annotations/#1-commonly-studied-organisms)
 - [All other organisms](/docs/download-go/annotations/#2-all-other-organisms)
 
 #### Required Files

From c876979a534533ab2881a93debc118c5982066b1 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Tue, 21 May 2024 09:31:07 -0600
Subject: [PATCH 42/52] Update download-go-annotations.md

fixing same typo
---
 _docs/download-go-annotations.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index 31135164..6794c5c5 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -14,8 +14,8 @@ redirect_from:
 This page has instructions for getting GO annotations for almost any organism. If your organism is not available in the [official GO products](http://current.geneontology.org/products/pages/downloads.html), [UniProt GAFs by proteome](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/){:target="blank"}, or [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}, we recommend using the latest version of [InterProScan](https://interproscan-docs.readthedocs.io/en/latest/){:target="blank"} for unannotated organisms.
 
 Jump to a section:
-- [Commonly studied organisms](/docs/download-go/annotations/#1-commonly-studied-organisms)
-- [All other organisms](/docs/download-go/annotations/#2-all-other-organisms)
+- [Commonly studied organisms](/docs/download-go-annotations/#1-commonly-studied-organisms)
+- [All other organisms](/docs/download-go-annotations/#2-all-other-organisms)
 
 #### Required Files
 Most tools that use GO annotations take two input files: 

From f3890cdbeca7e9405c57e7e2900fddfa06fa6aff Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Tue, 28 May 2024 14:21:53 -0600
Subject: [PATCH 43/52] Update gene-product-information-gpi-format-20.md

---
 _docs/gene-product-information-gpi-format-20.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
index f295e914..4d73bce1 100644
--- a/_docs/gene-product-information-gpi-format-20.md
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -92,12 +92,12 @@ The name of the gene or gene product in **DB:DB_Object_ID**. The text entered in
 This field is not mandatory, cardinality 0, 1.
 
 #### DB Object Synonym
-These entries may be a gene symbol or other text. Note that we strongly recommend that synonyms are included in the GPI file, as this aids the searching of GO.
+Alternative names for the entity in **DB:DB_Object_ID**. These entries may be a gene symbol or other text. Note that we strongly recommend that synonyms are included in the GPI file, as this aids the searching of GO.
 
 This field is not mandatory, cardinality 0, 1, >1 [white space allowed]; for cardinality >1 use a pipe to separate entries (e.g. YFL039C\|ABY1\|END7\|actin gene). 
 
 #### DB Object Type
-An ontology identifier for the type of gene or gene product being annotated. This field uses Sequence Ontology, Protein Ontology, and GO labels and must correspond to one of the [permitted GPI entity types](https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#gpi-entity-types) or a more granular child term. Acceptable entries include: 
+An ontology identifier for the biological entity in **DB:DB_Object_ID** which is annotated with GO. This field uses Sequence Ontology, Protein Ontology, and GO IDs and must correspond to one of the [permitted GPI entity types](https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#gpi-entity-types) or a more granular child term. Acceptable entries include: 
 
 * protein-coding gene SO:0001217
 * ncRNA-coding gene 	 	SO:0001263

From 7c0a55c53ab59b1a77244f8e20b3b91ca4dd51b8 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Tue, 28 May 2024 15:18:29 -0600
Subject: [PATCH 44/52] Update gene-product-information-gpi-format-20.md

---
 _docs/gene-product-information-gpi-format-20.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
index b1c4d0fc..ac112630 100644
--- a/_docs/gene-product-information-gpi-format-20.md
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -56,16 +56,16 @@ The file format comprises 11 tab-delimited fields. Fields with multiple values (
 | 7 | [Encoded_by](#encoded-by "Definition and requirements for Encoded by (column 7)") | optional | 0 or greater | ***EXAMPLE NEEDED***|
 | 8 | [Parent_Protein](#parent-protein "Definition and requirements for Parent Protein (column 8)") |	optional |	0 or 1 |	UniProtKB:Q4VCS5|
 | 9 | [Protein_Containing_Complex_Members](#protein-containing-complex-members "Definition and requirements for Protein Containing Complex Members (column 9)") | optional | 0 or greater | SGD:S000003821,SGD:S000001456,SGD:S000005047|
-| 10 | [DB_Xref(s)](#db_xrefs "Definition and requirements for DB_Xref(s) (column 10)") |	optional |	0 or greater | ***EXAMPLE NEEDED*** |
+| 10 | [DB_Xref(s)](#db-xrefs "Definition and requirements for DB_Xref(s) (column 10)") |	optional |	0 or greater | ***EXAMPLE NEEDED*** |
 | 11 | [Gene_Product_Properties](#gene-product-properties "Definition and requirements for Gene Product Properties (column 11)") |	optional |	0 or greater |	db_subset=Swiss-Prot|
 
 
-### GPI 2.0 examples
+### GPI 2.0 example content
     SGD:S000005027  Sal1  ADP/ATP transporter  YNL083W  PR:000000001  NCBItaxon:559292    UniProtKB:D6W196          
     
-    SGD:S000217643  CBF1:MET4:MET28CBF1-MET4-MET28 sulfur metabolism transcription factor complex    GO:0032991  NCBItaxon:559292      SGD:S000003821,SGD:S000001456,SGD:S000005047  CPX-1016  
+    SGD:S000217643  CBF1:MET4:MET28CBF1-MET4-MET28 sulfur metabolism transcription factor complex    GO:0032991  NCBItaxon:559292      SGD:S000003821,SGD:S000001456,SGD:S000005047  ComplexPortal:CPX-1016  
     
-    RNAcentral:URS0000C3938B_6185		Schistosoma haematobium Hammerhead ribozyme (type I) ribozyme sequence		hammerhead_ribozyme	NCBITaxon:taxon:6185					
+    RNAcentral:URS0000C3938B_6185		Schistosoma haematobium Hammerhead ribozyme (type I) ribozyme sequence		hammerhead_ribozyme	NCBITaxon:6185					
 
 
 ### Definitions and requirements for field contents
@@ -133,7 +133,7 @@ When column 1 references a protein-containing complex, this column contains the
 
 This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
 
-#### DB_Xrefs
+#### DB Xrefs
 Identifiers for the object in **DB:DB_Object_ID** found in other databases. Identifiers used must be standard 2-part global identifiers, e.g. UniProtKB:OK0206. For gene products in model organism databases, **DB_Xrefs** must include the UniProtKB ID, and may also include NCBI gene or protein IDs, etc. 
 
 This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.

From c22390712b085ff5cc9f961d195fb04993c2e751 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Tue, 28 May 2024 16:43:41 -0600
Subject: [PATCH 45/52] Update gene-product-association-data-gpad-format-20.md

---
 _docs/gene-product-association-data-gpad-format-20.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_docs/gene-product-association-data-gpad-format-20.md b/_docs/gene-product-association-data-gpad-format-20.md
index 1d4742f3..8bfc36d0 100644
--- a/_docs/gene-product-association-data-gpad-format-20.md
+++ b/_docs/gene-product-association-data-gpad-format-20.md
@@ -121,7 +121,7 @@ One of the codes from the [Evidence & Conclusion Ontology](http://www.evidenceon
 This field is mandatory, cardinality 1.
 
 #### 7. With [or] From
-Also referred to as **With, From** or the **With/From** column
+Also referred to as **With, From** or the **With/From** column.
 
 This field is used to hold an identifier for annotations using certain evidence codes: ECO:0000305 ([IC](https://wiki.geneontology.org/index.php/Inferred_by_Curator_(IC)));
 ECO:0000203, ECO:0000256, and ECO:0000265 ([IEA & child terms](https://wiki.geneontology.org/index.php/Inferred_from_Electronic_Annotation_(IEA))); ECO:00000316 ([IGI](https://wiki.geneontology.org/Inferred_from_Genetic_Interaction_(IGI))); ECO:0000021 ([IPI](https://wiki.geneontology.org/Inferred_from_Physical_Interaction_(IPI))); ECO:0000031, ECO:0000250 and ECO:0000255 ([ISS & child terms](https://wiki.geneontology.org/Inferred_from_Sequence_or_structural_Similarity_(ISS))). 

From 523bc97e474a3d0469139119587deb62c4275eed Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Tue, 28 May 2024 17:47:43 -0600
Subject: [PATCH 46/52] Update gene-product-information-gpi-format-20.md

---
 _docs/gene-product-information-gpi-format-20.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
index b1b99adb..165242a7 100644
--- a/_docs/gene-product-information-gpi-format-20.md
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -24,8 +24,8 @@ For more general information on annotation, please see the [Introduction to GO a
 * **The `gpi-version` header must read `2.0` for this format.**
   
 **Columns**
-* **Columns 1 & 2 from the GPI 1.2 are now combined in a single column containing an id in CURIE syntax, e.g. `UniProtKB:P56704`.**
-* **NCBI taxon ids are to be prefixed with `NCBITaxon:` to indicate the source of the id, e.g. `NCBITaxon:6239`**
+* **Columns 1 & 2 from the GPI 1.2 are now combined in a single column containing an ID in CURIE syntax, e.g. `UniProtKB:P56704`.**
+* **NCBI taxon IDs are to be prefixed with `NCBITaxon:` to indicate the source of the ID, e.g. `NCBITaxon:6239`**
 * **Dates must now follow the ISO-8601 format YYYY-MM-DD; time may be included as YYYY-MM-DDTHH:MM:SS**
 <!-- does col 5 have to be an ontology ID or are ontology labels, entity types ok? -->
 
@@ -139,6 +139,6 @@ Identifiers for the object in **DB:DB_Object_ID** found in other databases. Iden
 This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
 
 #### Gene Product Properties
-The Properties column can be filled with a pipe separated list of values in the format "property_name = property_value". There is a fixed vocabulary for the property names and this list can be extended when necessary. Supported properties will include: 'GO annotation complete', "Phenotype annotation complete' (the value for these two properties would be a date), 'Target set' (e.g. Reference Genome, Kidney etc.), 'Database subset' (e.g. Swiss-Prot, TrEMBL). 
+The Properties column can be filled with a pipe-separated list of values in the format "property_name = property_value". There is a fixed vocabulary for the property names and this list can be extended when necessary. Supported properties will include: 'GO annotation complete', 'Phenotype annotation complete' (the value for these two properties would be a date), 'Target set' (e.g. Reference Genome, kidney, etc.), 'Database subset' (e.g. Swiss-Prot, TrEMBL). 
 
 This field is optional, cardinality 0+; multiple properties should be pipe-separated.

From f8307a0869147ae6688c02572240fa525f1ad5a8 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Thu, 30 May 2024 10:30:12 -0600
Subject: [PATCH 47/52] Update gene-product-information-gpi-format-20.md

updates
---
 .../gene-product-information-gpi-format-20.md | 30 +++++++++----------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
index 165242a7..270f0105 100644
--- a/_docs/gene-product-information-gpi-format-20.md
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -47,16 +47,16 @@ The file format comprises 11 tab-delimited fields. Fields with multiple values (
 
 | **Column** | **Content** | **Required?**	| **Cardinality** | **Example**|
 |----------|---------|-------------|---------|--------|
-| 1 | [DB:DB_Object_ID](#dbdb-object-id "Definition and requirements for DB:DB Object ID (column 1)") |	required |	1 |	UniProtKB:Q4VCS5|
+| 1 | [DB:DB_Object_ID](#dbdb-object-id "Definition and requirements for DB:DB Object ID (column 1)") |	required |	1 |	UniProtKB:Q4VCS5-1|
 | 2 | [DB_Object_Symbol](#db-object-symbol "Definition and requirements for DB Object Symbol (column 2)") |	required |	1 |	AMOT|
 | 3 | [DB_Object_Name](#db-object-name "Definition and requirements for DB Object Name (column 3)") |	optional |	0 or greater |	Angiomotin|
-| 4 | [DB_Object_Synonym(s)](#db-object-synonym "Definition and requirements for DB Object Synonym(s) (column 4)") |	optional |	0 or greater |	E230009N18Rik\|KIAA1071|
+| 4 | [DB_Object_Synonym(s)](#db-object-synonym "Definition and requirements for DB Object Synonym(s) (column 4)") |	optional |	0 or greater |	KIAA1071|
 | 5 | [DB_Object_Type](#db-object-type "Definition and requirements for DB Object Type (column 5)") |	required |	1 |	PR:000000001|
-| 6 | [DB_Object_Taxon](#db-object-taxon "Definition and requirements for DB Object Taxon (column 6)") |	required |	1 |	NCBItaxon:9606|
-| 7 | [Encoded_by](#encoded-by "Definition and requirements for Encoded by (column 7)") | optional | 0 or greater | ***EXAMPLE NEEDED***|
+| 6 | [DB_Object_Taxon](#db-object-taxon "Definition and requirements for DB Object Taxon (column 6)") |	required |	1 |	NCBITaxon:9606|
+| 7 | [Encoded_by](#encoded-by "Definition and requirements for Encoded by (column 7)") | optional | 0 or greater | HGNC:17810 |
 | 8 | [Parent_Protein](#parent-protein "Definition and requirements for Parent Protein (column 8)") |	optional |	0 or 1 |	UniProtKB:Q4VCS5|
 | 9 | [Protein_Containing_Complex_Members](#protein-containing-complex-members "Definition and requirements for Protein Containing Complex Members (column 9)") | optional | 0 or greater | SGD:S000003821,SGD:S000001456,SGD:S000005047|
-| 10 | [DB_Xref(s)](#db-xrefs "Definition and requirements for DB_Xref(s) (column 10)") |	optional |	0 or greater | ***EXAMPLE NEEDED*** |
+| 10 | [DB_Xref(s)](#db-xrefs "Definition and requirements for DB_Xref(s) (column 10)") |	optional |	0 or greater | NCBIGene:154796\|ENSEMBL:ENSG00000126016 |
 | 11 | [Gene_Product_Properties](#gene-product-properties "Definition and requirements for Gene Product Properties (column 11)") |	optional |	0 or greater |	db_subset=Swiss-Prot|
 
 
@@ -65,7 +65,7 @@ The file format comprises 11 tab-delimited fields. Fields with multiple values (
     
     SGD:S000217643  CBF1:MET4:MET28CBF1-MET4-MET28 sulfur metabolism transcription factor complex    GO:0032991  NCBItaxon:559292      SGD:S000003821,SGD:S000001456,SGD:S000005047  ComplexPortal:CPX-1016  
     
-    RNAcentral:URS0000C3938B_6185		Schistosoma haematobium Hammerhead ribozyme (type I) ribozyme sequence		hammerhead_ribozyme	NCBITaxon:6185					
+    RNAcentral:URS0000527F89_9606		Homo sapiens (human) hsa-miR-145-5p    SO:0000276		NCBITaxon:9606  HGNC:31532      NCBIGene:406937|ENSEMBL:ENSG00000276365  
 
 
 ### Definitions and requirements for field contents
@@ -87,7 +87,7 @@ The text entered in the **DB_Object_Symbol** should refer to the entity in **DB:
 This field is mandatory, cardinality 1.
 
 #### DB Object Name
-The name of the gene or gene product in **DB:DB_Object_ID**. The text entered in the **DB_Object_Name** should refer to the entity in **DB:DBB_Object_ID**. White spaces are allowed in this field. 
+The name of the gene or gene product in **DB:DB_Object_ID**. The text entered in the **DB_Object_Name** should refer to the entity in **DB:DB_Object_ID**. White spaces are allowed in this field. 
 
 This field is not mandatory, cardinality 0, 1.
 
@@ -97,24 +97,22 @@ Alternative names for the entity in **DB:DB_Object_ID**. These entries may be a
 This field is not mandatory, cardinality 0, 1, >1 [white space allowed]; for cardinality >1 use a pipe to separate entries (e.g. YFL039C\|ABY1\|END7\|actin gene). 
 
 #### DB Object Type
-An ontology identifier for the biological entity in **DB:DB_Object_ID** which is annotated with GO. This field uses Sequence Ontology, Protein Ontology, and GO IDs and must correspond to one of the [permitted GPI entity types](https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#gpi-entity-types) or a more granular child term. Acceptable entries include: 
+An ontology identifier for the biological entity in **DB:DB_Object_ID** which is annotated with GO. This field uses Sequence Ontology, Protein Ontology, and GO IDs and must correspond to one of the [permitted GPI entity types](https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#gpi-entity-types) or a more granular child term. Common entries include: 
 
+* protein  	PR:000000001
 * protein-coding gene SO:0001217
-* ncRNA-coding gene 	 	SO:0001263
-* mRNA 	 SO:0000234
+* gene  SO:0000704
 * ncRNA  SO:0000655
-* protein  	PR:000000001
+  * any subtype of ncRNA in the Sequence Ontology, including ncRNA-coding gene 	 	SO:0001263
 * protein-containing complex 	GO:0032991
-* marker or uncloned locus 	SO:0001645
-* any subtype of ncRNA in the Sequence Ontology
 
-The object type (gene, transcript, protein, protein_complex, etc.) listed in the **DB_Object_Type** field must match the database entry identified by the **DB:DB_Object_ID**. Note that **DB_Object_Type** refers to the database entry (i.e. it represents a protein, functional RNA, etc.); this column does not reflect anything about the GO term or the evidence on which the annotation is based. 
+The object type listed in the **DB_Object_Type** field must match the database entry identified by the **DB:DB_Object_ID**.
 
 
 This field is mandatory, cardinality 1.
 
 #### DB Object Taxon
-The NCBI taxon ID of the species encoding the gene product, specified as a number with the prefix `NCBItaxon:`. 
+The NCBI taxon ID of the species encoding the **DB:DB_Object_ID**, including the prefix `NCBITaxon:`. 
 
 This field is mandatory, cardinality 1.
 
@@ -134,7 +132,7 @@ When column 1 references a protein-containing complex, this column contains the
 This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
 
 #### DB Xrefs
-Identifiers for the object in **DB:DB_Object_ID** found in other databases. Identifiers used must be standard 2-part global identifiers, e.g. UniProtKB:OK0206. For gene products in model organism databases, **DB_Xrefs** must include the UniProtKB ID, and may also include NCBI gene or protein IDs, etc. 
+Identifiers for the object in **DB:DB_Object_ID** found in other databases. Identifiers used must be standard 2-part global identifiers, e.g. UniProtKB:Q60FP0. For gene products in model organism databases, **DB_Xrefs** must include the UniProtKB ID, and may also include NCBI gene or protein IDs, etc. 
 
 This field is optional, cardinality 0+; multiple identifiers should be pipe-separated.
 

From e534f8cbe68f7eedf642f8bafdf423c51a0992df Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 1 Jul 2024 11:38:11 -0600
Subject: [PATCH 48/52] Update download-go-annotations.md

Changes suggested from NCBI
---
 _docs/download-go-annotations.md | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index 6794c5c5..70b20989 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -47,10 +47,12 @@ For all other organisms we recommend downloading annotations from one of the fol
   * Navigate to your organism & download the `.goa` file, e.g. [`22426.A_gambiae.goa`](https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes/22426.A_gambiae.goa){:target="blank"}  
     *Tip: use your browser's in-page search to find the species name.*
 
-* [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}: If your organism has a reference sequence in NCBI, GO annotations are available through NCBI's FTP server. Use these files if you want to use **Entrez Gene identifiers**. Annotation files are available for all eukaryotic genomes available at NCBI. Note that GO annotations are not currently available for archaea, bacteria or viruses.
-  * Go to [https://ftp.ncbi.nlm.nih.gov/genomes/refseq/](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}
-  * Navigate to your organism, e.g. Anopheles_gambiae/ is in the `/invertebrate` directory
-  * Open the `representative/` directory, and open the directory within that
+* [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}: If your organism has a reference genome assembly in NCBI, GO annotations are available in GAF format through NCBI Gene identifiers. Annotation files are available for all eukaryotic genomes available at NCBI RefSeq. Note that GO annotations are not currently available for archaea, bacteria or viruses.
+  * Go to [NCBI](https://www.ncbi.nlm.nih.gov/){:target="blank"}
+  * Navigate to your organism, e.g. Anopheles gambiae  [https://www.ncbi.nlm.nih.gov/search/all/?term=Anopheles%20gambiae](https://www.ncbi.nlm.nih.gov/search/all/?term=Anopheles%20gambiae){:target="blank"}
+  * Follow the ["Genomes" link](https://www.ncbi.nlm.nih.gov/datasets/genome/?taxon=7165){:target="blank"}
+  * Select the [reference assembly](https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_943734735.2/) at the top of the list; this entry is indicated with a green "reference genome" icon and a GCF identifer listed in the RefSeq column
+  * Click on the [FTP link](https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/943/734/735/GCF_943734735.2_idAnoGambNW_F1_1/){:target="blank"}
   * Download the file with the suffix `gene_ontology.gaf.gz`, e.g. `GCF_943734735.2-RS_2023_12_gene_ontology.gaf.gz`
  
 ### 3. If you cannot find annotations for your organism for download as described above

From b88484d8ebbde2a4c5b565d4abebe296c4824a80 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Mon, 1 Jul 2024 11:44:18 -0600
Subject: [PATCH 49/52] Update download-go-annotations.md

---
 _docs/download-go-annotations.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index 70b20989..b7e55316 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -49,7 +49,7 @@ For all other organisms we recommend downloading annotations from one of the fol
 
 * [NCBI RefSeq](https://ftp.ncbi.nlm.nih.gov/genomes/refseq/){:target="blank"}: If your organism has a reference genome assembly in NCBI, GO annotations are available in GAF format through NCBI Gene identifiers. Annotation files are available for all eukaryotic genomes available at NCBI RefSeq. Note that GO annotations are not currently available for archaea, bacteria or viruses.
   * Go to [NCBI](https://www.ncbi.nlm.nih.gov/){:target="blank"}
-  * Navigate to your organism, e.g. Anopheles gambiae  [https://www.ncbi.nlm.nih.gov/search/all/?term=Anopheles%20gambiae](https://www.ncbi.nlm.nih.gov/search/all/?term=Anopheles%20gambiae){:target="blank"}
+  * Navigate to your organism, e.g. [Anopheles gambiae](https://www.ncbi.nlm.nih.gov/search/all/?term=Anopheles%20gambiae){:target="blank"}
   * Follow the ["Genomes" link](https://www.ncbi.nlm.nih.gov/datasets/genome/?taxon=7165){:target="blank"}
   * Select the [reference assembly](https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_943734735.2/) at the top of the list; this entry is indicated with a green "reference genome" icon and a GCF identifer listed in the RefSeq column
   * Click on the [FTP link](https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/943/734/735/GCF_943734735.2_idAnoGambNW_F1_1/){:target="blank"}

From e8099cfd3ad4f3b3064ec09b134ba9c86451d841 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Wed, 3 Jul 2024 14:12:07 -0600
Subject: [PATCH 50/52] Update gene-product-information-gpi-format-20.md

---
 _docs/gene-product-information-gpi-format-20.md | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/_docs/gene-product-information-gpi-format-20.md b/_docs/gene-product-information-gpi-format-20.md
index 270f0105..aa948e5e 100644
--- a/_docs/gene-product-information-gpi-format-20.md
+++ b/_docs/gene-product-information-gpi-format-20.md
@@ -61,12 +61,14 @@ The file format comprises 11 tab-delimited fields. Fields with multiple values (
 
 
 ### GPI 2.0 example content
-    SGD:S000005027  Sal1  ADP/ATP transporter  YNL083W  PR:000000001  NCBItaxon:559292    UniProtKB:D6W196          
-    
-    SGD:S000217643  CBF1:MET4:MET28CBF1-MET4-MET28 sulfur metabolism transcription factor complex    GO:0032991  NCBItaxon:559292      SGD:S000003821,SGD:S000001456,SGD:S000005047  ComplexPortal:CPX-1016  
-    
-    RNAcentral:URS0000527F89_9606		Homo sapiens (human) hsa-miR-145-5p    SO:0000276		NCBITaxon:9606  HGNC:31532      NCBIGene:406937\|ENSEMBL:ENSG00000276365  
 
+> SGD:S000005027  Sal1  ADP/ATP transporter  YNL083W  PR:000000001  NCBItaxon:559292    UniProtKB:D6W196
+  
+Complex:  
+> SGD:S000217643  CBF1:MET4:MET28  CBF1-MET4-MET28 sulfur metabolism transcription factor complex    GO:0032991  NCBItaxon:559292      SGD:S000003821,SGD:S000001456,SGD:S000005047  ComplexPortal:CPX-1016  
+
+ncRNA:    
+> RNAcentral:URS0000527F89_9606		Homo sapiens (human) hsa-miR-145-5p    SO:0000276		NCBITaxon:9606  HGNC:31532      NCBIGene:406937|ENSEMBL:ENSG00000276365  
 
 ### Definitions and requirements for field contents
 
@@ -117,7 +119,7 @@ The NCBI taxon ID of the species encoding the **DB:DB_Object_ID**, including the
 This field is mandatory, cardinality 1.
 
 #### Encoded by
-For proteins and transcripts, **Encoded by** refers to the gene ID that encodes those entities.
+For proteins and transcripts, **Encoded by** refers to the gene ID that encodes those entities, e.g. ENSG00000197153.
 
 This field is not mandatory, cardinality 0, 1, >1 ; for cardinality >1 use a pipe to separate entries. 
 

From 3c6b98cb7c8243fa1bdf54c5aeb100f590d45ae9 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Wed, 3 Jul 2024 14:22:24 -0600
Subject: [PATCH 51/52] Update download-go-annotations.md

cleaning up for #514
---
 _docs/download-go-annotations.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/_docs/download-go-annotations.md b/_docs/download-go-annotations.md
index b7e55316..2cdac214 100644
--- a/_docs/download-go-annotations.md
+++ b/_docs/download-go-annotations.md
@@ -1,13 +1,13 @@
 ---
 title: Download annotations
 permalink: /docs/download-go-annotations/
-redirect_from: 
+redirect_from:
 - download-go-annotations
 - GO.downloads.annotations.shtml
 - /page/download-annotations
 ---
 
-# Download annotations 
+# Download annotations
 
 ### Getting annotations for a selected organism
 
@@ -67,11 +67,11 @@ Download InterProScan at [https://www.ebi.ac.uk/interpro/about/interproscan](htt
 + GO has monthly releases
 + Annotation files are taxon-specific, with a few exceptions including the Reactome and *Candida* Genome Database files
 + Current format guides:
-  + [GAF format 2.2](/docs/go-annotation-file-gaf-format-2.2/) 
+  + [GAF format 2.2](/docs/go-annotation-file-gaf-format-2.2/)
   + [GPAD](/docs/gene-product-association-data-gpad-format/) + [GPI](/docs/gene-product-information-gpi-format/) companion files
-  
+
 ## Programmatic access to GO annotations
-As for any resource in GO, GO annotations are accessible through the DOI-versioned release stored in [Zenodo](https://doi.org/10.5281/zenodo.1205159){:target="blank"} and can be retrieved using BDBag. Read more about [programmatic access](/docs/tools-guide/#programmatic-download-bdbag).
+As for any resource in GO, GO annotations are accessible through the DOI-versioned release stored in [Zenodo](https://doi.org/10.5281/zenodo.1205159){:target="blank"}.
 
-## Error or omission ?
+## Error or omission?
 Any errors or omissions in annotations should be reported by writing to the [GO helpdesk](http://help.geneontology.org/){:target="blank"}.

From 366a8c606ddf3e12a2c16b8293d6886cb53fcb07 Mon Sep 17 00:00:00 2001
From: suzialeksander <suzia@stanford.edu>
Date: Wed, 3 Jul 2024 14:24:03 -0600
Subject: [PATCH 52/52] Update download-go-cams.md

for #514
---
 _docs/download-go-cams.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/_docs/download-go-cams.md b/_docs/download-go-cams.md
index 4442d1bb..52c8ddfc 100644
--- a/_docs/download-go-cams.md
+++ b/_docs/download-go-cams.md
@@ -18,12 +18,12 @@ permalink: /docs/download-go-cams/
 + [GO-CAM JNL](http://current.geneontology.org/products/blazegraph/blazegraph-production.jnl.gz)
 + [GO-CAM SIFs](https://s3.amazonaws.com/geneontology-public/gocam/GO-CAMs.sif.zip)
 
-**Notes**: 
+**Notes**:
 * Individual TTLs (1 TTL for 1 GO-CAM) can also be retrieved from the GitHub repository [noctua-models](https://github.com/geneontology/noctua-models/tree/master/models){:target="blank"}.
 * When using SIFs with [Cytoscape](https://cytoscape.org/){:target="blank"}, we also provide a [gocam-styles.xml](https://s3.amazonaws.com/geneontology-public/gocam/gocam-styles.xml). Read more in the [GO-CAM documentation](/docs/gocam-overview/#visualization-in-cytoscape).
 
 ## Programmatic access to GO-CAMs
-As for any resource in GO, GO-CAMs are accessible through the DOI-versioned release stored in [Zenodo](https://doi.org/10.5281/zenodo.1205159){:target="blank"}. Read more about [programmatic access](/docs/tools-guide/#programmatic-download-bdbag).
+As for any resource in GO, GO-CAMs are accessible through the DOI-versioned release stored in [Zenodo](https://doi.org/10.5281/zenodo.1205159){:target="blank"}.
 
 ## Error or omission ?
-Any errors or omissions in annotations should be reported by writing to the [GO helpdesk](https://help.geneontology.org/){:target="blank"}
+Any errors or omissions in annotations should be reported by writing to the [GO helpdesk](https://help.geneontology.org/){:target="blank"}.