From 6cc5c848782d9737c7647dee0eca51d7174430dd Mon Sep 17 00:00:00 2001
From: Spencer Bliven <spencer.bliven@gmail.com>
Date: Mon, 14 Oct 2024 15:38:22 +0200
Subject: [PATCH 1/5] Add start:dev command

Update .gitignore
---
 .gitignore   | 4 ++++
 package.json | 1 +
 2 files changed, 5 insertions(+)

diff --git a/.gitignore b/.gitignore
index 4843378f..03d88c59 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,3 +17,7 @@ docs
 *.pdf
 .idea
 cat
+
+# Website output
+_site
+.jekyll-cache
\ No newline at end of file
diff --git a/package.json b/package.json
index 7bb0cb8d..9163ad71 100644
--- a/package.json
+++ b/package.json
@@ -4,6 +4,7 @@
   "description": "",
   "main": "index.js",
   "scripts": {
+    "start:dev": "honkit serve --port 4500",
     "test": "echo \"Error: no test specified\" && exit 1"
   },
   "keywords": [],

From 47d2467f89c58162c394509ea6f9f61ec7fb8a6b Mon Sep 17 00:00:00 2001
From: Spencer Bliven <spencer.bliven@gmail.com>
Date: Mon, 14 Oct 2024 15:44:56 +0200
Subject: [PATCH 2/5] Reformat backend/configuration.md

- Remove trailing spaces
- Wrap long lines
- A couple minor markdown syntax fixes
---
 Development/v4.x/backend/configuration.md | 809 +++++++++++-----------
 1 file changed, 420 insertions(+), 389 deletions(-)

diff --git a/Development/v4.x/backend/configuration.md b/Development/v4.x/backend/configuration.md
index 0513e7b0..e7618a0a 100644
--- a/Development/v4.x/backend/configuration.md
+++ b/Development/v4.x/backend/configuration.md
@@ -1,401 +1,432 @@
 # Configuration
 
-When using the official release image, Backend configuration can be achieved by setting the environmental variables listed below through an orchestration/containerization system, the .env file or with a suitable method compatible with your environment.  
-The current source code contains an example .env file, named _.env.sample_ listing all the environment variable available to configure the backend.
+When using the official release image, Backend configuration can be achieved by setting
+the environmental variables listed below through an orchestration/containerization
+system, the .env file or with a suitable method compatible with your environment. The
+current source code contains an example .env file, named _.env.sample_ listing all the
+environment variables available to configure the backend.
 
-If you are compiling the application from source, you can edit the file _serc/config/configuration.ts_ with the correct values for your infrastructure. This option is still undocumented, although it is our intention to provide a detailed how-to guide as soon as we can.
+If you are compiling the application from source, you can edit the file
+_src/config/configuration.ts_ with the correct values for your infrastructure. This
+option is still undocumented, although it is our intention to provide a detailed how-to
+guide as soon as we can.
 
 ## Environment Variables
 
-This is complete the list of environment variable that can be used to configure SciCat backend.
-The list is compiled according to the configuration class defined in _src/config/configuration.ts_
-
-- ADMIN\_GROUPS:  
-  list of groups that have admin priviliges  
-  _default_: ""  
-  _format_: comma separated list of strings. Leading and trailing spaces are trimmed  
-
-- DELETE\_GROUPS:  
-  list of groups that are allowed to delete content  
-  _default_: ""  
-  _format_: comma separated list of strings. Leading and trailing spaces are trimmed  
-
-- CREATE\_DATASET\_GROUPS:  
-  list of non admin groups that are allowed to create datasets without pid. The pid is assigned by the system. If set to "all", all users can create a dataset belonging to any of the groups they belong to.    
-  _default_: "#all"  
-  _format_: comma separated list of strings. Leading and trailing spaces are trimmed  
-
-- CREATE\_DATASET\_WITH\_PID\_GROUPS:  
-  list of non admin groups that are allowed to create datasets with explicit pid. If set to "#all", all users can create a dataset belonging to any of the groups they belong to and with esplicit pid.  
-  If the pid verification is enabled, pid will be validated agains the specification passed.  
-  _default_: ""  
-  _format_: comma separated list of strings. Leading and trailing spaces are trimmed  
-
-- CREATE\_DATASET\_PRIVILEGED\_GROUPS:  
-  list of non admin groups that are allowed to create datasets for groups they do not belong to. If set to "#all", all users can create a dataset belonging to any group with explicit pid.  
-  If the pid verification is enabled, pid will be validated agains the specification passed.  
-  _default_: ""  
-  _format_: comma separated list of strings. Leading and trailing spaces are trimmed  
-
-- PROPOSAL\_GROUPS:  
-  list of non admin groups that are allowed to create and update proposals for groups they do not belong to. If set to "#all", all users can create a dataset belonging to any group with explicit pid.  
-  _default_: ""  
-  _format_: comma separated list of strings. Leading and trailing spaces are trimmed  
-
-- SAMPLE\_GROUPS:  
-  list of non admin groups that are allowed to create and update samples for the groups they belong to. If set to "#all", all users can create a dataset belonging to their group.  
-  _default_: ""  
-  _format_: comma separated list of strings. Leading and trailing spaces are trimmed  
-
-- SAMPLE\_PRIVILEGED\_GROUPS:  
-  list of non admin groups that are allowed to create samples for any groups, but can only update samples belonging to groups they belong to.
-  _default_: ""  
-  _format_: comma separated list of strings. Leading and trailing spaces are trimmed  
-
-
-- ACCESS\_GROUPS\_STATIC\_VALUES:  
-  List of groups assigned by default to all users. Used in the vanilla implementation for easy configuration.  
-  If you do not want or need to assign any default group, it should be set to empty string "".  
-  Default value: ""  
-  _format_: Comman separated list of strings. Leading and trailing spaces are trimmed  
-  _example_: "group1,group2,group3,..."  
+This is the complete list of environment variables that can be used to configure the
+SciCat backend. The list is compiled according to the configuration class defined in
+_src/config/configuration.ts_.
+
+- ADMIN\_GROUPS:
+  list of groups that have admin privileges
+  _default_: ""
+  _format_: comma separated list of strings. Leading and trailing spaces are trimmed
+
+- DELETE\_GROUPS:
+  list of groups that are allowed to delete content
+  _default_: ""
+  _format_: comma separated list of strings. Leading and trailing spaces are trimmed
+
+- CREATE\_DATASET\_GROUPS:
+  list of non admin groups that are allowed to create datasets without pid. The pid is
+  assigned by the system. If set to "#all", all users can create a dataset belonging to
+  any of the groups they belong to.
+  _default_: "#all"
+  _format_: comma separated list of strings. Leading and trailing spaces are trimmed
+
+- CREATE\_DATASET\_WITH\_PID\_GROUPS:
+  list of non admin groups that are allowed to create datasets with explicit pid. If set
+  to "#all", all users can create a dataset belonging to any of the groups they belong
+  to and with explicit pid.
+  If the pid verification is enabled, pid will be validated against the specification passed.
+  _default_: ""
+  _format_: comma separated list of strings. Leading and trailing spaces are trimmed
+
+- CREATE\_DATASET\_PRIVILEGED\_GROUPS:
+  list of non admin groups that are allowed to create datasets for groups they do not
+  belong to. If set to "#all", all users can create a dataset belonging to any group
+  with explicit pid.
+  If the pid verification is enabled, pid will be validated against the specification passed.
+  _default_: ""
+  _format_: comma separated list of strings. Leading and trailing spaces are trimmed
+
+- PROPOSAL\_GROUPS:
+  list of non admin groups that are allowed to create and update proposals for groups
+  they do not belong to. If set to "#all", all users can create a dataset belonging to
+  any group with explicit pid.
+  _default_: ""
+  _format_: comma separated list of strings. Leading and trailing spaces are trimmed
+
+- SAMPLE\_GROUPS:
+  list of non admin groups that are allowed to create and update samples for the groups
+  they belong to. If set to "#all", all users can create a dataset belonging to their
+  group.
+  _default_: ""
+  _format_: comma separated list of strings. Leading and trailing spaces are trimmed
+
+- SAMPLE\_PRIVILEGED\_GROUPS:
+  list of non admin groups that are allowed to create samples for any groups, but can
+  only update samples belonging to groups they belong to.
+  _default_: ""
+  _format_: comma separated list of strings. Leading and trailing spaces are trimmed
+
+- ACCESS\_GROUPS\_STATIC\_VALUES:
+  List of groups assigned by default to all users. Used in the vanilla implementation
+  for easy configuration. If you do not want or need to assign any default group, it
+  should be set to empty string "".
+  _default_: ""
+  _format_: Comma separated list of strings. Leading and trailing spaces are trimmed
+  _example_: "group1,group2,group3,..."
 
 - ACCESS\_GROUP\_SERVICE\_TOKEN:
-  Access token needed to access the API specified in ACCESS\_GROUP\_SERVICE\_API\_URL, used to retrieve access groups from a third party system.  
-  _format*: string 
-
-- ACCESS\_GROUP\_SERVICE\_API\_URL:  
-  Well formed url of the service API used to provide access groups. Only one value is allowed.  
-  _format_: string  
+  Access token needed to access the API specified in ACCESS\_GROUP\_SERVICE\_API\_URL,
+  used to retrieve access groups from a third party system.
+  _format_: string
+
+- ACCESS\_GROUP\_SERVICE\_API\_URL:
+  Well formed url of the service API used to provide access groups. Only one value is
+  allowed.
+  _format_: string
   _example_: "https://my.access.group/service/api/url"
-  
-- DOI_PREFIX:  
-  The facility DOI prefix, with trailing slash.  
-  _default_: ""  
-  _format_: string  
-
-- EXPRESS\_SESSION\_SECRET:  
-  Secret used to set up express session.  
-  _default_: ""  
-  _format_: string  
-
-- LOGOUT\_URL:  
-  URL specified upon successful logout. It is returned in the json object for the frontend, or third party UI, to be used locally.  
-  _default_: ""  
-  _format_: string  
-
-- HTTP\_MAX\_REDIRECTS:  
-  Max number of redirects for http requests.  
-  _default_: 5  
-  _format_: integer  
-
-- HTTP\_TIMEOUT:  
-  Timeout from http requests in ms.  
-  _default_: 5000  
+
+- DOI_PREFIX:
+  The facility DOI prefix, with trailing slash.
+  _default_: ""
+  _format_: string
+
+- EXPRESS\_SESSION\_SECRET:
+  Secret used to set up express session.
+  _default_: ""
+  _format_: string
+
+- LOGOUT\_URL:
+  URL specified upon successful logout. It is returned in the json object for the frontend, or third party UI, to be used locally.
+  _default_: ""
+  _format_: string
+
+- HTTP\_MAX\_REDIRECTS:
+  Max number of redirects for http requests.
+  _default_: 5
   _format_: integer
-  
-- JWT_SECRET:  
-  The secret used to create any JWT token, used for authorization.  
-  _default_: ""  
-  _format_: string  
-
-- JWT\_EXPIRES\_IN:  
-  Expiration time of any JWT token in seconds.  
-  _default_: 3600 (s)  
-  _format_: integer  
-
-- JWT\_NEVER\_EXPIRES:  
-  Length of time that the never expiring jwt token will last.  
-  _default_: 100y  
-  _format_: string as in number of years  
-
-- LDAP\_URL:  
-  Full URI (including port) of your local LDAP server, if this is your selected authentication method.  
-  _default_: No default  
-  _example_: ldaps://ldap.server.com:636/   
-  _format_: string  
-
-- LDAP\_BIND\_DN:  
-  Bind DN to access information on your LDAP server.  
-  _default_: No default  
-  _format_: string  
-
-- LDAP\_BIND\_CREDENTIALS:  
-  Credentials associated with your bind DN to acccess your LDAP server.  
-  _default_: No default  
-  _format_: string  
-
-- LDAP\_SEARCH\_BASE:  
-  Search base for your LDAP server.  
-  _default_: No default   
-  _format_: string  
-
-- LDAP\_SEARCH\_FILTER:  
-  Search filter for you LDAP server.  
-  _default_: No default  
-  _format_: string   
-  _example_: "(LDAPUsername={{username}})"  
-
-- LDAP\_MODE:  
-  type of ldap server we are communicating with  
-  **_NEEDS TO BE UPDATED. Not sure which other values are accepted_**  
-  _default_: ad  
-  _format_: string  
-  _acceptable values_: ad  
-  
-- LDAP\_EXTERNAL\_ID:  
-  LDAP matching field that provides the external id  
-  _default_: sAMAccountName  
-  _format_: string  
-
-- LDAP\_USERNAME:  
-  LDAP field providing the username  
-  _default_: displayName  
-  _format_: string  
-
-- OIDC\_ISSUER:  
-  Full URL of your OIDC identity provider  
-  _default_: No default  
-  _format_: string  
-  _example_: "https://identity.your.facility/your/realm"  
-
-- OIDC\_CLIENT\_ID:  
-  Client id used to convert OIDC code to OIDC token. This is assigned in the OIDC service when the token is generated  
-  _default_: No default  
-  _format_: string  
-  _example_: "scicat"  
-
-- OIDC\_CLIENT\_SECRET:   
-  Token used to convert OIDC code to OIDC token. This is assigned in the OIDC service when the token is generated  
-  _example_: "90f1268..."  
-
-- OIDC\_CALLBACK\_URL:  
-  URL of the endpoint that is called when the authentication has been executed with the OIDC service.   
-  _default_: No default   
-  _format_: string    
-  _example_: "http://localhost:3000/api/v3/oidc/callback"  
-
-- OIDC\_SCOPE:  
-  Information returned by the OIDC service together with token  
-  _default_: No default  
-  _format_: string   
-  _example_: "openid profile email"  
-
-- OIDC\_SUCCESS\_URL:  
-  Frontend URL that the user is directed to after a successful authentication. It must be a valid frontend URL.  
-  _default_: No default  
-  _format_: string  
-  _example_: "http://localhost:3000/Datasets"  
-
-- OIDC\_ACCESS\_GROUPS:  
-  field used to retrieve access groups from the OIDC service. It is not used in the vanilla implementation.  
-  _default_: No default  
-  _format_: string  
-  _example_: "access_groups"  
-
-- OIDC\_ACCESS\_GROUPS\_PROPERTY:  
-  name of the OIDC property used to retrieve the users groups from OIDC.  
-  _default_: none  
-  _format_: string  
-
-- OIDC\_AUTO\_LOGOUT:  
-  if enabled, when login out from SciCat, we logout from OIDC also.  
-  _default_: false  
-  _format_: boolean  
-
-- OIDC\_RETURN\_URL:  
-  URL the user is redirected after a successful logout  
-  _default_: none  
-  _format_: string  
-
-- LOGBOOK\_ENABLED:  
-  Flag to enable/disable the Logbook endpoints.  
-  accept values: "yes", "no"  
-  _default_: no   
-  _format_: string  
-
-- LOGBOOK\_BASE\_URL:  
-  The base URL to the SciChat wrapper API. Only required if Logbook is enabled.  
-  _default_: "http://localhost:3030/scichatapi"  
-  _format_: string  
-
-- LOGBOOK\_USERNAME:  
-  The username used to authenticate to the SciChat wrapper API. Only required if Logbook is enabled.  
-  _default_: No default  
-  _format_: string  
-
-- LOGBOOK\_PASSWORD:  
-  The password used to authenticate to the SciChat wrapper API. Only required if Logbook is enabled.  
-  _default_: No default  
-  _format_: string  
-
-- METADATA\_KEYS\_RETURN\_LIMIT:  
-  The maximum number of keys returned by the `/Datasets/metadataKeys` endpoint.  
-  _default_: No default  
-  _format_: integer  
-
-- METADATA\_PARENT\_INSTANCES\_RETURN\_LIMIT:  
-  The maximum number of Datasets used to extract metadata keys in the `/Datasets/metadataKeys` endpoint.  
-  _default_: No default  
-  _format_: integer  
-
-- MONGODB\_URI:  
-  The URI for your MongoDB instance.  
-  _default_: No default  
-  _format_: string "mongodb://<USERNAME>:<PASSWORD>@<HOST>:27017/<DB_NAME>"  
-  
-- OAI\_PROVIDER\_ROUTE:  
-  URI to OAI provider, which is used in the `/publisheddata/:id/resync` endpoint.  
-  _default_: no default  
-  _format_: string  
-
-- PID\_PREFIX:  
-  The facility PID prefix, with trailing slash.  
-  _default_: no default  
-  _format_: string  
-
-- PUBLIC\_URL\_PREFIX:  
-  The base URL to the facility Landing Page.  
-  _default_: No default  
-  _format_: string  
-  _example_: "https://doi.ess.eu/detail/"  
-
-- PORT:  
-  The port on which the backend listen on.  
-  _default_: 3000  
-  _format_: integer  
-
-- RABBITMQ\_ENABLED:  
-  Flag to enable/disable RabbitMQ consumer.  
-  accepted values: "yes", "no"  
-  _deprecated_. Will be removed in future releases.  
-  _default_: no  
-  _format_: string  
-  
-- RABBITMQ\_HOSTNAME:  
-  The hostname of the RabbitMQ message broker. Only required if RabbitMQ is enabled.  
-  _deprecated_. Will be removed in future releases.  
-  _default_: no default  
-  _default_: string  
-
-- RABBITMQ\_USERNAME:  
-  The username used to authenticate to the RabbitMQ message broker. Only required if RabbitMQ is enabled.  
-  _deprecated_. Will be removed in future releases.  
-  _default_: no default  
-  _format_: string  
-
-- RABBITMQ\_PASSWORD:  
-  The password used to authenticate to the RabbitMQ message broker. Only required if RabbitMQ is
-  enabled.  
-  _deprecated_. Will be removed in future releases.  
-  _default_: no default  
-  _format_: string  
-
-- REGISTER\_DOI\_URI:  
-  URI to the organization that registers the facilities DOIs.  
-  _default_: no default  
-  _format_: string  
-  _example_: "https://mds.test.datacite.org/doi"  
-
-- REGISTER\_METADATA\_URI:  
-  URI to the organization that registers the facilities published data metadata.  
-  _default_: no default  
-  _format_: string  
-  _example_: ="https://mds.test.datacite.org/metadata"  
+
+- HTTP\_TIMEOUT:
+  Timeout for HTTP requests in ms.
+  _default_: 5000
+  _format_: integer
+
+- JWT_SECRET:
+  The secret used to create any JWT token, used for authorization.
+  _default_: ""
+  _format_: string
+
+- JWT\_EXPIRES\_IN:
+  Expiration time of any JWT token in seconds.
+  _default_: 3600 (s)
+  _format_: integer
+
+- JWT\_NEVER\_EXPIRES:
+  Length of time that the never expiring jwt token will last.
+  _default_: 100y
+  _format_: string as in number of years
+
+- LDAP\_URL:
+  Full URI (including port) of your local LDAP server, if this is your selected
+  authentication method.
+  _default_: No default
+  _example_: ldaps://ldap.server.com:636/
+  _format_: string
+
+- LDAP\_BIND\_DN:
+  Bind DN to access information on your LDAP server.
+  _default_: No default
+  _format_: string
+
+- LDAP\_BIND\_CREDENTIALS:
+  Credentials associated with your bind DN to access your LDAP server.
+  _default_: No default
+  _format_: string
+
+- LDAP\_SEARCH\_BASE:
+  Search base for your LDAP server.
+  _default_: No default
+  _format_: string
+
+- LDAP\_SEARCH\_FILTER:
+  Search filter for your LDAP server.
+  _default_: No default
+  _format_: string
+  _example_: "(LDAPUsername={{username}})"
+
+- LDAP\_MODE:
+  type of ldap server we are communicating with.
+  **_(NEEDS TO BE UPDATED. Not sure which other values are accepted)_**
+  _default_: ad
+  _format_: string
+  _acceptable values_: ad
+
+- LDAP\_EXTERNAL\_ID:
+  LDAP matching field that provides the external id
+  _default_: sAMAccountName
+  _format_: string
+
+- LDAP\_USERNAME:
+  LDAP field providing the username
+  _default_: displayName
+  _format_: string
+
+- OIDC\_ISSUER:
+  Full URL of your OIDC identity provider
+  _default_: No default
+  _format_: string
+  _example_: "https://identity.your.facility/your/realm"
+
+- OIDC\_CLIENT\_ID:
+  Client id used to convert OIDC code to OIDC token. This is assigned in the OIDC
+  service when the token is generated
+  _default_: No default
+  _format_: string
+  _example_: "scicat"
+
+- OIDC\_CLIENT\_SECRET:
+  Token used to convert OIDC code to OIDC token. This is assigned in the OIDC service
+  when the token is generated
+  _example_: "90f1268..."
+
+- OIDC\_CALLBACK\_URL:
+  URL of the endpoint that is called when the authentication has been executed with the
+  OIDC service.
+  _default_: No default
+  _format_: string
+  _example_: "http://localhost:3000/api/v3/oidc/callback"
+
+- OIDC\_SCOPE:
+  Information returned by the OIDC service together with token
+  _default_: No default
+  _format_: string
+  _example_: "openid profile email"
+
+- OIDC\_SUCCESS\_URL:
+  Frontend URL that the user is directed to after a successful authentication. It must
+  be a valid frontend URL.
+  _default_: No default
+  _format_: string
+  _example_: "http://localhost:3000/Datasets"
+
+- OIDC\_ACCESS\_GROUPS:
+  field used to retrieve access groups from the OIDC service. It is not used in the
+  vanilla implementation.
+  _default_: No default
+  _format_: string
+  _example_: "access_groups"
+
+- OIDC\_ACCESS\_GROUPS\_PROPERTY:
+  name of the OIDC property used to retrieve the user's groups from OIDC.
+  _default_: none
+  _format_: string
+
+- OIDC\_AUTO\_LOGOUT:
+  if enabled, when logging out of SciCat, we also log out of OIDC.
+  _default_: false
+  _format_: boolean
+
+- OIDC\_RETURN\_URL:
+  URL the user is redirected to after a successful logout
+  _default_: none
+  _format_: string
+
+- LOGBOOK\_ENABLED:
+  Flag to enable/disable the Logbook endpoints.
+  accepted values: "yes", "no"
+  _default_: no
+  _format_: string
+
+- LOGBOOK\_BASE\_URL:
+  The base URL to the SciChat wrapper API. Only required if Logbook is enabled.
+  _default_: "http://localhost:3030/scichatapi"
+  _format_: string
+
+- LOGBOOK\_USERNAME:
+  The username used to authenticate to the SciChat wrapper API. Only required if Logbook
+  is enabled.
+  _default_: No default
+  _format_: string
+
+- LOGBOOK\_PASSWORD:
+  The password used to authenticate to the SciChat wrapper API. Only required if Logbook
+  is enabled.
+  _default_: No default
+  _format_: string
+
+- METADATA\_KEYS\_RETURN\_LIMIT:
+  The maximum number of keys returned by the `/Datasets/metadataKeys` endpoint.
+  _default_: No default
+  _format_: integer
+
+- METADATA\_PARENT\_INSTANCES\_RETURN\_LIMIT:
+  The maximum number of Datasets used to extract metadata keys in the
+  `/Datasets/metadataKeys` endpoint.
+  _default_: No default
+  _format_: integer
+
+- MONGODB\_URI:
+  The URI for your MongoDB instance.
+  _default_: No default
+  _format_: string "mongodb://<USERNAME>:<PASSWORD>@<HOST>:27017/<DB_NAME>"
+
+- OAI\_PROVIDER\_ROUTE:
+  URI to OAI provider, which is used in the `/publisheddata/:id/resync` endpoint.
+  _default_: no default
+  _format_: string
+
+- PID\_PREFIX:
+  The facility PID prefix, with trailing slash.
+  _default_: no default
+  _format_: string
+
+- PUBLIC\_URL\_PREFIX:
+  The base URL to the facility Landing Page.
+  _default_: No default
+  _format_: string
+  _example_: "https://doi.ess.eu/detail/"
+
+- PORT:
+  The port on which the backend listens.
+  _default_: 3000
+  _format_: integer
+
+- RABBITMQ\_ENABLED:
+  Flag to enable/disable RabbitMQ consumer.
+  accepted values: "yes", "no"
+  _deprecated_. Will be removed in future releases.
+  _default_: no
+  _format_: string
+
+- RABBITMQ\_HOSTNAME:
+  The hostname of the RabbitMQ message broker. Only required if RabbitMQ is enabled.
+  _deprecated_. Will be removed in future releases.
+  _default_: no default
+  _format_: string
+
+- RABBITMQ\_USERNAME:
+  The username used to authenticate to the RabbitMQ message broker. Only required if
+  RabbitMQ is enabled.
+  _deprecated_. Will be removed in future releases.
+  _default_: no default
+  _format_: string
+
+- RABBITMQ\_PASSWORD:
+  The password used to authenticate to the RabbitMQ message broker. Only required if
+  RabbitMQ is enabled.
+  _deprecated_. Will be removed in future releases.
+  _default_: no default
+  _format_: string
+
+- REGISTER\_DOI\_URI:
+  URI to the organization that registers the facilities DOIs.
+  _default_: no default
+  _format_: string
+  _example_: "https://mds.test.datacite.org/doi"
+
+- REGISTER\_METADATA\_URI:
+  URI to the organization that registers the facilities published data metadata.
+  _default_: no default
+  _format_: string
+  _example_: "https://mds.test.datacite.org/metadata"
 
 - DOI\_USERNAME:
-  Username used to authenticate on the DOI site  
-  _default_: no default  
-  _format_: string  
-
-- DOI\_PASSWORD:  
-  Password used to authenticate on the DOI site  
-  _default_: no default  
-  _format_: string  
-
-- SITE:  
-  The name of your site.  
-  _default_: no default  
-  _format_: string  
-
-- SMTP\_HOST:  
-  Host of SMTP server.  
-  _deprecated_. Will be removed in future releases.  
-  _default_: no default  
-  _format_: string  
-
-- SMTP\_MESSAGE\_FROM:  
-  Email address that emails should be sent from.  
-  _deprecated_. Will be removed in future releases.  
-  _default_: no default  
-  _format_: string, email  
-
-- SMTP\_PORT:  
-  Port of SMTP server.  
-  _deprecated_. Will be removed in future releases.  
-  _default_: no default  
-  _format_: string  
-
-- SMTP\_SECURE:  
-  Secure of SMTP server.  
-  _deprecated_. Will be removed in future releases.  
-  _default_: no default  
-  _format_: string  
-
-- POLICY\_PUBLICATION\_SHIFT:  
-  Number of years that needs to elapse before the dataset is made publicly acceessible  
-  _default_: 3  
-  _format_: integer  
-
-- POLICY\_RETENTION\_SHIFT:  
-  Number of years that the datasets are kept online before are archived or deleted. A negative value means that they are never archived/deleted  
-  _default_: -1  
-  _format_: integer  
-
-- ELASTICSEARCH\_ENABLED:  
-  Flag to enable/disable the ElasticSearch service  
-  accept values: "yes", "no"  
-  _default_: no default  
-  _format_: string  
-
-- ES\_HOST:  
-  The base URL to the Elasticsearch cluster. Use `http` if xpack.security is disabled  
-  _default_: no default  
-  _format_: string   
-  _example_: "https://localhost:9200" or "http://localhost:9200"  
-
-- MONGODB\_COLLECTION:  
-  Collection name to be mapped into specified Elasticsearch index  
-  _default_: no default  
-  _format_: string  
- 
-- ES\_MAX\_RESULT:   
-  Maximum records can be indexed into Elasticsearch.  
-  _default_: 10000  
-  _format_: number  
-
-- ES\_FIELDS\_LIMIT:   
-  The total number of fields in an index.  
-  _default_: 1000  
-  _format_: number  
-
-- ES\_INDEX:  
-  The total number of fields in an index.  
-  _default_: no default  
-  _format_: string  
-
-- ES\_REFRESH:  
-  The total number of fields in an index.  
-  accept values: true, false, "wait_for"  
-  _default_: false  
-  _format_: boolean or string  
-
-- ES\_USERNAME:  
-  Elasticsearch cluster username.  
-  _default_: no default, optional.  
-  _format_: string  
-
-- ELASTIC\_PASSWORD:   
-  Elasticsearch cluster password.  
-  _default_: no default.  
-  _format_: string  
+  Username used to authenticate on the DOI site
+  _default_: no default
+  _format_: string
+
+- DOI\_PASSWORD:
+  Password used to authenticate on the DOI site
+  _default_: no default
+  _format_: string
+
+- SITE:
+  The name of your site.
+  _default_: no default
+  _format_: string
+
+- SMTP\_HOST:
+  Host of SMTP server.
+  _deprecated_. Will be removed in future releases.
+  _default_: no default
+  _format_: string
+
+- SMTP\_MESSAGE\_FROM:
+  Email address that emails should be sent from.
+  _deprecated_. Will be removed in future releases.
+  _default_: no default
+  _format_: string, email
+
+- SMTP\_PORT:
+  Port of SMTP server.
+  _deprecated_. Will be removed in future releases.
+  _default_: no default
+  _format_: string
+
+- SMTP\_SECURE:
+  Secure of SMTP server.
+  _deprecated_. Will be removed in future releases.
+  _default_: no default
+  _format_: string
+
+- POLICY\_PUBLICATION\_SHIFT:
+  Number of years that need to elapse before the dataset is made publicly accessible
+  _default_: 3
+  _format_: integer
+
+- POLICY\_RETENTION\_SHIFT:
+  Number of years that the datasets are kept online before they are archived or deleted. A
+  negative value means that they are never archived/deleted
+  _default_: -1
+  _format_: integer
+
+- ELASTICSEARCH\_ENABLED:
+  Flag to enable/disable the ElasticSearch service
+  accepted values: "yes", "no"
+  _default_: no default
+  _format_: string
+
+- ES\_HOST:
+  The base URL to the Elasticsearch cluster. Use `http` if xpack.security is disabled
+  _default_: no default
+  _format_: string
+  _example_: "https://localhost:9200" or "http://localhost:9200"
+
+- MONGODB\_COLLECTION:
+  Collection name to be mapped into specified Elasticsearch index
+  _default_: no default
+  _format_: string
+
+- ES\_MAX\_RESULT:
+  Maximum number of records that can be indexed into Elasticsearch.
+  _default_: 10000
+  _format_: number
+
+- ES\_FIELDS\_LIMIT:
+  The total number of fields in an index.
+  _default_: 1000
+  _format_: number
+
+- ES\_INDEX:
+  Name of the default Elasticsearch index used by the application.
+  _default_: no default
+  _format_: string
+
+- ES\_REFRESH:
+  Controls when Elasticsearch makes indexed changes visible to search (`refresh`).
+  accepted values: true, false, "wait_for"
+  _default_: false
+  _format_: boolean or string
+
+- ES\_USERNAME:
+  Elasticsearch cluster username.
+  _default_: no default, optional.
+  _format_: string
+
+- ELASTIC\_PASSWORD:
+  Elasticsearch cluster password.
+  _default_: no default.
+  _format_: string

From caf189456705fde534f7d282fd8b2768e561d25d Mon Sep 17 00:00:00 2001
From: Spencer Bliven <spencer.bliven@gmail.com>
Date: Mon, 14 Oct 2024 15:47:50 +0200
Subject: [PATCH 3/5] Add jobconfig documentation

---
 Development/v4.x/backend/configuration.md     |   5 +
 .../v4.x/backend/configuration/jobconfig.md   | 185 ++++++++++++++++++
 README.md                                     |   2 +-
 SUMMARY.md                                    |   1 +
 4 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 Development/v4.x/backend/configuration/jobconfig.md

diff --git a/Development/v4.x/backend/configuration.md b/Development/v4.x/backend/configuration.md
index e7618a0a..f3a6eed7 100644
--- a/Development/v4.x/backend/configuration.md
+++ b/Development/v4.x/backend/configuration.md
@@ -430,3 +430,8 @@ _src/config/configuration.ts_
   Elasticsearch cluster password.
   _default_: no default.
   _format_: string
+
+- JOB\_CONFIGURATION\_FILE:
+  Configuration file for [job actions](configuration/jobconfig.md).
+  _default_: "jobConfig.json", optional.
+  _format_: string
diff --git a/Development/v4.x/backend/configuration/jobconfig.md b/Development/v4.x/backend/configuration/jobconfig.md
new file mode 100644
index 00000000..1d16c41c
--- /dev/null
+++ b/Development/v4.x/backend/configuration/jobconfig.md
@@ -0,0 +1,185 @@
+# Job Configuration
+
+> _**Development Feature**. This section documents features that are still under
+> development as part of the `release-jobs` branch._
+
+## Overview
+
+The SciCat job system is used for any interactions between SciCat and external services.
+
+Example jobs include:
+
+- Request an **archive system** to *archive* or *retrieve* data from tape storage
+- Move data to a *public* location (e.g. to access data from a [DOI landing
+  page](https://github.com/SciCatProject/LandingPageServer))
+- Run maintenance tasks such as emailing users
+
+### Job lifecycle
+
+Jobs follow a standard Create-Read-Update-Delete (CRUD) lifecycle:
+
+1. Jobs are _created_ via a `POST` request. This can be the result of a frontend
+   interaction (eg selecting a dataset for publishing) or through the REST API.
+
+   The body of the request should follow the CreateJobDto (Data Transfer Object):
+   ```
+   {
+    "type": "archive",
+    "ownerUser": "owner",
+    "ownerGroup": "group",
+    "contactEmail": "email"
+    "jobParams": {},
+    }
+   ```
+2. Jobs can be _read_ via a `GET` request to `/jobs/:id` or through the `/jobs/fullquery`
+   search endpoint. The frontend uses this to display the list of jobs.
+3. Jobs are _updated_ via a `PATCH` or `PUT` request to `/jobs/:id`. This is usually
+   used by facility services to update the job status and provide feedback.
+
+   The body of the request should follow the UpdateJobDto:
+   ```
+   {
+     "statusCode": "string",
+     "statusMessage": "string",
+     "jobResultObject": {}
+   }
+   ```
+4. Jobs may be _deleted_ periodically during maintenance. This is usually not done by
+   users.
+
+### Actions
+
+After _create_ and _update_ stages a series of actions can be performed by SciCat. This
+can be things like sending an email, posting a message to a message broker, or calling
+an API. The `jobParams` and `jobResultObject` are used to add additional information
+that the actions may need, such as the list of datasets the job refers to.
+
+A full list of built-in actions is given below. A plugin mechanism for registering new
+actions is also planned for a future SciCat release.
+
+## Configuration
+
+In SciCat v3.x, a limited number of jobs were hard-coded into the code base. This was
+changed in v4.x to allow each site to configure their own set of jobs and customize
+actions based on the job status.
+
+The available jobs are configured in the file `jobConfig.json` (or can be overridden
+with the `JOB_CONFIGURATION_FILE` [environment
+variable](../configuration.md#environment-variables)). An example `jobConfig.json` file
+is available
+[here](https://github.com/SciCatProject/scicat-backend-next/blob/release-jobs/src/jobs/config/jobConfig.example.json).
+
+### Configuration overview
+The top-level configuration is structured like this:
+
+```
+{
+  "configVersion": "v1.0",
+  "jobs": [
+    {
+      "jobType": "archive",
+      "create": {
+        "auth": "#all",
+        "actions": [...]
+      },
+      "update": {
+        "auth": "archivemanager",
+        "actions": [...]
+      }
+    }
+  ]
+}
+```
+
+- `configVersion` is a string that indicates the version of this configuration file. It
+  is not used by SciCat itself, but is useful for migrating jobs if the configuration
+  changes. SciCat will log a warning if a job was updated with a different config
+  version than it was created with.
+- `jobs` is an array allowing the configuration of different job types
+- `jobType` can be defined for each SciCat instance, but the names `archive`,
+  `retrieve`, and `public` are traditionally the most common jobs. Only jobs matching a
+  configured jobType will be accepted by the backend.
+- `create` and `update` correspond to `POST` and `PATCH` requests to the `/jobs`
+  endpoint. These configure 'actions' which are run when at different phases of the job
+  lifecycle. The actions are defined in the [job actions section](#job-actions).
+- `auth` configures the roles authorized to use the endpoint for each job operation.
+
+### Authorization
+
+Values for `auth` are described in [Jobs Authorization](../authorization/authorization_jobs.md). Some authorization values may require certain information to be passed in the request body; for instance, `"#datasetOwner"` requires that a dataset be passed.
+
+### Actions Configuration
+
+
+#### URLAction
+
+**Configuration**:
+```
+{
+  "actionType": "url",
+  "url": "http://localhost:3000/api/v3/health?jobid={{id}}",
+  "method": "GET",
+  "headers": {
+    "accept": "application/json"
+  }
+},
+```
+
+#### Validate
+
+**Configuration**:
+```
+{
+  "actionType": "validate",
+  "request": {
+    "jobParams.datasetIds[*]": {
+      "type": "object",
+      "required": ["pid","files"]
+    }
+  }
+}
+```
+
+#### Email
+
+**Configuration**:
+```
+{
+  "actionType": "email",
+  "auth": {
+    "user": "user",
+    "password": "password"
+  },
+  "to": "{{contactEmail}}",
+  "from": "from",
+  "subject": "[SciCat] Your {{type}} job was submitted successfully",
+  "bodyTemplateFile": "src/common/email-templates/job-template-simplified.html"
+}
+```
+
+#### RabbitMQ
+
+**Configuration**:
+```
+{
+  "actionType": "rabbitmq",
+  "hostname": "rabbitmq",
+  "port": 5672,
+  "username": "guest",
+  "password": "guest",
+  "exchange": "jobs.write",
+  "queue": "client.jobs.write",
+  "key": "jobqueue"
+}
+```
+
+#### Log
+
+**Configuration**:
+```
+{
+  "actionType": "log"
+}
+```
+
+This is a dummy action, useful for debugging. It adds a log entry when executed.
diff --git a/README.md b/README.md
index 10298715..f0adbfd4 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ See the [SciCat Home Webpage](https://scicatproject.github.io) for an overview o
 
 ## Structure of Documentation
 
-The documentaion is split into the following chapters:
+The documentation is split into the following chapters:
 
 * [User Guide](Users) - Users of the system can come here to see screen captures, FAQs and find resources on how to better understand SciCat.
 * [Operator Guide](Operator) - System admins read this part to set up SciCat for their location
diff --git a/SUMMARY.md b/SUMMARY.md
index 44f2f741..1c2deea2 100644
--- a/SUMMARY.md
+++ b/SUMMARY.md
@@ -43,6 +43,7 @@
     - [Running the Components](Development/v4.x/running.md)
     - [Configuration](Development/v4.x/configuration.md)
       - [Backend](Development/v4.x/backend/configuration.md)
+        - [Jobs](Development/v4.x/backend/configuration/jobconfig.md)
       - [Authorization](Development/v4.x/backend/authorization.md)
         - [Datasets](Development/v4.x/backend/authorization/authorization_datasets.md)
         - [OrigDatablocks](Development/v4.x/backend/authorization/authorization_origdatablocks.md)

From 23ac019791d0c9e94ec057255d01047112203260 Mon Sep 17 00:00:00 2001
From: Spencer Bliven <spencer.bliven@gmail.com>
Date: Tue, 29 Oct 2024 15:08:44 +0100
Subject: [PATCH 4/5] Convert json job configuration to yaml

Documents scicat-backend-next#1463
---
 Development/v4.x/backend/configuration.md     |   2 +-
 .../v4.x/backend/configuration/jobconfig.md   | 126 ++++++++++--------
 2 files changed, 69 insertions(+), 59 deletions(-)

diff --git a/Development/v4.x/backend/configuration.md b/Development/v4.x/backend/configuration.md
index f3a6eed7..f7baed0d 100644
--- a/Development/v4.x/backend/configuration.md
+++ b/Development/v4.x/backend/configuration.md
@@ -433,5 +433,5 @@ _src/config/configuration.ts_
 
 - JOB\_CONFIGURATION\_FILE:
   Configuration file for [job actions](configuration/jobconfig.md).
-  _default_: "jobConfig.json", optional.
+  _default_: "jobConfig.yaml", optional.
   _format_: string
diff --git a/Development/v4.x/backend/configuration/jobconfig.md b/Development/v4.x/backend/configuration/jobconfig.md
index 1d16c41c..c1a93b1b 100644
--- a/Development/v4.x/backend/configuration/jobconfig.md
+++ b/Development/v4.x/backend/configuration/jobconfig.md
@@ -73,22 +73,17 @@ is available
 The top-level configuration is structured like this:
 
 ```
-{
-  "configVersion": "v1.0",
-  "jobs": [
-    {
-      "jobType": "archive",
-      "create": {
-        "auth": "#all",
-        "actions": [...]
-      },
-      "update": {
-        "auth": "archivemanager",
-        "actions": [...]
-      }
-    }
-  ]
-}
+configVersion: v1.0
+jobs:
+  - jobType: archive
+    create:
+      auth: "#all"
+      actions:
+        - ...
+    update:
+      auth: archivemanager
+      actions:
+        - ...
 ```
 
 - `configVersion` is a string that indicates the version of this configuration file. It
@@ -110,76 +105,91 @@ Values for `auth` are described in [Jobs Authorization](../authorization/authori
 
 ### Actions Configuration
 
+The following actions are built-in to SciCat and can be included in the `actions` array.
 
 #### URLAction
 
+Makes a URL request. Most fields can be templated with the job DTO and body.
+
 **Configuration**:
 ```
-{
-  "actionType": "url",
-  "url": "http://localhost:3000/api/v3/health?jobid={{id}}",
-  "method": "GET",
-  "headers": {
-    "accept": "application/json"
-  }
-},
+- actionType: url
+  url: http://localhost:3000/api/v3/health?jobid={{id}}
+  method: GET
+  headers:
+    accept: application/json
 ```
 
 #### Validate
 
+The `validate` action is used to check validate requests to the job endpoints. It is
+used to enforce custom constraints on `jobParams` or `jobResultObject` for each job
+type. If other actions rely on custom fields in their templates they should first be
+validated with this action.
+
 **Configuration**:
+ValidateAction is configured with a single parameter, `request`, which is checked
+against the request body (aka the DTO). The config file will look like this:
 ```
-{
-  "actionType": "validate",
-  "request": {
-    "jobParams.datasetIds[*]": {
-      "type": "object",
-      "required": ["pid","files"]
-    }
-  }
-}
+- actionType: validate
+  request:
+    <path>: <typecheck>
+    ...
+```
+
+Usually `<path>` will be a dot-delimited field in the DTO, eg. "jobParams.name".
+Technically it is a [JSONPath-Plus](https://github.com/JSONPath-Plus/JSONPath)
+expression, which is applied to the request body to extract any matching items.
+When writing a jobconfig file it may be helpful to test an expected request body
+against the [JSONPath demo](https://jsonpath-plus.github.io/JSONPath/demo/).
+
+The `<typecheck>` expression is a JSON Schema. While complicated schemas are possible,
+the combination with JSONPath makes common type checks very concise and legible.
+Here are some example `<typecheck>` expressions:
+
+```
+- actionType: validate
+  request:
+    jobParams.name: # match simple types
+      type: string
+    jobParams.answers[*]: # literal values (here applied to an array)
+      enum: ["yes", "no"]
+    jobResultObject.archivable: # enforce a value
+      const: true
+    "jobParams": # Apply external JSON Schema to all params
+      $ref: https://json.schemastore.org/schema-org-thing.json
 ```
 
 #### Email
 
 **Configuration**:
 ```
-{
-  "actionType": "email",
-  "auth": {
-    "user": "user",
-    "password": "password"
-  },
-  "to": "{{contactEmail}}",
-  "from": "from",
-  "subject": "[SciCat] Your {{type}} job was submitted successfully",
-  "bodyTemplateFile": "src/common/email-templates/job-template-simplified.html"
-}
+- actionType: email
+  to: "{{contactEmail}}"
+  subject: "[SciCat] Your {{type}} job was submitted successfully"
+  bodyTemplateFile: src/common/email-templates/job-template-simplified.html
 ```
 
 #### RabbitMQ
 
 **Configuration**:
 ```
-{
-  "actionType": "rabbitmq",
-  "hostname": "rabbitmq",
-  "port": 5672,
-  "username": "guest",
-  "password": "guest",
-  "exchange": "jobs.write",
-  "queue": "client.jobs.write",
-  "key": "jobqueue"
-}
+- actionType: rabbitmq
+  exchange: jobs.write
+  queue: client.jobs.write
+  key: jobqueue
 ```
 
+The RabbitMQ connection must first be configured through environment variables
+as described in [configuration](../configuration.md).
+
 #### Log
 
+This is a dummy action, useful for debugging. It adds a log entry when executed.
+
 **Configuration**:
 ```
-{
-  "actionType": "log"
-}
+- actionType: log
 ```
 
-This is a dummy action, useful for debugging. It adds a log entry when executed.
+The log action does not have any configuration options.

From 0da65eb9934ebc2b5c61876c49be202cdbd905c5 Mon Sep 17 00:00:00 2001
From: Spencer Bliven <spencer.bliven@gmail.com>
Date: Sun, 3 Nov 2024 23:37:42 +0100
Subject: [PATCH 5/5] Update job documentation

- Document ValidateAction changes (#1473)
- Convert examples to YAML (#1463)
---
 .../v4.x/backend/configuration/jobconfig.md   | 194 +++++++++++++++---
 1 file changed, 170 insertions(+), 24 deletions(-)

diff --git a/Development/v4.x/backend/configuration/jobconfig.md b/Development/v4.x/backend/configuration/jobconfig.md
index c1a93b1b..d3068d25 100644
--- a/Development/v4.x/backend/configuration/jobconfig.md
+++ b/Development/v4.x/backend/configuration/jobconfig.md
@@ -12,24 +12,37 @@ Example jobs include:
 - Request an **archive system** to *archive* or *retrieve* data from tape storage
 - Move data to a *public* location (e.g. to access data from a [DOI landing
   page](https://github.com/SciCatProject/LandingPageServer))
-- Run maintenance tasks such as emailing users
+- Run maintenance tasks such as emailing users.
+
+If you just plan to use SciCat for cataloging data and don't plan to use its data
+management features then you may not need any job types. If no job types are configured then SciCat will reject any backend requests to create jobs. In this case [frontend features](../../frontend/configuration.md) for archiving (`archiveWorkflowEnabled: false`) and retrieval should be disabled.
+
+### Migration Notes
+
+In v3.x the `archive`, `retrieve`, and `public` jobs were hard-coded. In v4.x the job
+types can be arbitrary strings; however we recommend using the standard job names to
+avoid confusion.
+
+Also note that some checks that were performed by default in v3.x for certain job types
+must now be configured explicitly as actions. These are included in the provided
+`jobConfig.example.yaml` file and are also noted below.
 
 ### Job lifecycle
 
 Jobs follow a standard Create-Read-Update-Delete (CRUD) lifecycle:
 
-1. Jobs are _created_ via a `POST` request. This can be the result of a frontend
+1. Jobs are _created_ via a `POST` request to `/jobs`. This can be the result of a frontend
    interaction (eg selecting a dataset for publishing) or through the REST API.
 
    The body of the request should follow the CreateJobDto (Data Transfer Object):
-   ```
+   ```json
    {
-    "type": "archive",
-    "ownerUser": "owner",
-    "ownerGroup": "group",
-    "contactEmail": "email"
-    "jobParams": {},
-    }
+     "type": "archive",
+     "ownerUser": "owner",
+     "ownerGroup": "group",
+     "contactEmail": "email@example.com",
+     "jobParams": {}
+   }
    ```
 2. Jobs can be _read_ via a `GET` request to `/jobs/:id` or through the `/jobs/fullquery`
    search endpoint. The frontend uses this to display the list of jobs.
@@ -63,11 +76,11 @@ In SciCat v3.x, a limited number of jobs were hard-coded into the code base. Thi
 changed in v4.x to allow each site to configure their own set of jobs and customize
 actions based on the job status.
 
-The available jobs are configured in the file `jobConfig.json` (or can be overridden
+The available jobs are configured in the file `jobConfig.yaml` (or can be overridden
 with the `JOB_CONFIGURATION_FILE` [environment
-variable](../configuration.md#environment-variables)). An example `jobConfig.json` file
+variable](../configuration.md#environment-variables)). An example `jobConfig.example.yaml` file
 is available
-[here](https://github.com/SciCatProject/scicat-backend-next/blob/release-jobs/src/jobs/config/jobConfig.example.json).
+[here](https://github.com/SciCatProject/scicat-backend-next/blob/release-jobs/jobConfig.example.yaml).
 
 ### Configuration overview
 The top-level configuration is structured like this:
@@ -77,7 +90,7 @@ configVersion: v1.0
 jobs:
   - jobType: archive
     create:
-      auth: "#all"
+      auth: "#datasetOwner"
       actions:
         - ...
     update:
@@ -98,11 +111,14 @@ jobs:
   endpoint. These configure 'actions' which are run when at different phases of the job
   lifecycle. The actions are defined in the [job actions section](#job-actions).
 - `auth` configures the roles authorized to use the endpoint for each job operation.
+- `actions` gives a list of actions to run when the endpoint is called.
 
 ### Authorization
 
 Values for `auth` are described in [Jobs Authorization](../authorization/authorization_jobs.md). Some authorization values may require certain information to be passed in the request body; for instance, `"#datasetOwner"` requires that a dataset be passed.
 
+> **Caution** Setting `auth` to a permissive value (eg `#all`) could expose archiving services to external users. Please consider the security model carefully when configuring jobs.
+
 ### Actions Configuration
 
 The following actions are built-in to SciCat and can be included in the `actions` array.
@@ -112,7 +128,8 @@ The following actions are built-in to SciCat and can be included in the `actions
 Makes a URL request. Most fields can be templated with the job DTO and body.
 
 **Configuration**:
-```
+
+```yml
 - actionType: url
   url: http://localhost:3000/api/v3/health?jobid={{id}}
   method: GET
@@ -120,6 +137,8 @@ Makes a URL request. Most fields can be templated with the job DTO and body.
     accept: application/json
 ```
 
+> **TODO** Expand this section.
+
 #### Validate
 
 The `validate` action is used to check validate requests to the job endpoints. It is
@@ -127,20 +146,139 @@ used to enforce custom constraints on `jobParams` or `jobResultObject` for each
 type. If other actions rely on custom fields in their templates they should first be
 validated with this action.
 
-**Configuration**:
-ValidateAction is configured with a single parameter, `request`, which is checked
-against the request body (aka the DTO). The config file will look like this:
+ValidateAction is configured with a series of `<path>: <typecheck>` pairs which describe
+a constraint to be validated. These can be applied to different contexts:
+- **`request`** - Checks the incoming request body (aka the DTO).
+- **`datasets`** - (CREATE only) requires that a list of datasets be included in
+  `jobParams.datasetList`. Checks are applied to each dataset.
+
+Validation occurs before the job gets created in the database, while most other actions
+are performed after the job is created. This means that correctly configuring validation
+is important to detect user errors early.
+
+Configuration is described in detail below. However, a few illustrative examples are
+provided first.
+
+##### Example 1: Require extra template data
+
+Consider a case where you want to pass a value from the request body through to other
+actions. For example, you want to allow the requestor to specify the subject of an email
+to be sent in the request body like this:
+
+```json
+POST /jobs
+{
+  "type": "email_demo",
+  "jobParams": {
+    "subject": "Thanks for using scicat!"
+  }
+}
 ```
+
+In this case an `email` action would be configured using handlebars to insert the
+`jobParams.subject` value. However, a `validate` action should also be configured to
+catch errors early where the subject is not specified:
+
+`jobConfig.yaml`:
+```yml
+jobs:
+  - jobType: email_demo
+    create:
+      auth: admin
+      actions:
+        - actionType: validate
+          request:
+            jobParams.subject:
+              type: string
+        - actionType: email
+          to: "{{contactEmail}}"
+          subject: "[SciCat] {{jobParams.subject}}"
+          bodyTemplateFile: demo_email.html
+    update:
+      auth: admin
+      actions: []
+```
+
+##### Example 2: Enforce datasetLifecycle state
+
+Many job types require a dataset to be included. These are specified in
+`jobParams.datasetList` like this:
+
+```json
+POST /jobs
+{
+  "owner"...
+  "jobParams": {
+    "datasetList": [
+      {
+        "pid": "examplePID",
+        "files": []
+      }
+    ]
+  }
+}
+```
+
+Permission to access the datasets is checked with the `#dataset*` authorization
+values, but other properties need to be enforced with a `validate` action.
+
+The following validate actions are recommended for `archive`, `retrieve` and `public`
+jobs:
+
+`jobConfig.yaml`:
+```yml
+jobs:
+  - jobType: archive
+    create:
+      auth: "#datasetOwner"
+      actions:
+        - actionType: validate
+          datasets:
+            datasetlifecycle.archivable:
+              const: true
+    update:
+      auth: archivemanager
+      actions: []
+  - jobType: retrieve
+    create:
+      auth: "#datasetOwner"
+      actions:
+        - actionType: validate
+          datasets:
+            datasetlifecycle.retrievable:
+              const: true
+    update:
+      auth: archivemanager
+      actions: []
+  - jobType: public
+    create:
+      auth: "#all"
+      actions:
+        - actionType: validate
+          datasets:
+            isPublished:
+              const: true
+    update:
+      auth: archivemanager
+```
+
+
+##### Configuration
+
+The config file for a validate action will look like this:
+
+```yml
 - actionType: validate
   request:
     <path>: <typecheck>
-    ...
+  datasets:
+    <path>: <typecheck>
 ```
 
 Usually `<path>` will be a dot-delimited field in the DTO, eg. "jobParams.name".
 Technically it is a [JSONPath-Plus](https://github.com/JSONPath-Plus/JSONPath)
-expression, which is applied to the request body to extract any matching items.
-When writing a jobconfig file it may be helpful to test an expected request body
+expression, which is applied to the request body or dataset to extract any matching
+items. When writing a jobconfig file it may be helpful to test an expected request body
 against the [JSONPath demo](https://jsonpath-plus.github.io/JSONPath/demo/).
 
 The `<typecheck>` expression is a JSON Schema. While complicated schemas are possible,
@@ -149,18 +287,25 @@ Here are some example `<typecheck>` expressions:
 
 ```
 - actionType: validate
-  request:
-    jobParams.name: # match simple types
+  request: # applies to the request body
+    jobParams.stringVal: # match simple types
       type: string
-    jobParams.answers[*]: # literal values (here applied to an array)
+    jobParams.enumVal: # literal values
       enum: ["yes", "no"]
-    jobResultObject.archivable: # enforce a value
+    jobResultObject.mustBeTrue: # enforce a value
       const: true
     "jobParams": # Apply external JSON Schema to all params
       $ref: https://json.schemastore.org/schema-org-thing.json
+  datasets: # applies to all datasets
+    datasetlifecycle.archivable:
+      const: true
 ```
 
+Validation will result in a `400 Bad Request` response if either the path is not found
+or if any values matching the path do not validate against the provided schema.
+
 #### Email
+> **TODO** Expand this section.
 
 **Configuration**:
 ```
@@ -171,6 +316,7 @@ Here are some example `<typecheck>` expressions:
 ```
 
 #### RabbitMQ
+> **TODO** Expand this section.
 
 **Configuration**:
 ```