From 6cc5c848782d9737c7647dee0eca51d7174430dd Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Mon, 14 Oct 2024 15:38:22 +0200 Subject: [PATCH 1/5] Add start:dev command Update .gitignore --- .gitignore | 4 ++++ package.json | 1 + 2 files changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 4843378f..03d88c59 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,7 @@ docs *.pdf .idea cat + +# Website output +_site +.jekyll-cache \ No newline at end of file diff --git a/package.json b/package.json index 7bb0cb8d..9163ad71 100644 --- a/package.json +++ b/package.json @@ -4,6 +4,7 @@ "description": "", "main": "index.js", "scripts": { + "start:dev": "honkit serve --port 4500", "test": "echo \"Error: no test specified\" && exit 1" }, "keywords": [], From 47d2467f89c58162c394509ea6f9f61ec7fb8a6b Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Mon, 14 Oct 2024 15:44:56 +0200 Subject: [PATCH 2/5] Reformat backend/configuration.md - Remove trailing spaces - Wrap long lines - A couple minor markdown syntax fixes --- Development/v4.x/backend/configuration.md | 809 +++++++++++----------- 1 file changed, 420 insertions(+), 389 deletions(-) diff --git a/Development/v4.x/backend/configuration.md b/Development/v4.x/backend/configuration.md index 0513e7b0..e7618a0a 100644 --- a/Development/v4.x/backend/configuration.md +++ b/Development/v4.x/backend/configuration.md @@ -1,401 +1,432 @@ # Configuration -When using the official release image, Backend configuration can be achieved by setting the environmental variables listed below through an orchestration/containerization system, the .env file or with a suitable method compatible with your environment. -The current source code contains an example .env file, named _.env.sample_ listing all the environment variable available to configure the backend. 
+When using the official release image, Backend configuration can be achieved by setting +the environmental variables listed below through an orchestration/containerization +system, the .env file or with a suitable method compatible with your environment. The +current source code contains an example .env file, named _.env.sample_ listing all the +environment variable available to configure the backend. -If you are compiling the application from source, you can edit the file _serc/config/configuration.ts_ with the correct values for your infrastructure. This option is still undocumented, although it is our intention to provide a detailed how-to guide as soon as we can. +If you are compiling the application from source, you can edit the file +_serc/config/configuration.ts_ with the correct values for your infrastructure. This +option is still undocumented, although it is our intention to provide a detailed how-to +guide as soon as we can. ## Environment Variables -This is complete the list of environment variable that can be used to configure SciCat backend. -The list is compiled according to the configuration class defined in _src/config/configuration.ts_ - -- ADMIN\_GROUPS: - list of groups that have admin priviliges - _default_: "" - _format_: comma separated list of strings. Leading and trailing spaces are trimmed - -- DELETE\_GROUPS: - list of groups that are allowed to delete content - _default_: "" - _format_: comma separated list of strings. Leading and trailing spaces are trimmed - -- CREATE\_DATASET\_GROUPS: - list of non admin groups that are allowed to create datasets without pid. The pid is assigned by the system. If set to "all", all users can create a dataset belonging to any of the groups they belong to. - _default_: "#all" - _format_: comma separated list of strings. Leading and trailing spaces are trimmed - -- CREATE\_DATASET\_WITH\_PID\_GROUPS: - list of non admin groups that are allowed to create datasets with explicit pid. 
If set to "#all", all users can create a dataset belonging to any of the groups they belong to and with esplicit pid. - If the pid verification is enabled, pid will be validated agains the specification passed. - _default_: "" - _format_: comma separated list of strings. Leading and trailing spaces are trimmed - -- CREATE\_DATASET\_PRIVILEGED\_GROUPS: - list of non admin groups that are allowed to create datasets for groups they do not belong to. If set to "#all", all users can create a dataset belonging to any group with explicit pid. - If the pid verification is enabled, pid will be validated agains the specification passed. - _default_: "" - _format_: comma separated list of strings. Leading and trailing spaces are trimmed - -- PROPOSAL\_GROUPS: - list of non admin groups that are allowed to create and update proposals for groups they do not belong to. If set to "#all", all users can create a dataset belonging to any group with explicit pid. - _default_: "" - _format_: comma separated list of strings. Leading and trailing spaces are trimmed - -- SAMPLE\_GROUPS: - list of non admin groups that are allowed to create and update samples for the groups they belong to. If set to "#all", all users can create a dataset belonging to their group. - _default_: "" - _format_: comma separated list of strings. Leading and trailing spaces are trimmed - -- SAMPLE\_PRIVILEGED\_GROUPS: - list of non admin groups that are allowed to create samples for any groups, but can only update samples belonging to groups they belong to. - _default_: "" - _format_: comma separated list of strings. Leading and trailing spaces are trimmed - - -- ACCESS\_GROUPS\_STATIC\_VALUES: - List of groups assigned by default to all users. Used in the vanilla implementation for easy configuration. - If you do not want or need to assign any default group, it should be set to empty string "". - Default value: "" - _format_: Comman separated list of strings. 
Leading and trailing spaces are trimmed - _example_: "group1,group2,group3,..." +This is complete the list of environment variable that can be used to configure SciCat +backend. The list is compiled according to the configuration class defined in +_src/config/configuration.ts_ + +- ADMIN\_GROUPS: + list of groups that have admin priviliges + _default_: "" + _format_: comma separated list of strings. Leading and trailing spaces are trimmed + +- DELETE\_GROUPS: + list of groups that are allowed to delete content + _default_: "" + _format_: comma separated list of strings. Leading and trailing spaces are trimmed + +- CREATE\_DATASET\_GROUPS: + list of non admin groups that are allowed to create datasets without pid. The pid is + assigned by the system. If set to "all", all users can create a dataset belonging to + any of the groups they belong to. + _default_: "#all" + _format_: comma separated list of strings. Leading and trailing spaces are trimmed + +- CREATE\_DATASET\_WITH\_PID\_GROUPS: + list of non admin groups that are allowed to create datasets with explicit pid. If set + to "#all", all users can create a dataset belonging to any of the groups they belong + to and with esplicit pid. + If the pid verification is enabled, pid will be validated agains the specification passed. + _default_: "" + _format_: comma separated list of strings. Leading and trailing spaces are trimmed + +- CREATE\_DATASET\_PRIVILEGED\_GROUPS: + list of non admin groups that are allowed to create datasets for groups they do not + belong to. If set to "#all", all users can create a dataset belonging to any group + with explicit pid. + If the pid verification is enabled, pid will be validated agains the specification passed. + _default_: "" + _format_: comma separated list of strings. Leading and trailing spaces are trimmed + +- PROPOSAL\_GROUPS: + list of non admin groups that are allowed to create and update proposals for groups + they do not belong to. 
If set to "#all", all users can create a dataset belonging to + any group with explicit pid. + _default_: "" + _format_: comma separated list of strings. Leading and trailing spaces are trimmed + +- SAMPLE\_GROUPS: + list of non admin groups that are allowed to create and update samples for the groups + they belong to. If set to "#all", all users can create a dataset belonging to their + group. + _default_: "" + _format_: comma separated list of strings. Leading and trailing spaces are trimmed + +- SAMPLE\_PRIVILEGED\_GROUPS: + list of non admin groups that are allowed to create samples for any groups, but can + only update samples belonging to groups they belong to. + _default_: "" + _format_: comma separated list of strings. Leading and trailing spaces are trimmed + +- ACCESS\_GROUPS\_STATIC\_VALUES: + List of groups assigned by default to all users. Used in the vanilla implementation + for easy configuration. If you do not want or need to assign any default group, it + should be set to empty string "". + _default_: "" + _format_: Comman separated list of strings. Leading and trailing spaces are trimmed + _example_: "group1,group2,group3,..." - ACCESS\_GROUP\_SERVICE\_TOKEN: - Access token needed to access the API specified in ACCESS\_GROUP\_SERVICE\_API\_URL, used to retrieve access groups from a third party system. - _format*: string - -- ACCESS\_GROUP\_SERVICE\_API\_URL: - Well formed url of the service API used to provide access groups. Only one value is allowed. - _format_: string + Access token needed to access the API specified in ACCESS\_GROUP\_SERVICE\_API\_URL, + used to retrieve access groups from a third party system. + _format_: string + +- ACCESS\_GROUP\_SERVICE\_API\_URL: + Well formed url of the service API used to provide access groups. Only one value is + allowed. + _format_: string _example_: "https://my.access.group/service/api/url" - -- DOI_PREFIX: - The facility DOI prefix, with trailing slash. 
- _default_: "" - _format_: string - -- EXPRESS\_SESSION\_SECRET: - Secret used to set up express session. - _default_: "" - _format_: string - -- LOGOUT\_URL: - URL specified upon successful logout. It is returned in the json object for the frontend, or third party UI, to be used locally. - _default_: "" - _format_: string - -- HTTP\_MAX\_REDIRECTS: - Max number of redirects for http requests. - _default_: 5 - _format_: integer - -- HTTP\_TIMEOUT: - Timeout from http requests in ms. - _default_: 5000 + +- DOI_PREFIX: + The facility DOI prefix, with trailing slash. + _default_: "" + _format_: string + +- EXPRESS\_SESSION\_SECRET: + Secret used to set up express session. + _default_: "" + _format_: string + +- LOGOUT\_URL: + URL specified upon successful logout. It is returned in the json object for the frontend, or third party UI, to be used locally. + _default_: "" + _format_: string + +- HTTP\_MAX\_REDIRECTS: + Max number of redirects for http requests. + _default_: 5 _format_: integer - -- JWT_SECRET: - The secret used to create any JWT token, used for authorization. - _default_: "" - _format_: string - -- JWT\_EXPIRES\_IN: - Expiration time of any JWT token in seconds. - _default_: 3600 (s) - _format_: integer - -- JWT\_NEVER\_EXPIRES: - Length of time that the never expiring jwt token will last. - _default_: 100y - _format_: string as in number of years - -- LDAP\_URL: - Full URI (including port) of your local LDAP server, if this is your selected authentication method. - _default_: No default - _example_: ldaps://ldap.server.com:636/ - _format_: string - -- LDAP\_BIND\_DN: - Bind DN to access information on your LDAP server. - _default_: No default - _format_: string - -- LDAP\_BIND\_CREDENTIALS: - Credentials associated with your bind DN to acccess your LDAP server. - _default_: No default - _format_: string - -- LDAP\_SEARCH\_BASE: - Search base for your LDAP server. 
- _default_: No default - _format_: string - -- LDAP\_SEARCH\_FILTER: - Search filter for you LDAP server. - _default_: No default - _format_: string - _example_: "(LDAPUsername={{username}})" - -- LDAP\_MODE: - type of ldap server we are communicating with - **_NEEDS TO BE UPDATED. Not sure which other values are accepted_** - _default_: ad - _format_: string - _acceptable values_: ad - -- LDAP\_EXTERNAL\_ID: - LDAP matching field that provides the external id - _default_: sAMAccountName - _format_: string - -- LDAP\_USERNAME: - LDAP field providing the username - _default_: displayName - _format_: string - -- OIDC\_ISSUER: - Full URL of your OIDC identity provider - _default_: No default - _format_: string - _example_: "https://identity.your.facility/your/realm" - -- OIDC\_CLIENT\_ID: - Client id used to convert OIDC code to OIDC token. This is assigned in the OIDC service when the token is generated - _default_: No default - _format_: string - _example_: "scicat" - -- OIDC\_CLIENT\_SECRET: - Token used to convert OIDC code to OIDC token. This is assigned in the OIDC service when the token is generated - _example_: "90f1268..." - -- OIDC\_CALLBACK\_URL: - URL of the endpoint that is called when the authentication has been executed with the OIDC service. - _default_: No default - _format_: string - _example_: "http://localhost:3000/api/v3/oidc/callback" - -- OIDC\_SCOPE: - Information returned by the OIDC service together with token - _default_: No default - _format_: string - _example_: "openid profile email" - -- OIDC\_SUCCESS\_URL: - Frontend URL that the user is directed to after a successful authentication. It must be a valid frontend URL. - _default_: No default - _format_: string - _example_: "http://localhost:3000/Datasets" - -- OIDC\_ACCESS\_GROUPS: - field used to retrieve access groups from the OIDC service. It is not used in the vanilla implementation. 
- _default_: No default - _format_: string - _example_: "access_groups" - -- OIDC\_ACCESS\_GROUPS\_PROPERTY: - name of the OIDC property used to retrieve the users groups from OIDC. - _default_: none - _format_: string - -- OIDC\_AUTO\_LOGOUT: - if enabled, when login out from SciCat, we logout from OIDC also. - _default_: false - _format_: boolean - -- OIDC\_RETURN\_URL: - URL the user is redirected after a successful logout - _default_: none - _format_: string - -- LOGBOOK\_ENABLED: - Flag to enable/disable the Logbook endpoints. - accept values: "yes", "no" - _default_: no - _format_: string - -- LOGBOOK\_BASE\_URL: - The base URL to the SciChat wrapper API. Only required if Logbook is enabled. - _default_: "http://localhost:3030/scichatapi" - _format_: string - -- LOGBOOK\_USERNAME: - The username used to authenticate to the SciChat wrapper API. Only required if Logbook is enabled. - _default_: No default - _format_: string - -- LOGBOOK\_PASSWORD: - The password used to authenticate to the SciChat wrapper API. Only required if Logbook is enabled. - _default_: No default - _format_: string - -- METADATA\_KEYS\_RETURN\_LIMIT: - The maximum number of keys returned by the `/Datasets/metadataKeys` endpoint. - _default_: No default - _format_: integer - -- METADATA\_PARENT\_INSTANCES\_RETURN\_LIMIT: - The maximum number of Datasets used to extract metadata keys in the `/Datasets/metadataKeys` endpoint. - _default_: No default - _format_: integer - -- MONGODB\_URI: - The URI for your MongoDB instance. - _default_: No default - _format_: string "mongodb://:@:27017/" - -- OAI\_PROVIDER\_ROUTE: - URI to OAI provider, which is used in the `/publisheddata/:id/resync` endpoint. - _default_: no default - _format_: string - -- PID\_PREFIX: - The facility PID prefix, with trailing slash. - _default_: no default - _format_: string - -- PUBLIC\_URL\_PREFIX: - The base URL to the facility Landing Page. 
- _default_: No default - _format_: string - _example_: "https://doi.ess.eu/detail/" - -- PORT: - The port on which the backend listen on. - _default_: 3000 - _format_: integer - -- RABBITMQ\_ENABLED: - Flag to enable/disable RabbitMQ consumer. - accepted values: "yes", "no" - _deprecated_. Will be removed in future releases. - _default_: no - _format_: string - -- RABBITMQ\_HOSTNAME: - The hostname of the RabbitMQ message broker. Only required if RabbitMQ is enabled. - _deprecated_. Will be removed in future releases. - _default_: no default - _default_: string - -- RABBITMQ\_USERNAME: - The username used to authenticate to the RabbitMQ message broker. Only required if RabbitMQ is enabled. - _deprecated_. Will be removed in future releases. - _default_: no default - _format_: string - -- RABBITMQ\_PASSWORD: - The password used to authenticate to the RabbitMQ message broker. Only required if RabbitMQ is - enabled. - _deprecated_. Will be removed in future releases. - _default_: no default - _format_: string - -- REGISTER\_DOI\_URI: - URI to the organization that registers the facilities DOIs. - _default_: no default - _format_: string - _example_: "https://mds.test.datacite.org/doi" - -- REGISTER\_METADATA\_URI: - URI to the organization that registers the facilities published data metadata. - _default_: no default - _format_: string - _example_: ="https://mds.test.datacite.org/metadata" + +- HTTP\_TIMEOUT: + Timeout from http requests in ms. + _default_: 5000 + _format_: integer + +- JWT_SECRET: + The secret used to create any JWT token, used for authorization. + _default_: "" + _format_: string + +- JWT\_EXPIRES\_IN: + Expiration time of any JWT token in seconds. + _default_: 3600 (s) + _format_: integer + +- JWT\_NEVER\_EXPIRES: + Length of time that the never expiring jwt token will last. 
+ _default_: 100y + _format_: string as in number of years + +- LDAP\_URL: + Full URI (including port) of your local LDAP server, if this is your selected + authentication method. + _default_: No default + _example_: ldaps://ldap.server.com:636/ + _format_: string + +- LDAP\_BIND\_DN: + Bind DN to access information on your LDAP server. + _default_: No default + _format_: string + +- LDAP\_BIND\_CREDENTIALS: + Credentials associated with your bind DN to acccess your LDAP server. + _default_: No default + _format_: string + +- LDAP\_SEARCH\_BASE: + Search base for your LDAP server. + _default_: No default + _format_: string + +- LDAP\_SEARCH\_FILTER: + Search filter for you LDAP server. + _default_: No default + _format_: string + _example_: "(LDAPUsername={{username}})" + +- LDAP\_MODE: + type of ldap server we are communicating with. + **_(NEEDS TO BE UPDATED. Not sure which other values are accepted)_** + _default_: ad + _format_: string + _acceptable values_: ad + +- LDAP\_EXTERNAL\_ID: + LDAP matching field that provides the external id + _default_: sAMAccountName + _format_: string + +- LDAP\_USERNAME: + LDAP field providing the username + _default_: displayName + _format_: string + +- OIDC\_ISSUER: + Full URL of your OIDC identity provider + _default_: No default + _format_: string + _example_: "https://identity.your.facility/your/realm" + +- OIDC\_CLIENT\_ID: + Client id used to convert OIDC code to OIDC token. This is assigned in the OIDC + service when the token is generated + _default_: No default + _format_: string + _example_: "scicat" + +- OIDC\_CLIENT\_SECRET: + Token used to convert OIDC code to OIDC token. This is assigned in the OIDC service + when the token is generated + _example_: "90f1268..." + +- OIDC\_CALLBACK\_URL: + URL of the endpoint that is called when the authentication has been executed with the + OIDC service. 
+ _default_: No default + _format_: string + _example_: "http://localhost:3000/api/v3/oidc/callback" + +- OIDC\_SCOPE: + Information returned by the OIDC service together with token + _default_: No default + _format_: string + _example_: "openid profile email" + +- OIDC\_SUCCESS\_URL: + Frontend URL that the user is directed to after a successful authentication. It must + be a valid frontend URL. + _default_: No default + _format_: string + _example_: "http://localhost:3000/Datasets" + +- OIDC\_ACCESS\_GROUPS: + field used to retrieve access groups from the OIDC service. It is not used in the + vanilla implementation. + _default_: No default + _format_: string + _example_: "access_groups" + +- OIDC\_ACCESS\_GROUPS\_PROPERTY: + name of the OIDC property used to retrieve the users groups from OIDC. + _default_: none + _format_: string + +- OIDC\_AUTO\_LOGOUT: + if enabled, when login out from SciCat, we logout from OIDC also. + _default_: false + _format_: boolean + +- OIDC\_RETURN\_URL: + URL the user is redirected after a successful logout + _default_: none + _format_: string + +- LOGBOOK\_ENABLED: + Flag to enable/disable the Logbook endpoints. + accept values: "yes", "no" + _default_: no + _format_: string + +- LOGBOOK\_BASE\_URL: + The base URL to the SciChat wrapper API. Only required if Logbook is enabled. + _default_: "http://localhost:3030/scichatapi" + _format_: string + +- LOGBOOK\_USERNAME: + The username used to authenticate to the SciChat wrapper API. Only required if Logbook + is enabled. + _default_: No default + _format_: string + +- LOGBOOK\_PASSWORD: + The password used to authenticate to the SciChat wrapper API. Only required if Logbook + is enabled. + _default_: No default + _format_: string + +- METADATA\_KEYS\_RETURN\_LIMIT: + The maximum number of keys returned by the `/Datasets/metadataKeys` endpoint. 
+ _default_: No default + _format_: integer + +- METADATA\_PARENT\_INSTANCES\_RETURN\_LIMIT: + The maximum number of Datasets used to extract metadata keys in the + `/Datasets/metadataKeys` endpoint. + _default_: No default + _format_: integer + +- MONGODB\_URI: + The URI for your MongoDB instance. + _default_: No default + _format_: string "mongodb://:@:27017/" + +- OAI\_PROVIDER\_ROUTE: + URI to OAI provider, which is used in the `/publisheddata/:id/resync` endpoint. + _default_: no default + _format_: string + +- PID\_PREFIX: + The facility PID prefix, with trailing slash. + _default_: no default + _format_: string + +- PUBLIC\_URL\_PREFIX: + The base URL to the facility Landing Page. + _default_: No default + _format_: string + _example_: "https://doi.ess.eu/detail/" + +- PORT: + The port on which the backend listen on. + _default_: 3000 + _format_: integer + +- RABBITMQ\_ENABLED: + Flag to enable/disable RabbitMQ consumer. + accepted values: "yes", "no" + _deprecated_. Will be removed in future releases. + _default_: no + _format_: string + +- RABBITMQ\_HOSTNAME: + The hostname of the RabbitMQ message broker. Only required if RabbitMQ is enabled. + _deprecated_. Will be removed in future releases. + _default_: no default + _default_: string + +- RABBITMQ\_USERNAME: + The username used to authenticate to the RabbitMQ message broker. Only required if + RabbitMQ is enabled. + _deprecated_. Will be removed in future releases. + _default_: no default + _format_: string + +- RABBITMQ\_PASSWORD: + The password used to authenticate to the RabbitMQ message broker. Only required if + RabbitMQ is enabled. + _deprecated_. Will be removed in future releases. + _default_: no default + _format_: string + +- REGISTER\_DOI\_URI: + URI to the organization that registers the facilities DOIs. 
+ _default_: no default + _format_: string + _example_: "https://mds.test.datacite.org/doi" + +- REGISTER\_METADATA\_URI: + URI to the organization that registers the facilities published data metadata. + _default_: no default + _format_: string + _example_: ="https://mds.test.datacite.org/metadata" - DOI\_USERNAME: - Username used to authenticate on the DOI site - _default_: no default - _format_: string - -- DOI\_PASSWORD: - Password used to authenticate on the DOI site - _default_: no default - _format_: string - -- SITE: - The name of your site. - _default_: no default - _format_: string - -- SMTP\_HOST: - Host of SMTP server. - _deprecated_. Will be removed in future releases. - _default_: no default - _format_: string - -- SMTP\_MESSAGE\_FROM: - Email address that emails should be sent from. - _deprecated_. Will be removed in future releases. - _default_: no default - _format_: string, email - -- SMTP\_PORT: - Port of SMTP server. - _deprecated_. Will be removed in future releases. - _default_: no default - _format_: string - -- SMTP\_SECURE: - Secure of SMTP server. - _deprecated_. Will be removed in future releases. - _default_: no default - _format_: string - -- POLICY\_PUBLICATION\_SHIFT: - Number of years that needs to elapse before the dataset is made publicly acceessible - _default_: 3 - _format_: integer - -- POLICY\_RETENTION\_SHIFT: - Number of years that the datasets are kept online before are archived or deleted. A negative value means that they are never archived/deleted - _default_: -1 - _format_: integer - -- ELASTICSEARCH\_ENABLED: - Flag to enable/disable the ElasticSearch service - accept values: "yes", "no" - _default_: no default - _format_: string - -- ES\_HOST: - The base URL to the Elasticsearch cluster. 
Use `http` if xpack.security is disabled - _default_: no default - _format_: string - _example_: "https://localhost:9200" or "http://localhost:9200" - -- MONGODB\_COLLECTION: - Collection name to be mapped into specified Elasticsearch index - _default_: no default - _format_: string - -- ES\_MAX\_RESULT: - Maximum records can be indexed into Elasticsearch. - _default_: 10000 - _format_: number - -- ES\_FIELDS\_LIMIT: - The total number of fields in an index. - _default_: 1000 - _format_: number - -- ES\_INDEX: - The total number of fields in an index. - _default_: no default - _format_: string - -- ES\_REFRESH: - The total number of fields in an index. - accept values: true, false, "wait_for" - _default_: false - _format_: boolean or string - -- ES\_USERNAME: - Elasticsearch cluster username. - _default_: no default, optional. - _format_: string - -- ELASTIC\_PASSWORD: - Elasticsearch cluster password. - _default_: no default. - _format_: string + Username used to authenticate on the DOI site + _default_: no default + _format_: string + +- DOI\_PASSWORD: + Password used to authenticate on the DOI site + _default_: no default + _format_: string + +- SITE: + The name of your site. + _default_: no default + _format_: string + +- SMTP\_HOST: + Host of SMTP server. + _deprecated_. Will be removed in future releases. + _default_: no default + _format_: string + +- SMTP\_MESSAGE\_FROM: + Email address that emails should be sent from. + _deprecated_. Will be removed in future releases. + _default_: no default + _format_: string, email + +- SMTP\_PORT: + Port of SMTP server. + _deprecated_. Will be removed in future releases. + _default_: no default + _format_: string + +- SMTP\_SECURE: + Secure of SMTP server. + _deprecated_. Will be removed in future releases. 
+ _default_: no default + _format_: string + +- POLICY\_PUBLICATION\_SHIFT: + Number of years that needs to elapse before the dataset is made publicly acceessible + _default_: 3 + _format_: integer + +- POLICY\_RETENTION\_SHIFT: + Number of years that the datasets are kept online before are archived or deleted. A + negative value means that they are never archived/deleted + _default_: -1 + _format_: integer + +- ELASTICSEARCH\_ENABLED: + Flag to enable/disable the ElasticSearch service + accept values: "yes", "no" + _default_: no default + _format_: string + +- ES\_HOST: + The base URL to the Elasticsearch cluster. Use `http` if xpack.security is disabled + _default_: no default + _format_: string + _example_: "https://localhost:9200" or "http://localhost:9200" + +- MONGODB\_COLLECTION: + Collection name to be mapped into specified Elasticsearch index + _default_: no default + _format_: string + +- ES\_MAX\_RESULT: + Maximum records can be indexed into Elasticsearch. + _default_: 10000 + _format_: number + +- ES\_FIELDS\_LIMIT: + The total number of fields in an index. + _default_: 1000 + _format_: number + +- ES\_INDEX: + The total number of fields in an index. + _default_: no default + _format_: string + +- ES\_REFRESH: + The total number of fields in an index. + accept values: true, false, "wait_for" + _default_: false + _format_: boolean or string + +- ES\_USERNAME: + Elasticsearch cluster username. + _default_: no default, optional. + _format_: string + +- ELASTIC\_PASSWORD: + Elasticsearch cluster password. + _default_: no default. 
+ _format_: string From caf189456705fde534f7d282fd8b2768e561d25d Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Mon, 14 Oct 2024 15:47:50 +0200 Subject: [PATCH 3/5] Add jobconfig documentation --- Development/v4.x/backend/configuration.md | 5 + .../v4.x/backend/configuration/jobconfig.md | 185 ++++++++++++++++++ README.md | 2 +- SUMMARY.md | 1 + 4 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 Development/v4.x/backend/configuration/jobconfig.md diff --git a/Development/v4.x/backend/configuration.md b/Development/v4.x/backend/configuration.md index e7618a0a..f3a6eed7 100644 --- a/Development/v4.x/backend/configuration.md +++ b/Development/v4.x/backend/configuration.md @@ -430,3 +430,8 @@ _src/config/configuration.ts_ Elasticsearch cluster password. _default_: no default. _format_: string + +- JOB\_CONFIGURATION\_FILE: + Configuration file for [job actions](configuration/jobconfig.md). + _default_: "jobConfig.json", optional. + _format_: string diff --git a/Development/v4.x/backend/configuration/jobconfig.md b/Development/v4.x/backend/configuration/jobconfig.md new file mode 100644 index 00000000..1d16c41c --- /dev/null +++ b/Development/v4.x/backend/configuration/jobconfig.md @@ -0,0 +1,185 @@ +# Job Configuration + +> _**Development Feature**. This section documents features that are still under +> development as part of the `release-jobs` branch._ + +## Overview + +The SciCat job system is used for any interactions between SciCat and external services. + +Example jobs include: + +- Request an **archive system** to *archive* or *retrieve* data from tape storage +- Move data to a *public* location (e.g. to access data from a [DOI landing + page](https://github.com/SciCatProject/LandingPageServer)) +- Run maintenance tasks such as emailing users + +### Job lifecycle + +Jobs follow a standard Create-Read-Update-Delete (CRUD) lifecycle: + +1. Jobs are _created_ via a `POST` request. 
This can be the result of a frontend + interaction (e.g. selecting a dataset for publishing) or through the REST API. + + The body of the request should follow the CreateJobDto (Data Transfer Object): + ``` + { + "type": "archive", + "ownerUser": "owner", + "ownerGroup": "group", + "contactEmail": "email", + "jobParams": {} + } + ``` +2. Jobs can be _read_ via a `GET` request to `/jobs/:id` or through the `/jobs/fullquery` + search endpoint. The frontend uses this to display the list of jobs. +3. Jobs are _updated_ via a `PATCH` or `PUT` request to `/jobs/:id`. This is usually + used by facility services to update the job status and provide feedback. + + The body of the request should follow the UpdateJobDto: + ``` + { + "statusCode": "string", + "statusMessage": "string", + "jobResultObject": {} + } + ``` +4. Jobs may be _deleted_ periodically during maintenance. This is usually not done by + users. + +### Actions + +After _create_ and _update_ stages a series of actions can be performed by SciCat. This +can be things like sending an email, posting a message to a message broker, or calling +an API. The `jobParams` and `jobResultObject` are used to add additional information +that the actions may need, such as the list of datasets the job refers to. + +A full list of built-in actions is given below. A plugin mechanism for registering new +actions is also planned for a future SciCat release. + +## Configuration + +In SciCat v3.x, a limited number of jobs were hard-coded into the code base. This was +changed in v4.x to allow each site to configure their own set of jobs and customize +actions based on the job status. + +The available jobs are configured in the file `jobConfig.json` (or can be overridden +with the `JOB_CONFIGURATION_FILE` [environment +variable](../configuration.md#environment-variables)). An example `jobConfig.json` file +is available +[here](https://github.com/SciCatProject/scicat-backend-next/blob/release-jobs/src/jobs/config/jobConfig.example.json). 
+ +### Configuration overview +The top-level configuration is structured like this: + +``` +{ + "configVersion": "v1.0", + "jobs": [ + { + "jobType": "archive", + "create": { + "auth": "#all", + "actions": [...] + }, + "update": { + "auth": "archivemanager", + "actions": [...] + } + } + ] +} +``` + +- `configVersion` is a string that indicates the version of this configuration file. It + is not used by SciCat itself, but is useful for migrating jobs if the configuration + changes. SciCat will log a warning if a job was updated with a different config + version than it was created with. +- `jobs` is an array allowing the configuration of different job types. +- `jobType` can be defined for each SciCat instance, but the names `archive`, + `retrieve`, and `public` are traditionally the most common jobs. Only jobs matching a + configured jobType will be accepted by the backend. +- `create` and `update` correspond to `POST` and `PATCH` requests to the `/jobs` + endpoint. These configure 'actions' which are run at different phases of the job + lifecycle. The actions are defined in the [job actions section](#job-actions). +- `auth` configures the roles authorized to use the endpoint for each job operation. + +### Authorization + +Values for `auth` are described in [Jobs Authorization](../authorization/authorization_jobs.md). Some authorization values may require certain information to be passed in the request body; for instance, `"#datasetOwner"` requires that a dataset be passed. 
+ +### Actions Configuration + + +#### URLAction + +**Configuration**: +``` +{ + "actionType": "url", + "url": "http://localhost:3000/api/v3/health?jobid={{id}}", + "method": "GET", + "headers": { + "accept": "application/json" + } +}, +``` + +#### Validate + +**Configuration**: +``` +{ + "actionType": "validate", + "request": { + "jobParams.datasetIds[*]": { + "type": "object", + "required": ["pid","files"] + } + } +} +``` + +#### Email + +**Configuration**: +``` +{ + "actionType": "email", + "auth": { + "user": "user", + "password": "password" + }, + "to": "{{contactEmail}}", + "from": "from", + "subject": "[SciCat] Your {{type}} job was submitted successfully", + "bodyTemplateFile": "src/common/email-templates/job-template-simplified.html" +} +``` + +#### RabbitMQ + +**Configuration**: +``` +{ + "actionType": "rabbitmq", + "hostname": "rabbitmq", + "port": 5672, + "username": "guest", + "password": "guest", + "exchange": "jobs.write", + "queue": "client.jobs.write", + "key": "jobqueue" +} +``` + +#### Log + +**Configuration**: +``` +{ + "actionType": "log" +} +``` + +This is a dummy action, useful for debugging. It adds a log entry when executed. diff --git a/README.md b/README.md index 10298715..f0adbfd4 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ See the [SciCat Home Webpage](https://scicatproject.github.io) for an overview o ## Structure of Documentation -The documentaion is split into the following chapters: +The documentation is split into the following chapters: * [User Guide](Users) - Users of the system can come here to see screen captures, FAQs and find resources on how to better understand SciCat. 
* [Operator Guide](Operator) - System admins read this part to set up SciCat for their location diff --git a/SUMMARY.md b/SUMMARY.md index 44f2f741..1c2deea2 100644 --- a/SUMMARY.md +++ b/SUMMARY.md @@ -43,6 +43,7 @@ - [Running the Components](Development/v4.x/running.md) - [Configuration](Development/v4.x/configuration.md) - [Backend](Development/v4.x/backend/configuration.md) + - [Jobs](Development/v4.x/backend/configuration/jobconfig.md) - [Authorization](Development/v4.x/backend/authorization.md) - [Datasets](Development/v4.x/backend/authorization/authorization_datasets.md) - [OrigDatablocks](Development/v4.x/backend/authorization/authorization_origdatablocks.md) From 23ac019791d0c9e94ec057255d01047112203260 Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Tue, 29 Oct 2024 15:08:44 +0100 Subject: [PATCH 4/5] Convert json job configuration to yaml Documents scicat-backend-next#1463 --- Development/v4.x/backend/configuration.md | 2 +- .../v4.x/backend/configuration/jobconfig.md | 126 ++++++++++-------- 2 files changed, 69 insertions(+), 59 deletions(-) diff --git a/Development/v4.x/backend/configuration.md b/Development/v4.x/backend/configuration.md index f3a6eed7..f7baed0d 100644 --- a/Development/v4.x/backend/configuration.md +++ b/Development/v4.x/backend/configuration.md @@ -433,5 +433,5 @@ _src/config/configuration.ts_ - JOB\_CONFIGURATION\_FILE: Configuration file for [job actions](configuration/jobconfig.md). - _default_: "jobConfig.json", optional. + _default_: "jobConfig.yaml", optional. 
_format_: string diff --git a/Development/v4.x/backend/configuration/jobconfig.md b/Development/v4.x/backend/configuration/jobconfig.md index 1d16c41c..c1a93b1b 100644 --- a/Development/v4.x/backend/configuration/jobconfig.md +++ b/Development/v4.x/backend/configuration/jobconfig.md @@ -73,22 +73,17 @@ is available The top-level configuration is structured like this: ``` -{ - "configVersion": "v1.0", - "jobs": [ - { - "jobType": "archive", - "create": { - "auth": "#all", - "actions": [...] - }, - "update": { - "auth": "archivemanager", - "actions": [...] - } - } - ] -} +configVersion: v1.0 +jobs: + - jobType: archive + create: + auth: "#all" + actions: + - ... + update: + auth: archivemanager + actions: + - ... ``` - `configVersion` is a string that indicates the version of this configuration file. It @@ -110,76 +105,91 @@ Values for `auth` are described in [Jobs Authorization](../authorization/authori ### Actions Configuration +The following actions are built-in to SciCat and can be included in the `actions` array. #### URLAction +Makes a URL request. Most fields can be templated with the job DTO and body. + **Configuration**: ``` -{ - "actionType": "url", - "url": "http://localhost:3000/api/v3/health?jobid={{id}}", - "method": "GET", - "headers": { - "accept": "application/json" - } -}, +- actionType: url + url: http://localhost:3000/api/v3/health?jobid={{id}} + method: GET + headers: + accept: application/json ``` #### Validate +The `validate` action is used to validate requests to the job endpoints. It is +used to enforce custom constraints on `jobParams` or `jobResultObject` for each job +type. If other actions rely on custom fields in their templates they should first be +validated with this action. + **Configuration**: +ValidateAction is configured with a single parameter, `request`, which is checked +against the request body (aka the DTO).
The config file will look like this: ``` -{ - "actionType": "validate", - "request": { - "jobParams.datasetIds[*]": { - "type": "object", - "required": ["pid","files"] - } - } -} +- actionType: validate + request: + <path>: <schema> + ... +``` + +Usually `<path>` will be a dot-delimited field in the DTO, e.g. "jobParams.name". +Technically it is a [JSONPath-Plus](https://github.com/JSONPath-Plus/JSONPath) +expression, which is applied to the request body to extract any matching items. +When writing a jobconfig file it may be helpful to test an expected request body +against the [JSONPath demo](https://jsonpath-plus.github.io/JSONPath/demo/). + +The `<schema>` expression is a JSON Schema. While complicated schemas are possible, +the combination with JSONPath makes common type checks very concise and legible. +Here are some example `<schema>` expressions: + +``` +- actionType: validate + request: + jobParams.name: # match simple types + type: string + jobParams.answers[*]: # literal values (here applied to an array) + enum: ["yes", "no"] + jobResultObject.archivable: # enforce a value + const: true + "jobParams": # Apply external JSON Schema to all params + $ref: https://json.schemastore.org/schema-org-thing.json ``` #### Email **Configuration**: ``` -{ - "actionType": "email", - "auth": { - "user": "user", - "password": "password" - }, - "to": "{{contactEmail}}", - "from": "from", - "subject": "[SciCat] Your {{type}} job was submitted successfully", - "bodyTemplateFile": "src/common/email-templates/job-template-simplified.html" -} +- actionType: email + to: "{{contactEmail}}" + subject: "[SciCat] Your {{type}} job was submitted successfully" + bodyTemplateFile: src/common/email-templates/job-template-simplified.html ``` #### RabbitMQ **Configuration**: ``` -{ - "actionType": "rabbitmq", - "hostname": "rabbitmq", - "port": 5672, - "username": "guest", - "password": "guest", - "exchange": "jobs.write", - "queue": "client.jobs.write", - "key": "jobqueue" -} +- actionType: rabbitmq + exchange: jobs.write +
queue: client.jobs.write + key: jobqueue ``` +The RabbitMQ connection must first be configured through environmental variables +as described in [configuration](./configuration.md). + #### Log +This is a dummy action, useful for debugging. It adds a log entry when executed. + **Configuration**: ``` -{ - "actionType": "log" -} +- actionType: log ``` -This is a dummy action, useful for debugging. It adds a log entry when executed. +The log action does not have any configuration options. From 0da65eb9934ebc2b5c61876c49be202cdbd905c5 Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Sun, 3 Nov 2024 23:37:42 +0100 Subject: [PATCH 5/5] Update job documentation - Document ValidateAction changes (#1473) - Convert examples to YAML (#1463) --- .../v4.x/backend/configuration/jobconfig.md | 194 +++++++++++++++--- 1 file changed, 170 insertions(+), 24 deletions(-) diff --git a/Development/v4.x/backend/configuration/jobconfig.md b/Development/v4.x/backend/configuration/jobconfig.md index c1a93b1b..d3068d25 100644 --- a/Development/v4.x/backend/configuration/jobconfig.md +++ b/Development/v4.x/backend/configuration/jobconfig.md @@ -12,24 +12,37 @@ Example jobs include: - Request an **archive system** to *archive* or *retrieve* data from tape storage - Move data to a *public* location (e.g. to access data from a [DOI landing page](https://github.com/SciCatProject/LandingPageServer)) -- Run maintenance tasks such as emailing users +- Run maintenance tasks such as emailing users. + +If you just plan to use SciCat for cataloging data and don't plan to use its data +management features then you may not need any job types. If no job types are configured then SciCat will reject any backend requests to create jobs. In this case [frontend features](../../frontend/configuration.md) for archiving (`archiveWorkflowEnabled: false`) and retrieval should be disabled. + +### Migration Notes + +In v3.x the `archive`, `retrieve`, and `public` jobs were hard-coded. 
In v4.x the job +types can be arbitrary strings; however we recommend using the standard job names to +avoid confusion. + +Also note that some checks that were performed by default in v3.x for certain job types +must now be configured explicitly as actions. These are included in the provided +`jobConfig.example.yaml` file and are also noted below. ### Job lifecycle Jobs follow a standard Create-Read-Update-Delete (CRUD) lifecycle: -1. Jobs are _created_ via a `POST` request. This can be the result of a frontend +1. Jobs are _created_ via a `POST` request to `/jobs`. This can be the result of a frontend interaction (eg selecting a dataset for publishing) or through the REST API. The body of the request should follow the CreateJobDto (Data Transfer Object): - ``` + ```json { - "type": "archive", - "ownerUser": "owner", - "ownerGroup": "group", - "contactEmail": "email" - "jobParams": {}, - } + "type": "archive", + "ownerUser": "owner", + "ownerGroup": "group", + "contactEmail": "email@example.com", + "jobParams": {} + } ``` 2. Jobs can be _read_ via a `GET` request to `/jobs/:id` or through the `/jobs/fullquery` search endpoint. The frontend uses this to display the list of jobs. @@ -63,11 +76,11 @@ In SciCat v3.x, a limited number of jobs were hard-coded into the code base. Thi changed in v4.x to allow each site to configure their own set of jobs and customize actions based on the job status. -The available jobs are configured in the file `jobConfig.json` (or can be overridden +The available jobs are configured in the file `jobConfig.yaml` (or can be overridden with the `JOB_CONFIGURATION_FILE` [environment -variable](../configuration.md#environment-variables)). An example `jobConfig.json` file +variable](../configuration.md#environment-variables)). An example `jobConfig.example.yaml` file is available -[here](https://github.com/SciCatProject/scicat-backend-next/blob/release-jobs/src/jobs/config/jobConfig.example.json).
+[here](https://github.com/SciCatProject/scicat-backend-next/blob/release-jobs/jobConfig.example.yaml). ### Configuration overview The top-level configuration is structured like this: @@ -77,7 +90,7 @@ configVersion: v1.0 jobs: - jobType: archive create: - auth: "#all" + auth: "#datasetOwner" actions: - ... update: @@ -98,11 +111,14 @@ jobs: endpoint. These configure 'actions' which are run at different phases of the job lifecycle. The actions are defined in the [job actions section](#actions-configuration). - `auth` configures the roles authorized to use the endpoint for each job operation. +- `actions` gives the list of actions to run when the endpoint is called. ### Authorization Values for `auth` are described in [Jobs Authorization](../authorization/authorization_jobs.md). Some authorization values may require certain information to be passed in the request body; for instance, `"#datasetOwner"` requires that a dataset be passed. +> **Caution** Setting `auth` to a permissive value (e.g. `#all`) could expose archiving services to external users. Please consider the security model carefully when configuring jobs. + ### Actions Configuration The following actions are built-in to SciCat and can be included in the `actions` array. @@ -112,7 +128,8 @@ The following actions are built-in to SciCat and can be included in the `actions Makes a URL request. Most fields can be templated with the job DTO and body. **Configuration**: -``` + +```yml - actionType: url url: http://localhost:3000/api/v3/health?jobid={{id}} method: GET @@ -120,6 +137,8 @@ Makes a URL request. Most fields can be templated with the job DTO and body. accept: application/json ``` +> **TODO** Expand this section. + #### Validate The `validate` action is used to validate requests to the job endpoints. It is @@ -127,20 +146,139 @@ used to enforce custom constraints on `jobParams` or `jobResultObject` for each type.
If other actions rely on custom fields in their templates they should first be validated with this action. -**Configuration**: -ValidateAction is configured with a single parameter, `request`, which is checked -against the request body (aka the DTO). The config file will look like this: +ValidateAction is configured with a series of `<path>: <schema>` pairs which describe +a constraint to be validated. These can be applied to different contexts: +- **`request`** - Checks the incoming request body (aka the DTO). +- **`datasets`** - (CREATE only) requires that a list of datasets be included in + `jobParams.datasetList`. Checks are applied to each dataset. + +Validation occurs before the job gets created in the database, while most other actions +are performed after the job is created. This means that correctly configuring validation +is important to detect user errors early. + +Configuration is described in detail below. However, a few illustrative examples are +provided first. + +##### Example 1: Require extra template data + +Consider a case where you want to pass a value from the request body through to other +actions. For example, you want to allow the requestor to specify the subject of an email +to be sent in the request body like this: + +```json +POST /jobs +{ + "type": "email_demo", + "jobParams": { + "subject": "Thanks for using scicat!" + } +} ``` + +In this case an `email` action would be configured using handlebars to insert the +`jobParams.subject` value.
However, a `validate` action should also be configured to +catch errors early where the subject is not specified: + +`jobConfig.yaml`: +``` +jobs: + - jobType: email_demo + create: + auth: admin + actions: + - actionType: validate + request: + jobParams.subject: + type: string + - actionType: email + to: "{{contactEmail}}" + subject: "[SciCat] {{jobParams.subject}}" + bodyTemplateFile: demo_email.html + update: + auth: admin + actions: [] +``` + +##### Example 2: Enforce datasetLifecycle state + +Many job types require a dataset to be included. These are specified in +`jobParams.datasetList` like this: + +```json +POST /jobs +{ + "owner"... + "jobParams": { + "datasetList": [ + { + "pid": "examplePID", + "files": [] + } + ] + } +} +``` + +Permission to access the datasets is checked with the `#dataset*` authorization +methods, but other properties need to be enforced with a `validate` action. + +The following validate actions are recommended for `archive`, `retrieve` and `public` +jobs: + +`jobConfig.yaml`: +```yml +jobs: + - jobType: archive + create: + auth: "#datasetOwner" + actions: + - actionType: validate + datasets: + datasetlifecycle.archivable: + const: true + update: + auth: archivemanager + actions: [] + - jobType: retrieve + create: + auth: "#datasetOwner" + actions: + - actionType: validate + datasets: + datasetlifecycle.retrievable: + const: true + update: + auth: archivemanager + actions: [] + - jobType: public + create: + auth: "#all" + actions: + - actionType: validate + datasets: + isPublished: + const: true + update: + auth: archivemanager +``` + + +##### Configuration + +The config file for a validate action will look like this: + +```yml - actionType: validate request: <path>: <schema> - ... + datasets: + <path>: <schema> ``` Usually `<path>` will be a dot-delimited field in the DTO, e.g. "jobParams.name". Technically it is a [JSONPath-Plus](https://github.com/JSONPath-Plus/JSONPath) -expression, which is applied to the request body to extract any matching items.
-When writing a jobconfig file it may be helpful to test an expected request body +expression, which is applied to the request body or dataset to extract any matching +items. When writing a jobconfig file it may be helpful to test an expected request body against the [JSONPath demo](https://jsonpath-plus.github.io/JSONPath/demo/). The `<schema>` expression is a JSON Schema. While complicated schemas are possible, @@ -149,18 +287,25 @@ Here are some example `<schema>` expressions: ``` - actionType: validate - request: - jobParams.name: # match simple types + request: # applies to the request body + jobParams.stringVal: # match simple types type: string - jobParams.answers[*]: # literal values (here applied to an array) + jobParams.enumVal: # literal values enum: ["yes", "no"] - jobResultObject.archivable: # enforce a value + jobResultObject.mustBeTrue: # enforce a value const: true "jobParams": # Apply external JSON Schema to all params $ref: https://json.schemastore.org/schema-org-thing.json + datasets: # applies to all datasets + datasetlifecycle.archivable: + const: true ``` +Validation will result in a `400 Bad Request` response if either the path is not found +or if any values matching the path do not validate against the provided schema. + #### Email +> **TODO** Expand this section. **Configuration**: ``` @@ -171,6 +316,7 @@ Here are some example `<schema>` expressions: ``` #### RabbitMQ +> **TODO** Expand this section. **Configuration**: ```