From 34b6a8ce8c0a5cebd97abf15c229a2986430b61d Mon Sep 17 00:00:00 2001 From: Chris Topaloudis Date: Thu, 14 May 2020 09:15:37 +0200 Subject: [PATCH] review: 11 linking records * blocker: `pipenv run invenio records` was removed from `invenio-records` reform STEP 5 - markdown lint - ES7 mappings - child of #50 * Added warning (addresses #74). --- 07-data-models-new-field/README.md | 1 + 11-linking-records/README.md | 48 +++++------ .../mappings/v6/authors/author-v1.0.0.json | 29 ------- .../authors/mappings/{v6 => v7}/__init__.py | 2 +- .../mappings/v7/authors/author-v1.0.0.json | 27 +++++++ .../mappings/v6/records/record-v1.0.0.json | 79 ------------------- .../mappings/v7/records/record-v1.0.0.json | 77 ++++++++++++++++++ 15-application-architecture/README.md | 1 + 8 files changed, 132 insertions(+), 132 deletions(-) delete mode 100644 11-linking-records/solution/my-site/my_site/authors/mappings/v6/authors/author-v1.0.0.json rename 11-linking-records/solution/my-site/my_site/authors/mappings/{v6 => v7}/__init__.py (86%) create mode 100644 11-linking-records/solution/my-site/my_site/authors/mappings/v7/authors/author-v1.0.0.json delete mode 100644 11-linking-records/solution/my-site/my_site/records/mappings/v6/records/record-v1.0.0.json create mode 100644 11-linking-records/solution/my-site/my_site/records/mappings/v7/records/record-v1.0.0.json diff --git a/07-data-models-new-field/README.md b/07-data-models-new-field/README.md index ce45af4..ee28418 100644 --- a/07-data-models-new-field/README.md +++ b/07-data-models-new-field/README.md @@ -153,6 +153,7 @@ Now you should see an output similar to the below: "self": "https://localhost:5000/api/records/1" }, "metadata": { + "$schema": "https://my-site.com/schemas/records/record-v1.0.0.json", "contributors": [ { "name": "Doe, John" diff --git a/11-linking-records/README.md b/11-linking-records/README.md index 2e6efbe..ac388e8 100644 --- a/11-linking-records/README.md +++ b/11-linking-records/README.md @@ -1,14 +1,16 
@@ +:warning: **This chapter only works with Invenio v3.1** ([#74](https://github.com/inveniosoftware/training/issues/74)). + # Tutorial 11 - Data models: link records using references The goal of this tutorial is to learn how we can link records using references, with a technique similar to [JSON Reference](https://json-spec.readthedocs.io/reference.html). -### Table of Contents +## Table of Contents - [Step 1: Bootstrap exercise](#step-1-bootstrap-exercise) - [Step 2: Add author reference to the record](#step-2-add-author-reference-to-the-record) - [Step 3: Create a JSON resolver](#step-3-create-a-JSON-resolver) - [Step 4: Update the entrypoints](#step-4-update-the-entrypoints) -- [Step 5: Try it!](#step-5-try-it) +- [Step 5: Try it](#step-5-try-it) - [Bonus](#bonus) - [About references in Invenio](#about-references-in-Invenio) - [What did we learn](#what-did-we-learn) @@ -71,8 +73,8 @@ Let's implement it. If you completed the previous tutorial, you can skip this step. If instead you would like to start from a clean state run the following commands: ```bash -$ cd ~/src/training/ -$ ./start-from.sh 10-indexing-records +cd ~/src/training/ +./start-from.sh 10-indexing-records ``` ## Step 2: Add author reference to the record @@ -101,7 +103,7 @@ We need to create a reference (in a similar way as we would do using foreign key Since we have changed the data model, we need to change the Elasticsearch mappings because you want to search records by author metadata. 
-`my-site/my_site/records/mappings/v6/records/record-v1.0.0.json` +`my-site/my_site/records/mappings/v7/records/record-v1.0.0.json` ```diff + "author": { @@ -144,7 +146,7 @@ from invenio_records.api import Record @jsonresolver.route('/api/resolver/author/', host='my-site.com') def record_jsonresolver(authid): """Resolve referenced author.""" - # Setup a resolver to retrive an author record given its id + # Setup a resolver to retrieve an author record given its id resolver = Resolver(pid_type='authid', object_type="rec", getter=Record.get_record) _, record = resolver.resolve(str(authid)) # we could manipulate here the record and eventually add/remove fields @@ -168,55 +170,55 @@ We need to add the JSON resolver method in the entrypoints. 'records = my_site.records.mappings', ``` -## Step 5: Try it! +## Step 5: Try it We can now try to create an author and then a record with a reference to it. But first, since we have changed schema, mappings and entrypoints, let's re-install the app and re-init DB and Elasticsearch. ```bash -$ pipenv run pip install -e . -$ ./scripts/setup -$ ./scripts/server +pipenv run pip install -e . +./scripts/setup +./scripts/server ``` Create a new author: ```bash -$ curl -k --header "Content-Type: application/json" \ +curl -k --header "Content-Type: application/json" \ --request POST \ --data '{"name": "Goodman, Martin"}' \ "https://127.0.0.1:5000/api/authors/?prettyprint=1" -$ firefox http://127.0.0.1:9200/authors/_search?pretty=true +firefox http://127.0.0.1:9200/authors/_search?pretty=true ``` Now, stop the server. Create a new record. In the `$ref` field, we will put the route URL that we have defined in the JSON resolver method. To use the REST API, we would have to change the loaders, since the `author` field is not defined. Let's create the record using the CLI. 
```bash -$ cd ~/src/my-site -$ echo '{"author": { "$ref": "https://my-site.com/api/resolver/author/1" }, "title": "Invenio is awesome", "contributors": [{"name": "Kent, Clark"}], "owner": 1}' | pipenv run invenio records create --pid-minter recid +cd ~/src/my-site +echo '{"author": { "$ref": "https://my-site.com/api/resolver/author/1" }, "title": "Invenio is awesome", "contributors": [{"name": "Kent, Clark"}], "owner": 1}' | pipenv run invenio records create --pid-minter recid ``` Let's re-index the newly created record so that the `$ref` attribute will be replaced: ```bash -$ pipenv run invenio index reindex --pid-type recid --yes-i-know -$ pipenv run invenio index run +pipenv run invenio index reindex --pid-type recid --yes-i-know +pipenv run invenio index run ``` Now, we can query Elasticsearch and verify that the author metadata are in the record. ```bash -$ ./scripts/server -$ firefox http://127.0.0.1:9200/records/_search?pretty=true +./scripts/server +firefox http://127.0.0.1:9200/records/_search?pretty=true ``` If, instead, we check what's in the database (using the Admin panel) we can see that the record still has the `$ref` field: ```bash # create an admin user -$ pipenv run invenio users create admin@invenio.org --password 123456 --active -$ pipenv run invenio roles add admin@invenio.org admin -$ firefox https://127.0.0.1:5000/admin/persistentidentifier/ +pipenv run invenio users create admin@invenio.org --password 123456 --active +pipenv run invenio roles add admin@invenio.org admin +firefox https://127.0.0.1:5000/admin/persistentidentifier/ ``` ## Bonus @@ -224,7 +226,7 @@ $ firefox https://127.0.0.1:5000/admin/persistentidentifier/ Did you notice that the `contributors_count` field is also showing up in the author records? ```bash -$ firefox http://127.0.0.1:9200/authors/_search?pretty=true +firefox http://127.0.0.1:9200/authors/_search?pretty=true ``` Can you guess why? How can we fix it? @@ -234,7 +236,7 @@ Can you guess why? How can we fix it? 
Invenio uses the [jsonresolver](https://github.com/inveniosoftware/jsonresolver) module to define and resolve references between records. The `$ref` URL is generated using a host domain defined by the config variable `JSONSCHEMA_HOST` (it is not meant to be an existing URL) to be able to avoid performing real HTTP requests when resolving but instead calling a Flask route method implementation. Given that it is an internal reference, you should not expose this field and URL when returning the schema and the records through APIs. To avoid that, by default [invenio-records-rest](https://invenio-records-rest.readthedocs.io/en/latest/usage.html) JSON serializers define `replace_refs=True` as a parameter. -Moreover, as you saw, the URL will be hardcoded in each record. If you need to change it, you will have most probably to perform an update to all your records. +Moreover, as you saw, the URL will be hard-coded in each record. If you need to change it, you will most probably have to perform an update to all your records. To hide references and solve this problem, we recommend you to create your own record class and add the `author` field creation in the overridden `create` method. In this way, every time you create a record, the `$ref` is automatically generated. 
As reference, here an example: diff --git a/11-linking-records/solution/my-site/my_site/authors/mappings/v6/authors/author-v1.0.0.json b/11-linking-records/solution/my-site/my_site/authors/mappings/v6/authors/author-v1.0.0.json deleted file mode 100644 index e21f372..0000000 --- a/11-linking-records/solution/my-site/my_site/authors/mappings/v6/authors/author-v1.0.0.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "mappings": { - "author-v1.0.0": { - "date_detection": false, - "numeric_detection": false, - "properties": { - "$schema": { - "type": "text", - "index": false - }, - "id": { - "type": "keyword" - }, - "name": { - "type": "text" - }, - "organization": { - "type": "text" - }, - "_created": { - "type": "date" - }, - "_updated": { - "type": "date" - } - } - } - } -} diff --git a/11-linking-records/solution/my-site/my_site/authors/mappings/v6/__init__.py b/11-linking-records/solution/my-site/my_site/authors/mappings/v7/__init__.py similarity index 86% rename from 11-linking-records/solution/my-site/my_site/authors/mappings/v6/__init__.py rename to 11-linking-records/solution/my-site/my_site/authors/mappings/v7/__init__.py index 018003d..613350d 100644 --- a/11-linking-records/solution/my-site/my_site/authors/mappings/v6/__init__.py +++ b/11-linking-records/solution/my-site/my_site/authors/mappings/v7/__init__.py @@ -5,6 +5,6 @@ # My site is free software; you can redistribute it and/or modify it under # the terms of the MIT License; see LICENSE file for more details. 
-"""Mappings for Elasticsearch 5.x.""" +"""Mappings for Elasticsearch 7.x.""" from __future__ import absolute_import, print_function diff --git a/11-linking-records/solution/my-site/my_site/authors/mappings/v7/authors/author-v1.0.0.json b/11-linking-records/solution/my-site/my_site/authors/mappings/v7/authors/author-v1.0.0.json new file mode 100644 index 0000000..9736c14 --- /dev/null +++ b/11-linking-records/solution/my-site/my_site/authors/mappings/v7/authors/author-v1.0.0.json @@ -0,0 +1,27 @@ +{ + "mappings": { + "date_detection": false, + "numeric_detection": false, + "properties": { + "$schema": { + "type": "text", + "index": false + }, + "id": { + "type": "keyword" + }, + "name": { + "type": "text" + }, + "organization": { + "type": "text" + }, + "_created": { + "type": "date" + }, + "_updated": { + "type": "date" + } + } + } +} diff --git a/11-linking-records/solution/my-site/my_site/records/mappings/v6/records/record-v1.0.0.json b/11-linking-records/solution/my-site/my_site/records/mappings/v6/records/record-v1.0.0.json deleted file mode 100644 index be63be3..0000000 --- a/11-linking-records/solution/my-site/my_site/records/mappings/v6/records/record-v1.0.0.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "mappings": { - "record-v1.0.0": { - "date_detection": false, - "numeric_detection": false, - "properties": { - "$schema": { - "type": "text", - "index": false - }, - "title": { - "type": "text", - "copy_to": "suggest_title" - }, - "suggest_title": { - "type": "completion" - }, - "author": { - "type": "object", - "properties": { - "id": { - "type": "integer" - }, - "name": { - "type": "text" - } - } - }, - "id": { - "type": "keyword" - }, - "owner": { - "type": "integer" - }, - "publication_date": { - "type": "date", - "format": "date" - }, - "contributors_count": { - "type": "short" - }, - "contributors": { - "type": "object", - "properties": { - "ids": { - "type": "object", - "properties": { - "source": { - "type": "text" - }, - "value": { - "type": "keyword" - 
} - } - }, - "affiliations": { - "type": "text" - }, - "role": { - "type": "keyword" - }, - "email": { - "type": "text" - }, - "name": { - "type": "text" - } - } - }, - "_created": { - "type": "date" - }, - "_updated": { - "type": "date" - } - } - } - } -} diff --git a/11-linking-records/solution/my-site/my_site/records/mappings/v7/records/record-v1.0.0.json b/11-linking-records/solution/my-site/my_site/records/mappings/v7/records/record-v1.0.0.json new file mode 100644 index 0000000..4aac765 --- /dev/null +++ b/11-linking-records/solution/my-site/my_site/records/mappings/v7/records/record-v1.0.0.json @@ -0,0 +1,77 @@ +{ + "mappings": { + "date_detection": false, + "numeric_detection": false, + "properties": { + "$schema": { + "type": "text", + "index": false + }, + "title": { + "type": "text", + "copy_to": "suggest_title" + }, + "suggest_title": { + "type": "completion" + }, + "author": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "name": { + "type": "text" + } + } + }, + "id": { + "type": "keyword" + }, + "owner": { + "type": "integer" + }, + "publication_date": { + "type": "date", + "format": "date" + }, + "contributors_count": { + "type": "short" + }, + "contributors": { + "type": "object", + "properties": { + "ids": { + "type": "object", + "properties": { + "source": { + "type": "text" + }, + "value": { + "type": "keyword" + } + } + }, + "affiliations": { + "type": "text" + }, + "role": { + "type": "keyword" + }, + "email": { + "type": "text" + }, + "name": { + "type": "text" + } + } + }, + "_created": { + "type": "date" + }, + "_updated": { + "type": "date" + } + } + } +} diff --git a/15-application-architecture/README.md b/15-application-architecture/README.md index e69de29..f17d24b 100644 --- a/15-application-architecture/README.md +++ b/15-application-architecture/README.md @@ -0,0 +1 @@ +:construction: work in progress :construction: