From 677b424374779d5b3f664b3efa2c2d715541fe0b Mon Sep 17 00:00:00 2001 From: Chris Topaloudis Date: Thu, 14 May 2020 10:43:23 +0200 Subject: [PATCH] data models scratch: review and update for Invenio v3.2 and ES 7 --- 08-data-models-from-scratch/README.md | 16 ++--- .../my_site/authors/mappings/__init__.py | 2 - .../mappings/v6/authors/author-v1.0.0.json | 29 -------- .../authors/mappings/{v6 => v7}/__init__.py | 4 +- .../mappings/v7/authors/author-v1.0.0.json | 27 ++++++++ .../my_site/authors/mappings/__init__.py | 2 - .../my_site/authors/mappings/v7/__init__.py | 4 +- .../mappings/v7/authors/author-v1.0.0.json | 46 ++++++------- .../mappings/v6/records/record-v1.0.0.json | 68 ------------------- .../mappings/v7/records/record-v1.0.0.json | 66 ++++++++++++++++++ 10 files changed, 123 insertions(+), 141 deletions(-) delete mode 100644 08-data-models-from-scratch/author_module/my_site/authors/mappings/v6/authors/author-v1.0.0.json rename 08-data-models-from-scratch/author_module/my_site/authors/mappings/{v6 => v7}/__init__.py (67%) create mode 100644 08-data-models-from-scratch/author_module/my_site/authors/mappings/v7/authors/author-v1.0.0.json delete mode 100644 08-data-models-from-scratch/solution/my-site/my_site/records/mappings/v6/records/record-v1.0.0.json create mode 100644 08-data-models-from-scratch/solution/my-site/my_site/records/mappings/v7/records/record-v1.0.0.json diff --git a/08-data-models-from-scratch/README.md b/08-data-models-from-scratch/README.md index 2f8838e..3372349 100644 --- a/08-data-models-from-scratch/README.md +++ b/08-data-models-from-scratch/README.md @@ -4,7 +4,7 @@ In this session we will learn how to build a new data model from scratch. During process we will see how to create a new REST **module** for our model and provide functionalities such as storing and searching. -### Table of contents +## Table of contents - [Step 1: Bootstrap exercise](#step-1-bootstrap-exercise) - [Step 2: Create an Authors flask extension](#step-2-create-an-Authors-flask-extension) @@ -42,6 +42,7 @@ First thing we need to do is to create an extension called `Authors` and registe - Uncomment the code we find in the `my_site/authors/ext.py` - Uncomment in the `setup.py` the following section: + ```diff 'invenio_base.api_apps': [ 'my_site = my_site.records:Mysite', @@ -56,7 +57,6 @@ First thing we need to do is to create an extension called `Authors` and registe Now that we have our extension registered, we need to tell Invenio how the internal representation of our data model is. To do so, we use [a JSONSchema](author_module/my_site/authors/jsonschemas/authors/author-v1.0.0.json) and [an Elasticsearch mapping](author_module/my_site/authors/mappings/v7/authors/author-v1.0.0.json): the former to validate the internal JSON format and the latter to tell Elasticsearch what shape our data model has so it can handle correctly its values. - ### Actions - Uncomment the entrypoints in `setup.py`: @@ -83,7 +83,6 @@ Now that we have our extension registered, we need to tell Invenio how the inter By doing this we told Invenio to register our new schema and mapping. We are also defining the name of the Elasticsearch index which will be created to enable author search. - ## Step 4: External representation: loaders and serializers So far we have a new extension which defines how our data model is **stored** and **searchable**, but have not yet provided means to transform this data when it's received or served by Invenio. To do so, we will introduce two new concepts: **loaders** whose responsibility is to transform incoming data to the internal format, and **serializers** which will be in charge of transforming the internal data to a different format, based on our needs. @@ -142,7 +141,6 @@ During the first step, we registered our **loader** in the configuration of our In the upcoming steps, we created and registered our serializers. We split them into two categories: **Record serializers** and **Search serializers**. The first is used to **serialize** the internal representation of one specific record (e.g author) while the latter is transforming each record result of a search. They are capable of doing that by using again a `Marshmallow` schema which we will explain in detail in the next section. - ## Step 5: Data validation: Marshmallow In the previous section we have configured loaders and serializers but we also started to configure our first validation check by making reference to two Marshmallow schemas. These schemas will make sure that the data has the correct format both when it arrives to the system and when it is returned to the user. @@ -153,7 +151,6 @@ In the previous section we have configured loaders and serializers but we also s Here we have added two classes which we made reference in the previous step, `AuthorMetadataSchemaV1` and `AuthorSchemaV1`. The first will take care of validating incoming author metadata and the second will take care of validating the author output format. Marshmallow is not mandatory, but highly recommended since it can do from simple validations to complex ones, for more information visit [Marshmallow documentation](https://marshmallow.readthedocs.io/en/2.x-line/). - ## Step 6: Persistent identifiers So far we have only cared about our content and its format, but we need to provide a way to retrieve our records. We are doing this by using PIDs, and the difference with normal IDs is that they do not change over time to avoid broken references. @@ -195,7 +192,6 @@ This is how we are registering our new minter and fetcher making them available. **Important**: the value of the `pid_minter` and the `pid_fetcher` defined in `config.py` should match exactly with the entrypoint names defined in `setup.py`. Also, we should make sure that the `pid_type` value and the `RECORDS_REST_ENDPOINTS` endpoint key match exactly. - ## Step 7: Create an author In order to reflect our changes in the database and Elasticsearch but also to register our new entrypoints in Invenio we need to run the following commands: @@ -211,7 +207,7 @@ We can now create new authors: ```bash $ curl -k --header "Content-Type: application/json" \ --request POST \ - --data '{"name":"Zacharias"}' \ + --data '{"name":"John Doe"}' \ https://127.0.0.1:5000/api/authors/\?prettyprint\=1 { @@ -219,7 +215,7 @@ $ curl -k --header "Content-Type: application/json" \ "id": "1", "metadata": { "id": "1", - "name": "Zacharias" + "name": "John Doe" }, "updated": "2019-03-17T16:01:07.148181+00:00" } @@ -235,7 +231,7 @@ $ curl -k "https://127.0.0.1:5000/api/authors/?prettyprint=1" "buckets": [ { "doc_count": 1, - "key": "Zacharias" + "key": "John Doe" } ], "doc_count_error_upper_bound": 0, @@ -249,7 +245,7 @@ $ curl -k "https://127.0.0.1:5000/api/authors/?prettyprint=1" "id": "1", "metadata": { "id": "1", - "name": "Zacharias" + "name": "John Doe" }, "updated": "2019-03-17T15:55:53.927761+00:00" } diff --git a/08-data-models-from-scratch/author_module/my_site/authors/mappings/__init__.py b/08-data-models-from-scratch/author_module/my_site/authors/mappings/__init__.py index ac73b6f..292b154 100644 --- a/08-data-models-from-scratch/author_module/my_site/authors/mappings/__init__.py +++ b/08-data-models-from-scratch/author_module/my_site/authors/mappings/__init__.py @@ -12,5 +12,3 @@ in Elasticsearch. You need to provide one mapping per major version of Elasticsearch you want to support. """ - -from __future__ import absolute_import, print_function diff --git a/08-data-models-from-scratch/author_module/my_site/authors/mappings/v6/authors/author-v1.0.0.json b/08-data-models-from-scratch/author_module/my_site/authors/mappings/v6/authors/author-v1.0.0.json deleted file mode 100644 index 4b8d9a1..0000000 --- a/08-data-models-from-scratch/author_module/my_site/authors/mappings/v6/authors/author-v1.0.0.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "mappings": { - "author-v1.0.0": { - "date_detection": false, - "numeric_detection": false, - "properties": { - "$schema": { - "type": "text", - "index": false - }, - "id": { - "type": "keyword" - }, - "name": { - "type": "keyword" - }, - "organization": { - "type": "keyword" - }, - "_created": { - "type": "date" - }, - "_updated": { - "type": "date" - } - } - } - } -} diff --git a/08-data-models-from-scratch/author_module/my_site/authors/mappings/v6/__init__.py b/08-data-models-from-scratch/author_module/my_site/authors/mappings/v7/__init__.py similarity index 67% rename from 08-data-models-from-scratch/author_module/my_site/authors/mappings/v6/__init__.py rename to 08-data-models-from-scratch/author_module/my_site/authors/mappings/v7/__init__.py index 018003d..5233dc2 100644 --- a/08-data-models-from-scratch/author_module/my_site/authors/mappings/v6/__init__.py +++ b/08-data-models-from-scratch/author_module/my_site/authors/mappings/v7/__init__.py @@ -5,6 +5,4 @@ # My site is free software; you can redistribute it and/or modify it under # the terms of the MIT License; see LICENSE file for more details. -"""Mappings for Elasticsearch 5.x.""" - -from __future__ import absolute_import, print_function +"""Mappings for Elasticsearch 7.x.""" diff --git a/08-data-models-from-scratch/author_module/my_site/authors/mappings/v7/authors/author-v1.0.0.json b/08-data-models-from-scratch/author_module/my_site/authors/mappings/v7/authors/author-v1.0.0.json new file mode 100644 index 0000000..fe91e88 --- /dev/null +++ b/08-data-models-from-scratch/author_module/my_site/authors/mappings/v7/authors/author-v1.0.0.json @@ -0,0 +1,27 @@ +{ + "mappings": { + "date_detection": false, + "numeric_detection": false, + "properties": { + "$schema": { + "type": "text", + "index": false + }, + "id": { + "type": "keyword" + }, + "name": { + "type": "keyword" + }, + "organization": { + "type": "keyword" + }, + "_created": { + "type": "date" + }, + "_updated": { + "type": "date" + } + } + } +} \ No newline at end of file diff --git a/08-data-models-from-scratch/solution/my-site/my_site/authors/mappings/__init__.py b/08-data-models-from-scratch/solution/my-site/my_site/authors/mappings/__init__.py index ac73b6f..292b154 100644 --- a/08-data-models-from-scratch/solution/my-site/my_site/authors/mappings/__init__.py +++ b/08-data-models-from-scratch/solution/my-site/my_site/authors/mappings/__init__.py @@ -12,5 +12,3 @@ in Elasticsearch. You need to provide one mapping per major version of Elasticsearch you want to support. """ - -from __future__ import absolute_import, print_function diff --git a/08-data-models-from-scratch/solution/my-site/my_site/authors/mappings/v7/__init__.py b/08-data-models-from-scratch/solution/my-site/my_site/authors/mappings/v7/__init__.py index 018003d..5233dc2 100644 --- a/08-data-models-from-scratch/solution/my-site/my_site/authors/mappings/v7/__init__.py +++ b/08-data-models-from-scratch/solution/my-site/my_site/authors/mappings/v7/__init__.py @@ -5,6 +5,4 @@ # My site is free software; you can redistribute it and/or modify it under # the terms of the MIT License; see LICENSE file for more details. -"""Mappings for Elasticsearch 5.x.""" - -from __future__ import absolute_import, print_function +"""Mappings for Elasticsearch 7.x.""" diff --git a/08-data-models-from-scratch/solution/my-site/my_site/authors/mappings/v7/authors/author-v1.0.0.json b/08-data-models-from-scratch/solution/my-site/my_site/authors/mappings/v7/authors/author-v1.0.0.json index e21f372..6912023 100644 --- a/08-data-models-from-scratch/solution/my-site/my_site/authors/mappings/v7/authors/author-v1.0.0.json +++ b/08-data-models-from-scratch/solution/my-site/my_site/authors/mappings/v7/authors/author-v1.0.0.json @@ -1,29 +1,27 @@ { "mappings": { - "author-v1.0.0": { - "date_detection": false, - "numeric_detection": false, - "properties": { - "$schema": { - "type": "text", - "index": false - }, - "id": { - "type": "keyword" - }, - "name": { - "type": "text" - }, - "organization": { - "type": "text" - }, - "_created": { - "type": "date" - }, - "_updated": { - "type": "date" - } + "date_detection": false, + "numeric_detection": false, + "properties": { + "$schema": { + "type": "text", + "index": false + }, + "id": { + "type": "keyword" + }, + "name": { + "type": "text" + }, + "organization": { + "type": "text" + }, + "_created": { + "type": "date" + }, + "_updated": { + "type": "date" } } } -} +} \ No newline at end of file diff --git a/08-data-models-from-scratch/solution/my-site/my_site/records/mappings/v6/records/record-v1.0.0.json b/08-data-models-from-scratch/solution/my-site/my_site/records/mappings/v6/records/record-v1.0.0.json deleted file mode 100644 index 33f4040..0000000 --- a/08-data-models-from-scratch/solution/my-site/my_site/records/mappings/v6/records/record-v1.0.0.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "mappings": { - "record-v1.0.0": { - "date_detection": false, - "numeric_detection": false, - "properties": { - "$schema": { - "type": "text", - "index": false - }, - "title": { - "type": "text", - "copy_to": "suggest_title" - }, - "suggest_title": { - "type": "completion" - }, - "id": { - "type": "keyword" - }, - "owner": { - "type": "integer" - }, - "keywords": { - "type": "keyword" - }, - "publication_date": { - "type": "date", - "format": "date" - }, - "contributors": { - "type": "object", - "properties": { - "ids": { - "type": "object", - "properties": { - "source": { - "type": "text" - }, - "value": { - "type": "keyword" - } - } - }, - "affiliations": { - "type": "text" - }, - "role": { - "type": "keyword" - }, - "email": { - "type": "text" - }, - "name": { - "type": "text" - } - } - }, - "_created": { - "type": "date" - }, - "_updated": { - "type": "date" - } - } - } - } -} diff --git a/08-data-models-from-scratch/solution/my-site/my_site/records/mappings/v7/records/record-v1.0.0.json b/08-data-models-from-scratch/solution/my-site/my_site/records/mappings/v7/records/record-v1.0.0.json new file mode 100644 index 0000000..e95e978 --- /dev/null +++ b/08-data-models-from-scratch/solution/my-site/my_site/records/mappings/v7/records/record-v1.0.0.json @@ -0,0 +1,66 @@ +{ + "mappings": { + "date_detection": false, + "numeric_detection": false, + "properties": { + "$schema": { + "type": "text", + "index": false + }, + "title": { + "type": "text", + "copy_to": "suggest_title" + }, + "suggest_title": { + "type": "completion" + }, + "id": { + "type": "keyword" + }, + "owner": { + "type": "integer" + }, + "keywords": { + "type": "keyword" + }, + "publication_date": { + "type": "date", + "format": "date" + }, + "contributors": { + "type": "object", + "properties": { + "ids": { + "type": "object", + "properties": { + "source": { + "type": "text" + }, + "value": { + "type": "keyword" + } + } + }, + "affiliations": { + "type": "text" + }, + "role": { + "type": "keyword" + }, + "email": { + "type": "text" + }, + "name": { + "type": "text" + } + } + }, + "_created": { + "type": "date" + }, + "_updated": { + "type": "date" + } + } + } +} \ No newline at end of file