From 6714dc08047335874939c54c5e86ab3707d55e82 Mon Sep 17 00:00:00 2001
From: Daniele Guido <gui.daniele@gmail.com>
Date: Wed, 16 Oct 2024 09:49:05 +0200
Subject: [PATCH] Fix/issue 28 (#29) hello world Datalab

* rephrase text

* remove individual authors

* make subtitle more visible

* fix notebook text

* Update CodeSnippet.tsx

* change notebook filename

* Update TermsOfUseModal.tsx

* Update CodeSnippet.tsx
---
 src/components/CodeSnippet.tsx                |  3 +-
 src/components/GettingStarted.tsx             |  2 +-
 src/components/TermsOfUseModal.tsx            |  2 +-
 src/components/Wall.tsx                       |  2 +-
 ...detect-news-agency-with-impresso-model.mdx | 14 +++++----
 src/content/notebooks/generic-entity-api.mdx  | 30 ++++++++++++++-----
 .../notebooks/impresso-py-collections.mdx     | 14 ++++++++-
 .../{setup.mdx => impresso-py-connect.mdx}    | 27 +++++++++++++----
 src/content/notebooks/impresso-py-maps.mdx    |  2 +-
 src/content/notebooks/impresso-py-network.mdx |  2 +-
 src/content/notebooks/impresso-py-search.mdx  |  2 +-
 src/content/series/enter-impresso-models.mdx  |  1 +
 src/content/series/enter-impresso.mdx         |  4 +--
 src/content/series/entities.mdx               |  5 ++--
 14 files changed, 77 insertions(+), 33 deletions(-)
 rename src/content/notebooks/{setup.mdx => impresso-py-connect.mdx} (94%)
diff --git a/src/components/CodeSnippet.tsx b/src/components/CodeSnippet.tsx
index 77c37ae..c959449 100644
--- a/src/components/CodeSnippet.tsx
+++ b/src/components/CodeSnippet.tsx
@@ -1,7 +1,6 @@
 import { useState, useRef, useEffect } from "react"
 import ReactCodeMirror, { EditorView } from "@uiw/react-codemirror"
 import type { ReactCodeMirrorRef } from "@uiw/react-codemirror"
-import { duotoneDark } from "@uiw/codemirror-theme-duotone"
 import { python } from "@codemirror/lang-python"
 import { Copy, CheckCircle } from "iconoir-react"
 import { createTheme } from "@uiw/codemirror-themes"
@@ -18,7 +17,7 @@ export interface CodeSnippetProps {
 const myTheme = createTheme({
   theme: "light",
   settings: {
-    background: "#fff9f2",
+    background: "#fff9f250",
     backgroundImage: "",
     foreground: "#75baff",
     caret: "#5d00ff",
diff --git a/src/components/GettingStarted.tsx b/src/components/GettingStarted.tsx
index 2beac1e..e119263 100644
--- a/src/components/GettingStarted.tsx
+++ b/src/components/GettingStarted.tsx
@@ -44,7 +44,7 @@ const GettingStarted = ({ className = "" }) => {
             <div className="badge bg-dark me-2 py-1 px-2 font-weight-extrabold text-primary">
               {startNumAfterOptionalSteps}
             </div>{" "}
-            Consult our terms of use
+            Accept our Terms of Use
           </Link>
         </li>
         <li>
diff --git a/src/components/TermsOfUseModal.tsx b/src/components/TermsOfUseModal.tsx
index c9c45cc..04faece 100644
--- a/src/components/TermsOfUseModal.tsx
+++ b/src/components/TermsOfUseModal.tsx
@@ -1,4 +1,4 @@
-import { useEffect, useRef, useState, type ChangeEvent } from "react"
+import { useEffect, useState, type ChangeEvent } from "react"
 import AcceptTermsOfUse from "./AcceptTermsOfUse"
 import Page from "./Page"
 import { Col, Container, Row } from "react-bootstrap"
diff --git a/src/components/Wall.tsx b/src/components/Wall.tsx
index 1e62136..46dd053 100644
--- a/src/components/Wall.tsx
+++ b/src/components/Wall.tsx
@@ -73,7 +73,7 @@ const Wall = ({
               <b>{numberOfAuthors}</b> authors.
             </p>
           </Col>
-          <Col md={4}>
+          <Col md={6} lg={6} xxl={5}>
             <h3>
               Join us in this early stage of development and help us to improve
               the platform.
diff --git a/src/content/notebooks/detect-news-agency-with-impresso-model.mdx b/src/content/notebooks/detect-news-agency-with-impresso-model.mdx
index a2cd98d..be7fffc 100644
--- a/src/content/notebooks/detect-news-agency-with-impresso-model.mdx
+++ b/src/content/notebooks/detect-news-agency-with-impresso-model.mdx
@@ -8,20 +8,19 @@ date: 2024-09-18T10:11:47Z
 googleColabUrl: https://colab.research.google.com/github/impresso/impresso-datalab-notebooks/blob/main/2-entity/NE_02_newsagencies.ipynb
 authors:
   - impresso-team
-seealso:
-  - setup
 ---
 
 {/* cell:0 cell_type:markdown */}
-Delivering swift and reliable news since the 1830s and 1840s, news agencies have played a pivotal role both nationally and internationally. However, understanding their precise impact on shaping news content has remained somewhat elusive. Our goal is to illuminate this aspect by identifying news agencies within historical newspaper articles. Using data from newspapers in Switzerland and Luxembourg as part of the impresso project, we've trained our pipeline to recognize these entities. 
+Delivering swift and reliable news since the 1830s and 1840s, news agencies have played a pivotal role both nationally and internationally. However, understanding their precise impact on shaping news content has remained somewhat elusive. Our goal is to illuminate this aspect by identifying news agencies within historical newspaper articles. Using data from newspapers in Switzerland and Luxembourg as part of the impresso project, we've trained our pipeline to recognize these entities.
 
 If you're here, you likely seek to detect news agency entities in your own text. This notebook will guide you through the process of setting up a workflow to identify specific newspaper or agency mentions within your text.
 
 {/* cell:1 cell_type:markdown */}
-Install necessary libraries (if not already installed) and 
+Install necessary libraries (if not already installed) and
 download the necessary NLTK data.
 
 {/* cell:2 cell_type:code */}
+
 ```python
 !pip install python-dotenv
 !pip install transformers
@@ -29,18 +28,20 @@ download the necessary NLTK data.
 ```
 
 {/* cell:3 cell_type:markdown */}
-*Note: This notebook requires `HF_TOKEN` to be set in the environment variables. You can get your token by signing up on the [Hugging Face website](https://huggingface.co/join) and read more in the [official documentation](https://huggingface.co/docs/huggingface_hub/v0.20.2/en/quick-start#environment-variable). We use [dotenv](https://pypi.org/project/python-dotenv/) library to load the HF_TOKEN value from a local .env file*
+_Note: This notebook requires `HF_TOKEN` to be set in the environment variables. You can get your token by signing up on the [Hugging Face website](https://huggingface.co/join) and read more in the [official documentation](https://huggingface.co/docs/huggingface_hub/v0.20.2/en/quick-start#environment-variable). We use the [dotenv](https://pypi.org/project/python-dotenv/) library to load the `HF_TOKEN` value from a local `.env` file._
 
 {/* cell:4 cell_type:code */}
+
 ```python
 from dotenv import load_dotenv
 load_dotenv()  # take environment variables from .env.
 ```
 
 {/* cell:5 cell_type:markdown */}
-Now the fun part, this function will download the requried model and gives you the keys to successfullly detect news agencies in your text. 
+Now the fun part: this function will download the required model and give you the keys to successfully detect news agencies in your text.
 
 {/* cell:6 cell_type:code */}
+
 ```python
 from transformers import is_torch_available
 from transformers import pipeline
@@ -56,6 +57,7 @@ nlp = pipeline("newsagency-ner", model="impresso-project/bert-newsagency-ner-fr"
 Run the example below to see how it works.
 
 {/* cell:8 cell_type:code */}
+
 ```python
 # Example
 text = "Mon nom est François et j'habite à Paris. (Reuter)"
diff --git a/src/content/notebooks/generic-entity-api.mdx b/src/content/notebooks/generic-entity-api.mdx
index 48243cb..e896632 100644
--- a/src/content/notebooks/generic-entity-api.mdx
+++ b/src/content/notebooks/generic-entity-api.mdx
@@ -2,7 +2,7 @@
 githubUrl: https://github.com/impresso/impresso-datalab-notebooks/blob/main/2-entity/generic-entity-api.ipynb
 authors:
   - impresso-team
-  - EmanuelaBoros
+  # - EmanuelaBoros
 title: Detect Entities and Link them to Wikipedia and Wikidata in a Text through
   the Impresso API
 sha: 54802fcabc0e32a4a05a1b4f2761a54b9807b0c5
@@ -14,10 +14,13 @@ googleColabUrl: https://colab.research.google.com/github/impresso/impresso-datal
 Named entities such as organizations, locations, persons, and temporal expressions play a crucial role in the comprehension and analysis of both historical and contemporary texts. The HIPE-2022 project focuses on named entity recognition and classification (NERC) and entity linking (EL) in multilingual historical documents.
 
 ### About HIPE-2022
+
 HIPE-2022 involves processing diverse datasets from historical newspapers and classical commentaries, spanning approximately 200 years and multiple languages. The primary goal is to confront systems with challenges related to multilinguality, domain-specific entities, and varying annotation tag sets.
 
 ### Datasets
+
 The HIPE-2022 datasets are based on six primary datasets, but this model was only trained on **hipe2020** in French and German.
+
 - **ajmc**: Classical commentaries in German, French, and English.
 - **hipe2020**: Historical newspapers in German, French, and English.
 - **letemps**: Historical newspapers in French.
@@ -26,6 +29,7 @@ The HIPE-2022 datasets are based on six primary datasets, but this model was onl
 - **sonar**: Historical newspapers in German.
 
 ### Annotation Types and Levels
+
 HIPE-2022 employs an IOB tagging scheme (inside-outside-beginning format) for entity annotations. The annotation levels include:
 
 1. **TOKEN**: The annotated token.
@@ -37,6 +41,7 @@ HIPE-2022 employs an IOB tagging scheme (inside-outside-beginning format) for en
 7. **NE-NESTED**: Coarse type of the nested entity.
 
 ### Getting Started
+
 This notebook will guide you through setting up a workflow to identify named entities within your text using the HIPE-2022 trained pipeline. By leveraging this pipeline, you can detect mentions of people, places, organizations, and temporal expressions, enhancing your analysis and understanding of historical and contemporary documents.
 
 ---
@@ -45,10 +50,11 @@ This updated description provides a clear overview of the HIPE-2022 project's go
 *Note: This notebook *might* require `HF_TOKEN` to be set in the environment variables. You can get your token by signing up on the [Hugging Face website](https://huggingface.co/join) and read more in the [official documentation](https://huggingface.co/docs/huggingface_hub/v0.20.2/en/quick-start#environment-variable)*
 
 {/* cell:1 cell_type:markdown */}
-Install necessary libraries (if not already installed) and 
+Install necessary libraries (if not already installed) and
 download the necessary NLTK data.
 
 {/* cell:2 cell_type:code */}
+
 ```python
 !pip install transformers
 !pip install nltk
@@ -56,12 +62,13 @@ download the necessary NLTK data.
 ```
 
 {/* cell:3 cell_type:code */}
+
 ```python
 def print_nicely(results, text):
     # Print the timestamp and system ID
     print(f"Timestamp: {results.get('ts')}")
     print(f"System ID: {results.get('sys_id')}")
-    
+
     entities = results.get('nes', [])
     if entities:
         print(f"\n{'Entity':<20} {'Type':<15} {'Confidence NER':<15} {'Confidence NEL':<15} {'Start':<5} {'End':<5} {'Wikidata ID':<10} {'Wikipedia Page':<20}")
@@ -72,7 +79,7 @@ def print_nicely(results, text):
             wkd_id = entity.get('wkd_id', 'N/A')
             wkpedia_pagename = entity.get('wkpedia_pagename', 'N/A')
             print(f"{entity['surface']:<20} {entity['type']:<15} {confidence_ner:<15} {confidence_nel:<15} {entity['lOffset']:<5} {entity['rOffset']:<5} {wkd_id:<10} {wkpedia_pagename:<20}")
-        
+
         print("*" * 100)
         print('Testing offsets:')
         print("*" * 100)
@@ -84,7 +91,7 @@ def print_nicely(results, text):
             wkd_id = entity.get('wkd_id', 'N/A')
             wkpedia_pagename = entity.get('wkpedia_pagename', 'N/A')
             print(f"{text[entity['lOffset']:entity['rOffset']]:<20} {entity['type']:<15} {confidence_ner:<15} {confidence_nel:<15} {entity['lOffset']:<5} {entity['rOffset']:<5} {wkd_id:<10} {wkpedia_pagename:<20}")
-            
+
         print("*" * 100)
         print('Testing offsets in the returned text:')
         print("*" * 100)
@@ -96,14 +103,15 @@ def print_nicely(results, text):
             wkd_id = entity.get('wkd_id', 'N/A')
             wkpedia_pagename = entity.get('wkpedia_pagename', 'N/A')
             print(f"{results['text'][entity['lOffset']:entity['rOffset']]:<20} {entity['type']:<15} {confidence_ner:<15} {confidence_nel:<15} {entity['lOffset']:<5} {entity['rOffset']:<5} {wkd_id:<10} {wkpedia_pagename:<20}")
-            
+
 
 ```
 
 {/* cell:4 cell_type:markdown */}
-Now the fun part, this function will download the requried model and gives you the keys to successfullly detect entities in your text. 
+Now the fun part: this function will download the required model and give you the keys to successfully detect entities in your text.
 
 {/* cell:5 cell_type:code */}
+
 ```python
 from utils import get_linked_entities
 import requests
@@ -117,41 +125,49 @@ for sentence in sentences:
 ```
 
 {/* cell:6 cell_type:code */}
+
 ```python
 
 ```
 
 {/* cell:7 cell_type:code */}
+
 ```python
 
 ```
 
 {/* cell:8 cell_type:code */}
+
 ```python
 
 ```
 
 {/* cell:9 cell_type:code */}
+
 ```python
 
 ```
 
 {/* cell:10 cell_type:code */}
+
 ```python
 
 ```
 
 {/* cell:11 cell_type:code */}
+
 ```python
 
 ```
 
 {/* cell:12 cell_type:code */}
+
 ```python
 
 ```
 
 {/* cell:13 cell_type:code */}
+
 ```python
 
 ```
diff --git a/src/content/notebooks/impresso-py-collections.mdx b/src/content/notebooks/impresso-py-collections.mdx
index bdb3935..af4faf8 100644
--- a/src/content/notebooks/impresso-py-collections.mdx
+++ b/src/content/notebooks/impresso-py-collections.mdx
@@ -1,7 +1,8 @@
 ---
 githubUrl: https://github.com/impresso/impresso-py/blob/main/examples/notebooks/collections.ipynb
 authors:
-  - RomanKalyakin
+  # - RomanKalyakin
+  - impresso-team
 title: Search collections
 sha: fbebc19629cfc008a085283e61c0669de326add9
 date: 2024-09-18T15:04:39Z
@@ -9,6 +10,7 @@ googleColabUrl: https://colab.research.google.com/github/impresso/impresso-py/bl
 ---
 
 {/* cell:0 cell_type:code */}
+
 ```python
 from impresso import connect
 
@@ -16,28 +18,33 @@ impresso = connect()
 ```
 
 {/* cell:1 cell_type:code */}
+
 ```python
 result = impresso.collections.find()
 result
 ```
 
 {/* cell:2 cell_type:markdown */}
+
 # Get collection
 
 Get metadata of a colection by its ID.
 
 {/* cell:3 cell_type:code */}
+
 ```python
 result = impresso.collections.get("local-roka-tOrwrOG3")
 result
 ```
 
 {/* cell:4 cell_type:markdown */}
+
 ## Get collection items
 
 Get items from a collection by its ID.
 
 {/* cell:5 cell_type:code */}
+
 ```python
 colection_id = result.raw["uid"]
 items = impresso.collections.items(colection_id)
@@ -45,23 +52,28 @@ items
 ```
 
 {/* cell:6 cell_type:markdown */}
+
 ## Remove items from collection
 
 {/* cell:7 cell_type:code */}
+
 ```python
 item_id = items.pydantic.data[0].uid
 item_id
 ```
 
 {/* cell:8 cell_type:code */}
+
 ```python
 impresso.collections.remove_items(colection_id, [item_id])
 ```
 
 {/* cell:9 cell_type:markdown */}
+
 ## Add items to collection
 
 {/* cell:10 cell_type:code */}
+
 ```python
 impresso.collections.add_items(colection_id, [item_id])
 ```
diff --git a/src/content/notebooks/setup.mdx b/src/content/notebooks/impresso-py-connect.mdx
similarity index 94%
rename from src/content/notebooks/setup.mdx
rename to src/content/notebooks/impresso-py-connect.mdx
index fdd9fe5..1589af6 100644
--- a/src/content/notebooks/setup.mdx
+++ b/src/content/notebooks/impresso-py-connect.mdx
@@ -1,7 +1,7 @@
 ---
-title: Initialize Impresso Client
+title: How to connect to the API
 excerpt: This is the first notebook in the Enter Impresso series.
-githubUrl: https://github.com/impresso/impresso-py/blob/main/examples/notebooks/basic.ipynb
+githubUrl: https://github.com/impresso/impresso-datalab-notebooks/blob/main/1-starter/ST_01_basics.ipynb
 tags:
   - hello-world
 binderUrl: https://mybinder.org/v2/gh/binder-examples/r/master?urlpath=rstudio
@@ -9,12 +9,13 @@ authors:
   - impresso-team
 date: 2024-09-18T15:04:39Z
 seealso:
-  - detect-news-agency-with-impresso-model
+  - impresso-py-search
 sha: fbebc19629cfc008a085283e61c0669de326add9
 googleColabUrl: https://colab.research.google.com/github/impresso/impresso-py/blob/main/examples/notebooks/basic.ipynb
 ---
 
 {/* cell:0 cell_type:code */}
+
 ```python
 from impresso import connect
 
@@ -22,14 +23,15 @@ impresso = connect()
 ```
 
 {/* cell:1 cell_type:markdown */}
+
 ## Search articles
 
 In this notebook, we will search for articles that contain the term "European Union" in the text. The results are ordered by date.
 
 Below the result container is rendered as an overview of what it contains.
 
-
 {/* cell:2 cell_type:code */}
+
 ```python
 result = impresso.search.find(
     q="European Union",
@@ -46,6 +48,7 @@ The `pydantic` property is a [Pydantic](https://docs.pydantic.dev/latest/) model
 We use the `data` property of the response to iterate over the page of the results and return excerpts of the articles that contain the search term.
 
 {/* cell:4 cell_type:code */}
+
 ```python
 result = impresso.search.find(
     q="European Union",
@@ -59,36 +62,45 @@ for article in result.pydantic.data[:3]:
 There are several useful properties on the result object that let us know the total nubmer of results found, the current page and its size.
 
 {/* cell:6 cell_type:code */}
+
 ```python
 print("%i results were found for this term. The current result object contains %i items starting from the item number %i" % (result.total, result.size, result.offset))
 ```
 
 {/* cell:7 cell_type:markdown */}
+
 ### Pydantic
+
 The full response from the Impresso API as a pydantic model.
 
 {/* cell:8 cell_type:code */}
+
 ```python
 result.pydantic
 ```
 
 {/* cell:9 cell_type:markdown */}
+
 ### Pandas
-We can also get the search results as a [Pandas](https://pandas.pydata.org/) DataFrame. 
+
+We can also get the search results as a [Pandas](https://pandas.pydata.org/) DataFrame.
 This allows us to easily manipulate and analyze the data using pandas' powerful data manipulation capabilities.
 
 {/* cell:10 cell_type:code */}
+
 ```python
 df = result.df
 df.head(2)
 ```
 
 {/* cell:11 cell_type:markdown */}
+
 ## Get an article
 
 Below we will use the `articles` resource to get an article by its ID:
 
 {/* cell:12 cell_type:code */}
+
 ```python
 article = impresso.articles.get("NZZ-1794-08-09-a-i0002")
 article
@@ -98,23 +110,26 @@ article
 We can also get it as a Pydantic model or as a DataFrame.
 
 {/* cell:14 cell_type:code */}
+
 ```python
 article.pydantic.excerpt
 ```
 
 {/* cell:15 cell_type:code */}
+
 ```python
 article.df[['uid', 'country', 'language']]
 
 ```
 
 {/* cell:16 cell_type:markdown */}
+
 ## Search facets
 
 In this cell, we will search for facets related to the term "fromage" in the Impresso collection. This is a convenient way to see a breakdown of the search results by country.
 
-
 {/* cell:17 cell_type:code */}
+
 ```python
 country_facet = impresso.search.facet("country", q="fromage")
 country_facet.df
diff --git a/src/content/notebooks/impresso-py-maps.mdx b/src/content/notebooks/impresso-py-maps.mdx
index 5523c11..d4b4286 100644
--- a/src/content/notebooks/impresso-py-maps.mdx
+++ b/src/content/notebooks/impresso-py-maps.mdx
@@ -3,7 +3,7 @@ title: Exploring impresso with maps
 githubUrl: https://github.com/impresso/impresso-datalab-notebooks/blob/main/4-impresso-py/maps_explore.ipynb
 authors:
   - impresso-team
-  - RomanKalyakin
+  # - RomanKalyakin
 sha: 168c669246385a2ec6c3e088b0081364f129d11c
 date: 2024-09-27T12:54:12Z
 googleColabUrl: https://colab.research.google.com/github/impresso/impresso-datalab-notebooks/blob/main/4-impresso-py/maps_explore.ipynb
diff --git a/src/content/notebooks/impresso-py-network.mdx b/src/content/notebooks/impresso-py-network.mdx
index b56ce8f..07836df 100644
--- a/src/content/notebooks/impresso-py-network.mdx
+++ b/src/content/notebooks/impresso-py-network.mdx
@@ -3,7 +3,7 @@ title: Network graph with Impresso Py
 githubUrl: https://github.com/impresso/impresso-datalab-notebooks/blob/main/4-impresso-py/network_graph.ipynb
 authors:
   - impresso-team
-  - RomanKalyakin
+  # - RomanKalyakin
 sha: 168c669246385a2ec6c3e088b0081364f129d11c
 date: 2024-09-27T12:54:12Z
 googleColabUrl: https://colab.research.google.com/github/impresso/impresso-datalab-notebooks/blob/main/4-impresso-py/network_graph.ipynb
diff --git a/src/content/notebooks/impresso-py-search.mdx b/src/content/notebooks/impresso-py-search.mdx
index 6229d52..e750ff7 100644
--- a/src/content/notebooks/impresso-py-search.mdx
+++ b/src/content/notebooks/impresso-py-search.mdx
@@ -2,7 +2,7 @@
 githubUrl: https://github.com/impresso/impresso-py/blob/main/examples/notebooks/search.ipynb
 authors:
   - impresso-team
-  - RomanKalyakin
+  # - RomanKalyakin
 seealso:
   - impresso-py-collections
 title: Search
diff --git a/src/content/series/enter-impresso-models.mdx b/src/content/series/enter-impresso-models.mdx
index 2599429..fad32df 100644
--- a/src/content/series/enter-impresso-models.mdx
+++ b/src/content/series/enter-impresso-models.mdx
@@ -2,6 +2,7 @@
 title: Enrich your Data with Impresso Models
 excerpt: "Use Impresso’s models for the semantic indexation of your personal data collections "
 notebooks:
+  - generic-entity-api
   - detect-news-agency-with-impresso-model
 ---
 
diff --git a/src/content/series/enter-impresso.mdx b/src/content/series/enter-impresso.mdx
index c9707cb..1dc5084 100644
--- a/src/content/series/enter-impresso.mdx
+++ b/src/content/series/enter-impresso.mdx
@@ -1,8 +1,8 @@
 ---
 title: Getting Started
-excerpt: "Three easy steps to enter the impresso way of doing research."
+excerpt: "Three simple steps to begin research with Impresso."
 notebooks:
-  - setup
+  - impresso-py-connect
 ---
 
 Create an Impresso account and learn how to access our API.
diff --git a/src/content/series/entities.mdx b/src/content/series/entities.mdx
index 0d5eefc..5886ccc 100644
--- a/src/content/series/entities.mdx
+++ b/src/content/series/entities.mdx
@@ -1,10 +1,9 @@
 ---
-title: Explore and Visualise your Data
-excerpt: "Use Impresso notebook templates as starting points for your analysis."
+title: Explore and Visualise your Impresso Data
+excerpt: "Notebook templates offer complementary views on your Impresso personal collections and external datasets beyond the capabilities of the Impresso Web App."
 notebooks:
   - impresso-py-maps
   - impresso-py-network
-  - generic-entity-api
 category:
   - explorations
 position: central-column