From 988efcecb759043ff673eb4ea1665baafe04da46 Mon Sep 17 00:00:00 2001 From: donsiamese Date: Fri, 15 Nov 2024 12:20:03 +0100 Subject: [PATCH] Add badge to report issue (#53) * fix #48 * integrated lang model indicator * fix #49 --- src/components/NotebookCard.tsx | 19 +++++++- src/components/NotebookViewer.tsx | 27 +++++++++--- src/content/config.ts | 3 +- .../notebooks/impresso-py-collections.mdx | 1 + src/content/notebooks/impresso-py-connect.mdx | 1 + src/content/notebooks/impresso-py-maps.mdx | 1 + src/content/notebooks/impresso-py-network.mdx | 36 +++++++++++++-- .../ne-processing-with-impresso-api.mdx | 44 +++++++++++++------ ...newsagency-processing-with-impresso-hf.mdx | 31 ++++++++----- src/styles/global.css | 26 +++++++++++ 10 files changed, 153 insertions(+), 36 deletions(-) diff --git a/src/components/NotebookCard.tsx b/src/components/NotebookCard.tsx index 5c033b3..84032eb 100644 --- a/src/components/NotebookCard.tsx +++ b/src/components/NotebookCard.tsx @@ -4,11 +4,13 @@ import Link from "./Link.tsx" import Avatar from "boring-avatars" import { ArrowRight } from "iconoir-react" import { DateTime } from "luxon" +import { OverlayTrigger, Tooltip } from "react-bootstrap" export interface Notebook { slug: string href: string title: string + langModel?: string excerpt?: string githubUrl?: string googleColabUrl?: string @@ -32,20 +34,33 @@ const NotebookCard: React.FC<{ accessDateTime, "- title:", notebook?.title, + "notebook.langModel", + notebook.langModel ) return (
-
+
+
+ + Model language tag + + } + > + {notebook.langModel} + +
- + Open In Colab = ({ -
- By{" "} - {notebook.authors.map((author) => ( - - ))} +
+
+ By{" "} + {notebook.authors.map((author) => ( + + ))} +
+ {notebook.langModel && ( +
+

Language model is in: 

+ + Language model tag + + } + > + {notebook.langModel} + +
+ )}
{notebook.googleColabUrl ? ( diff --git a/src/content/config.ts b/src/content/config.ts index 74d19af..c20e637 100644 --- a/src/content/config.ts +++ b/src/content/config.ts @@ -12,6 +12,7 @@ const notebooks = defineCollection({ schema: z.object({ title: z.string().optional(), url: z.string().url().optional(), + langModel: z.string().optional(), githubUrl: z.string().url().optional(), googleColabUrl: z.string().url().optional(), sha: z.string().optional(), @@ -42,7 +43,7 @@ const plans = defineCollection({ status: z.string().optional(), iconColor: z.string().optional(), icon: z.enum(PlanIcons as any).optional(), - }), + }) ) .optional(), requirements: z.array(z.enum(Requirements as any)), diff --git a/src/content/notebooks/impresso-py-collections.mdx b/src/content/notebooks/impresso-py-collections.mdx index 3fe41c9..c3c81f7 100644 --- a/src/content/notebooks/impresso-py-collections.mdx +++ b/src/content/notebooks/impresso-py-collections.mdx @@ -5,6 +5,7 @@ authors: title: Search collections sha: 4a05f4772be7279de1908f46c93dc12de334d112 date: 2024-10-11T07:37:06Z +langModel: En googleColabUrl: https://colab.research.google.com/github/impresso/impresso-py/blob/main/examples/notebooks/collections.ipynb links: [] excerpt: The notebook shows how to find collections associated with your Impresso account and retrieve details about specific collections using their IDs. Then it demonstrates how to search, get, add, and remove items from collections using the Impresso Python Library. diff --git a/src/content/notebooks/impresso-py-connect.mdx b/src/content/notebooks/impresso-py-connect.mdx index 8eb692a..a7d1b89 100644 --- a/src/content/notebooks/impresso-py-connect.mdx +++ b/src/content/notebooks/impresso-py-connect.mdx @@ -12,6 +12,7 @@ seealso: - impresso-py-search - impresso-py-collections sha: bdbbe27d3eee96af6eebbb69d70cd632f4175925 +langModel: En googleColabUrl: https://colab.research.google.com/github/impresso/impresso-datalab-notebooks/blob/main/starter/basics_ImpressoAPI.ipynb links: - href: https://impresso-project.ch/datalab/ diff --git a/src/content/notebooks/impresso-py-maps.mdx b/src/content/notebooks/impresso-py-maps.mdx index 04536f9..4e13493 100644 --- a/src/content/notebooks/impresso-py-maps.mdx +++ b/src/content/notebooks/impresso-py-maps.mdx @@ -5,6 +5,7 @@ authors: - impresso-team sha: caf4e6e0a677569953b866f9c6ffe0c6d3d12dc5 date: 2024-10-25T13:17:54Z +langModel: En googleColabUrl: https://colab.research.google.com/github/impresso/impresso-datalab-notebooks/blob/main/explore-vis/place-entities_map.ipynb links: - href: https://impresso-project.ch/ diff --git a/src/content/notebooks/impresso-py-network.mdx b/src/content/notebooks/impresso-py-network.mdx index e2fe505..b947d69 100644 --- a/src/content/notebooks/impresso-py-network.mdx +++ b/src/content/notebooks/impresso-py-network.mdx @@ -5,6 +5,7 @@ authors: - impresso-team sha: dd13ddcc0ba2f4a2b24face9790c46595dc2ebca date: 2024-10-27T13:19:55Z +langModel: En googleColabUrl: https://colab.research.google.com/github/impresso/impresso-datalab-notebooks/blob/main/explore-vis/entity_network.ipynb links: - href: https://en.wikipedia.org/wiki/Prague_Spring @@ -15,22 +16,32 @@ seealso: {/* cell:0 cell_type:markdown */} - - Open In Colab + + Open In Colab {/* cell:1 cell_type:markdown */} + ## Install dependencies {/* cell:2 cell_type:code */} + ```python %pip install -q impresso ipysigma networkx tqdm ``` {/* cell:3 cell_type:markdown */} + ## Connect to Impresso {/* cell:4 cell_type:code */} + ```python from impresso import connect, OR, AND @@ -38,16 +49,19 @@ impresso_session = connect() ``` {/* cell:5 cell_type:markdown */} + ## Part 1: Get entities and their co-occurrences ### First, we retrieve all person entities mentioned in all articles that talk about the [Prague Spring](https://en.wikipedia.org/wiki/Prague_Spring). {/* cell:6 cell_type:code */} + ```python query = OR("Prague Spring", "Prager Frühling", "Printemps de Prague") ``` {/* cell:7 cell_type:code */} + ```python persons = impresso_session.search.facet( facet="person", @@ -59,13 +73,15 @@ persons ``` {/* cell:8 cell_type:markdown */} + ### Next, we generate all unique pairs of entities with a mention count higher than `n`. - + First, entities that meet the mention threshold are selected, and then all possible pairs are generated using the `itertools.combinations` function. The `n` value can be adjusted so that we don't get too many entity combinations. A sweet spot is just under 500 combinations. {/* cell:9 cell_type:code */} + ```python import itertools @@ -81,6 +97,7 @@ print(f"Total combinations: {len(person_ids_combinations)}") ``` {/* cell:10 cell_type:code */} + ```python if len(person_ids_combinations) > 500: msg = ( @@ -99,6 +116,7 @@ if len(person_ids_combinations) > 500: This piece of code gets a facet for every combination of named entities. It is a single call per combination so it may take a while for a large number of combinations. {/* cell:12 cell_type:code */} + ```python from impresso.util.error import ImpressoError from time import sleep @@ -134,6 +152,7 @@ for idx, combo in tqdm(enumerate(person_ids_combinations), total=len(person_ids_ We put all in a dataframe {/* cell:14 cell_type:code */} + ```python import pandas as pd @@ -153,6 +172,7 @@ connections_df And save the connections to a CSV file that can be visualised independently in Part 2. Provide a name for the file. {/* cell:16 cell_type:code */} + ```python from tempfile import gettempdir @@ -165,9 +185,11 @@ print(f"File saved in {connections_csv_filepath}") ``` {/* cell:17 cell_type:markdown */} + ## Part 2: visualise {/* cell:18 cell_type:code */} + ```python import pandas as pd @@ -179,6 +201,7 @@ connections_df Group connections counting number of mentions and preserve the URL. {/* cell:20 cell_type:code */} + ```python grouped_connections_df = connections_df.groupby(['node_a', 'node_b']) \ .agg({'timestamp': lambda x: ', '.join(list(x)), 'count': 'sum', 'url': lambda x: list(set(x))[0]}) \ @@ -187,6 +210,7 @@ grouped_connections_df ``` {/* cell:21 cell_type:code */} + ```python import networkx as nx @@ -206,6 +230,7 @@ G.nodes Save the file so that it could be downloaded and used elsewhere. {/* cell:23 cell_type:code */} + ```python from tempfile import gettempdir @@ -223,6 +248,7 @@ print(f"File saved in {gefx_filepath}") If running in Colab - activate custom widgets to allow `ipysigma` to render the graph. {/* cell:25 cell_type:code */} + ```python try: from google.colab import output @@ -235,6 +261,7 @@ except: Render the graph. {/* cell:27 cell_type:code */} + ```python import ipywidgets @@ -246,7 +273,7 @@ node_size_widget = ipywidgets.Dropdown( ) ipywidgets.Box( [ - ipywidgets.Label(value='What should represent the size of the nodes:'), + ipywidgets.Label(value='What should represent the size of the nodes:'), node_size_widget ] ) @@ -257,6 +284,7 @@ ipywidgets.Box( Refresh the next cell after changing the value above. {/* cell:29 cell_type:code */} + ```python import networkx as nx from ipysigma import Sigma diff --git a/src/content/notebooks/ne-processing-with-impresso-api.mdx b/src/content/notebooks/ne-processing-with-impresso-api.mdx index 226bc73..24b4bef 100644 --- a/src/content/notebooks/ne-processing-with-impresso-api.mdx +++ b/src/content/notebooks/ne-processing-with-impresso-api.mdx @@ -5,6 +5,7 @@ authors: - impresso-team sha: cc20b1b70db4da2aea4042c0e8d82a52e6ffb762 date: 2024-10-27T13:47:15Z +langModel: En googleColabUrl: https://colab.research.google.com/github/impresso/impresso-datalab-notebooks/blob/main/annotate/NE-processing_ImpressoAPI.ipynb links: - href: https://github.com/impresso/impresso-datalab-notebooks/blob/main/annotate/NE-processing_ImpressoHF.ipynb @@ -19,23 +20,32 @@ seealso: {/* cell:0 cell_type:markdown */} - - Open In Colab + + Open In Colab {/* cell:1 cell_type:markdown */} + ## What is this notebook about? -This notebook is similar to the [NE-processing_ImpressoHF](https://github.com/impresso/impresso-datalab-notebooks/blob/main/annotate/NE-processing_ImpressoHF.ipynb) one, except that instead of loading the model from Hugging Face and executing them locally (or on Colab), here we use the annotation functionalities provided by the Impresso API, using the Impresso Python Library. Behind the scene the same models are used. +This notebook is similar to the [NE-processing_ImpressoHF](https://github.com/impresso/impresso-datalab-notebooks/blob/main/annotate/NE-processing_ImpressoHF.ipynb) one, except that instead of loading the model from Hugging Face and executing them locally (or on Colab), here we use the annotation functionalities provided by the Impresso API, using the Impresso Python Library. Behind the scene the same models are used. For more information on the models, please refer to the [NE-processing_ImpressoHF](https://github.com/impresso/impresso-datalab-notebooks/blob/main/annotate/NE-processing_ImpressoHF.ipynb) notebook (we advised starting with it). For an introduction to the Impresso Python Library, please refer to the [basics_ImpressoAPI](https://github.com/impresso/impresso-datalab-notebooks/blob/main/starter/basics_ImpressoAPI.ipynb). ## What will you learn in this notebook? + By the end of this notebook, you will know how to call the NER and EL Impresso annotation services through the Impresso API, using the Impresso Python Library {/* cell:2 cell_type:code */} + ```python !pip install --upgrade --force-reinstall impresso from impresso import version @@ -43,22 +53,25 @@ print(version) ``` {/* cell:3 cell_type:code */} + ```python from impresso import connect impresso_session = connect() ``` {/* cell:4 cell_type:markdown */} + ## Named entity recognition {/* cell:5 cell_type:code */} + ```python # We define some test input -text = """In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles, - where Marie Antoinette, the Queen of France, alongside Maximilien Robespierre, a leading member of the National Assembly, - debated with Jean-Jacques Rousseau, the famous philosopher, and Charles de Talleyrand, the Bishop of Autun, - regarding the future of the French monarchy. At the same time, across the Atlantic in Philadelphia, - George Washington, the first President of the United States, and Thomas Jefferson, the nation's Secretary of State, +text = """In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles, + where Marie Antoinette, the Queen of France, alongside Maximilien Robespierre, a leading member of the National Assembly, + debated with Jean-Jacques Rousseau, the famous philosopher, and Charles de Talleyrand, the Bishop of Autun, + regarding the future of the French monarchy. At the same time, across the Atlantic in Philadelphia, + George Washington, the first President of the United States, and Thomas Jefferson, the nation's Secretary of State, were drafting policies for the newly established American government following the signing of the Constitution.""" print(text) @@ -71,16 +84,18 @@ result.df.tail(10) ``` {/* cell:6 cell_type:markdown */} + ## Named entity linking {/* cell:7 cell_type:code */} + ```python # We define some test input -text_with_markers = """In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles, - where [START] Marie Antoinette, the Queen of France [END], alongside Maximilien Robespierre, a leading member of the National Assembly, - debated with Jean-Jacques Rousseau, the famous philosopher, and Charles de Talleyrand, the Bishop of Autun, - regarding the future of the French monarchy. At the same time, across the Atlantic in Philadelphia, - George Washington, the first President of the United States, and Thomas Jefferson, the nation's Secretary of State, +text_with_markers = """In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles, + where [START] Marie Antoinette, the Queen of France [END], alongside Maximilien Robespierre, a leading member of the National Assembly, + debated with Jean-Jacques Rousseau, the famous philosopher, and Charles de Talleyrand, the Bishop of Autun, + regarding the future of the French monarchy. At the same time, across the Atlantic in Philadelphia, + George Washington, the first President of the United States, and Thomas Jefferson, the nation's Secretary of State, were drafting policies for the newly established American government following the signing of the Constitution.""" print(text_with_markers) @@ -92,9 +107,11 @@ result ``` {/* cell:8 cell_type:markdown */} + ## Named entity processing {/* cell:9 cell_type:code */} + ```python result = impresso_session.tools.ner_nel( text=text @@ -103,6 +120,7 @@ result.df ``` {/* cell:10 cell_type:code */} + ```python ``` diff --git a/src/content/notebooks/newsagency-processing-with-impresso-hf.mdx b/src/content/notebooks/newsagency-processing-with-impresso-hf.mdx index 310525a..3c4f514 100644 --- a/src/content/notebooks/newsagency-processing-with-impresso-hf.mdx +++ b/src/content/notebooks/newsagency-processing-with-impresso-hf.mdx @@ -7,6 +7,7 @@ excerpt: Impresso BERT-based pipeline, trained on Swiss and Luxembourgish detect these entities in your own text. sha: cc20b1b70db4da2aea4042c0e8d82a52e6ffb762 date: 2024-10-27T13:47:15Z +langModel: En googleColabUrl: https://colab.research.google.com/github/impresso/impresso-datalab-notebooks/blob/main/annotate/newsagency-processing_ImpressoHF.ipynb authors: - impresso-team @@ -29,7 +30,6 @@ seealso: {/* cell:0 cell_type:markdown */} - {/* cell:1 cell_type:markdown */} Delivering swift and reliable news since the 1830s and 1840s, news agencies have played a pivotal role both nationally and internationally. However, understanding their precise impact on shaping news content has remained somewhat elusive. Our goal is to illuminate this aspect by identifying news agencies within historical newspaper articles. Using data from newspapers in Switzerland and Luxembourg as part of the Impresso project, we've trained our pipeline to recognize these entities. @@ -42,6 +42,7 @@ Install necessary libraries (if not already installed) and download the necessary NLTK data. {/* cell:3 cell_type:code */} + ```python !pip install transformers !pip install stopwordsiso @@ -52,12 +53,13 @@ download the necessary NLTK data. Now the fun part, this function will download the requried model and gives you the keys to successfullly detect news agencies in your text. {/* cell:5 cell_type:code */} + ```python from transformers import pipeline -newsagency_ner_pipeline = pipeline("newsagency-ner", - model="impresso-project/ner-newsagency-bert-multilingual", - trust_remote_code=True, +newsagency_ner_pipeline = pipeline("newsagency-ner", + model="impresso-project/ner-newsagency-bert-multilingual", + trust_remote_code=True, device='cpu') ``` @@ -65,12 +67,13 @@ newsagency_ner_pipeline = pipeline("newsagency-ner", Run the example below to see how it works. {/* cell:7 cell_type:code */} + ```python -sentence = """In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles, - where Marie Antoinette, the Queen of France, alongside Maximilien Robespierre, a leading member of the National Assembly, - debated with Jean-Jacques Rousseau, the famous philosopher, and Charles de Talleyrand, the Bishop of Autun, - regarding the future of the French monarchy. At the same time, across the Atlantic in Philadelphia, - George Washington, the first President of the United States, and Thomas Jefferson, the nation's Secretary of State, +sentence = """In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles, + where Marie Antoinette, the Queen of France, alongside Maximilien Robespierre, a leading member of the National Assembly, + debated with Jean-Jacques Rousseau, the famous philosopher, and Charles de Talleyrand, the Bishop of Autun, + regarding the future of the French monarchy. At the same time, across the Atlantic in Philadelphia, + George Washington, the first President of the United States, and Thomas Jefferson, the nation's Secretary of State, were drafting policies for the newly established American government following the signing of the Constitution. (Reuter)""" # Function to print each entry nicely @@ -113,21 +116,27 @@ v3 or later. ---

- Impresso Project Logo + Impresso Project Logo

- {/* cell:9 cell_type:code */} + ```python ``` {/* cell:10 cell_type:code */} + ```python ``` {/* cell:11 cell_type:code */} + ```python ``` diff --git a/src/styles/global.css b/src/styles/global.css index 319275f..a13dba2 100644 --- a/src/styles/global.css +++ b/src/styles/global.css @@ -462,3 +462,29 @@ a.dropdown-item::after { a.dropdown-item:hover::after { transform: scaleX(1); } + +.lang-tag-name { + font-size: var(--fs14px); + color: rgba(var(--impresso-color-black-rgb), 0.68); + font-weight: var(--impresso-wght-bold); + background-color: var(--impresso-color-yellow); + padding: 0.1rem 0.3rem; + border-radius: var(--impresso-border-radius-xs); +} + +.NotebookCard .lang-tag-name { + position: absolute; + scale: 0.8; + left: -0.5rem; + top: -0.5rem; + padding: 0 0.2rem; +} + +.LangModelTag { + display: flex; + align-items: center; +} + +.LangModelTag p { + font-size: var(--fs14px); +}