diff --git a/2024/lab3/Lab3 - KG from unstructured data.ipynb b/2024/lab3/Lab3 - KG from unstructured data.ipynb index af10803..279016c 100644 --- a/2024/lab3/Lab3 - KG from unstructured data.ipynb +++ b/2024/lab3/Lab3 - KG from unstructured data.ipynb @@ -37,140 +37,140 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: requests in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (2.31.0)\n", - "Requirement already satisfied: tqdm in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (4.66.2)\n", - "Requirement already satisfied: spacy in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (3.7.4)\n", - "Requirement already satisfied: textblob in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (0.18.0.post0)\n", - "Requirement already satisfied: oxrdflib in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (0.3.6)\n", - "Requirement already satisfied: fastembed in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (0.2.2)\n", - "Requirement already satisfied: qdrant-client in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (1.7.3)\n", - "Requirement already satisfied: langchain in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (0.1.7)\n", - "Requirement already satisfied: langchain-community in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (0.0.20)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from requests) (2.2.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 
in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from requests) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from requests) (3.6)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from requests) (2024.2.2)\n", - "Requirement already satisfied: jinja2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (3.1.3)\n", - "Requirement already satisfied: setuptools in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (59.6.0)\n", - "Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (0.3.4)\n", - "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (2.6.1)\n", - "Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (6.4.0)\n", - "Requirement already satisfied: numpy>=1.19.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (1.26.4)\n", - "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (2.0.10)\n", - "Requirement already satisfied: packaging>=20.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (23.2)\n", - "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (3.3.0)\n", - 
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (3.0.9)\n", - "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (2.4.8)\n", - "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (2.0.8)\n", - "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (1.0.5)\n", - "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (3.0.12)\n", - "Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (8.2.3)\n", - "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (1.0.10)\n", - "Requirement already satisfied: typer<0.10.0,>=0.3.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (0.9.0)\n", - "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy) (1.1.2)\n", - "Requirement already satisfied: nltk>=3.8 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from textblob) (3.8.1)\n", - "Requirement already satisfied: rdflib<8.0,>=6.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from oxrdflib) (7.0.0)\n", - "Requirement already satisfied: pyoxigraph~=0.3.14 in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from oxrdflib) (0.3.22)\n", - "Requirement already satisfied: huggingface-hub<0.21,>=0.20 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from fastembed) (0.20.3)\n", - "Requirement already satisfied: onnxruntime<2.0.0,>=1.17.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from fastembed) (1.17.0)\n", - "Requirement already satisfied: loguru<0.8.0,>=0.7.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from fastembed) (0.7.2)\n", - "Requirement already satisfied: onnx<2.0.0,>=1.15.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from fastembed) (1.15.0)\n", - "Requirement already satisfied: tokenizers<0.16.0,>=0.15.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from fastembed) (0.15.2)\n", - "Requirement already satisfied: httpx[http2]>=0.14.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from qdrant-client) (0.26.0)\n", - "Requirement already satisfied: grpcio>=1.41.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from qdrant-client) (1.60.1)\n", - "Requirement already satisfied: portalocker<3.0.0,>=2.7.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from qdrant-client) (2.8.2)\n", - "Requirement already satisfied: grpcio-tools>=1.41.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from qdrant-client) (1.60.1)\n", - "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from langchain) (4.0.3)\n", - "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from langchain) (0.6.4)\n", - "Requirement already satisfied: langsmith<0.1,>=0.0.83 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from langchain) (0.0.87)\n", - "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from langchain) (1.33)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from langchain) (3.9.3)\n", - "Requirement already satisfied: PyYAML>=5.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from langchain) (6.0.1)\n", - "Requirement already satisfied: langchain-core<0.2,>=0.1.22 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from langchain) (0.1.23)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from langchain) (2.0.27)\n", - "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from langchain) (8.2.3)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.5)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.2.0)\n", - "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain) (3.20.2)\n", - "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain) (0.9.0)\n", - "Requirement already satisfied: protobuf<5.0dev,>=4.21.6 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from grpcio-tools>=1.41.0->qdrant-client) (4.25.3)\n", - "Requirement already satisfied: httpcore==1.* in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from httpx[http2]>=0.14.0->qdrant-client) (1.0.3)\n", - "Requirement already satisfied: sniffio in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from httpx[http2]>=0.14.0->qdrant-client) (1.3.0)\n", - "Requirement already satisfied: anyio in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from httpx[http2]>=0.14.0->qdrant-client) (4.3.0)\n", - "Requirement already satisfied: h2<5,>=3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from httpx[http2]>=0.14.0->qdrant-client) (4.1.0)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from httpcore==1.*->httpx[http2]>=0.14.0->qdrant-client) (0.14.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from huggingface-hub<0.21,>=0.20->fastembed) (2024.2.0)\n", - "Requirement already satisfied: filelock in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from huggingface-hub<0.21,>=0.20->fastembed) (3.13.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from huggingface-hub<0.21,>=0.20->fastembed) (4.9.0)\n", - "Requirement already satisfied: jsonpointer>=1.9 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain) (2.4)\n", - "Requirement already satisfied: joblib in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from nltk>=3.8->textblob) (1.3.2)\n", - "Requirement already satisfied: click in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from nltk>=3.8->textblob) (8.1.7)\n", - "Requirement already satisfied: regex>=2021.8.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from nltk>=3.8->textblob) (2023.12.25)\n", - "Requirement already satisfied: sympy in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from onnxruntime<2.0.0,>=1.17.0->fastembed) (1.12)\n", - "Requirement already satisfied: flatbuffers in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from onnxruntime<2.0.0,>=1.17.0->fastembed) (23.5.26)\n", - "Requirement already satisfied: coloredlogs in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from onnxruntime<2.0.0,>=1.17.0->fastembed) (15.0.1)\n", - "Requirement already satisfied: pydantic-core==2.16.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from 
pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (2.16.2)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (0.6.0)\n", - "Requirement already satisfied: isodate<0.7.0,>=0.6.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from rdflib<8.0,>=6.3->oxrdflib) (0.6.1)\n", - "Requirement already satisfied: pyparsing<4,>=2.1.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from rdflib<8.0,>=6.3->oxrdflib) (3.1.1)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.3)\n", - "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.7.11)\n", - "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.1.4)\n", - "Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from weasel<0.4.0,>=0.1.0->spacy) (0.16.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from jinja2->spacy) (2.1.5)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from anyio->httpx[http2]>=0.14.0->qdrant-client) (1.2.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from 
h2<5,>=3->httpx[http2]>=0.14.0->qdrant-client) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from h2<5,>=3->httpx[http2]>=0.14.0->qdrant-client) (4.0.0)\n", - "Requirement already satisfied: six in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from isodate<0.7.0,>=0.6.0->rdflib<8.0,>=6.3->oxrdflib) (1.16.0)\n", - "Requirement already satisfied: mypy-extensions>=0.3.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain) (1.0.0)\n", - "Requirement already satisfied: humanfriendly>=9.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from coloredlogs->onnxruntime<2.0.0,>=1.17.0->fastembed) (10.0)\n", - "Requirement already satisfied: mpmath>=0.19 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from sympy->onnxruntime<2.0.0,>=1.17.0->fastembed) (1.3.0)\n", + "Requirement already satisfied: requests in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (2.31.0)\n", + "Requirement already satisfied: tqdm in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (4.66.2)\n", + "Requirement already satisfied: spacy in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (3.7.4)\n", + "Requirement already satisfied: textblob in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (0.18.0.post0)\n", + "Requirement already satisfied: oxrdflib in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (0.3.6)\n", + "Requirement already satisfied: fastembed in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (0.2.1)\n", + "Requirement already satisfied: qdrant-client in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (1.7.3)\n", + "Requirement already satisfied: langchain in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (0.1.7)\n", + "Requirement already satisfied: langchain-community in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (0.0.20)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from requests) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from requests) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from requests) (2.2.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from requests) (2024.2.2)\n", + "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (3.0.12)\n", + "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (1.0.5)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (1.0.10)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (2.0.8)\n", + "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (3.0.9)\n", + "Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (8.2.3)\n", + "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (1.1.2)\n", + "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (2.4.8)\n", + "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (2.0.10)\n", + "Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (0.3.4)\n", + "Requirement already satisfied: typer<0.10.0,>=0.3.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (0.9.0)\n", + "Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (6.4.0)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (2.6.1)\n", + "Requirement already satisfied: jinja2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (3.1.3)\n", + "Requirement already satisfied: setuptools in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (59.6.0)\n", + "Requirement already satisfied: packaging>=20.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (23.2)\n", + "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (3.3.0)\n", + "Requirement already satisfied: numpy>=1.19.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy) (1.26.4)\n", + "Requirement already satisfied: nltk>=3.8 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from textblob) (3.8.1)\n", + "Requirement already satisfied: pyoxigraph~=0.3.14 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from oxrdflib) (0.3.22)\n", + "Requirement already satisfied: rdflib<8.0,>=6.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from oxrdflib) (7.0.0)\n", + "Requirement already satisfied: huggingface-hub<0.21,>=0.20 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from fastembed) (0.20.3)\n", + "Requirement already satisfied: loguru<0.8.0,>=0.7.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from fastembed) (0.7.2)\n", + "Requirement already satisfied: onnx<2.0.0,>=1.15.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from fastembed) (1.15.0)\n", + "Requirement already satisfied: onnxruntime<2.0.0,>=1.17.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from fastembed) (1.17.0)\n", + "Requirement already satisfied: tokenizers<0.16.0,>=0.15.1 in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from fastembed) (0.15.2)\n", + "Requirement already satisfied: grpcio>=1.41.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from qdrant-client) (1.60.1)\n", + "Requirement already satisfied: grpcio-tools>=1.41.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from qdrant-client) (1.60.1)\n", + "Requirement already satisfied: httpx>=0.14.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from httpx[http2]>=0.14.0->qdrant-client) (0.26.0)\n", + "Requirement already satisfied: portalocker<3.0.0,>=2.7.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from qdrant-client) (2.8.2)\n", + "Requirement already satisfied: PyYAML>=5.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from langchain) (6.0.1)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from langchain) (2.0.27)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from langchain) (3.9.3)\n", + "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from langchain) (4.0.3)\n", + "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from langchain) (0.6.4)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from langchain) (1.33)\n", + 
"Requirement already satisfied: langchain-core<0.2,>=0.1.22 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from langchain) (0.1.23)\n", + "Requirement already satisfied: langsmith<0.1,>=0.0.83 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from langchain) (0.0.87)\n", + "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from langchain) (8.2.3)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain) (3.20.2)\n", + "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain) 
(0.9.0)\n", + "Requirement already satisfied: protobuf<5.0dev,>=4.21.6 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from grpcio-tools>=1.41.0->qdrant-client) (4.25.3)\n", + "Requirement already satisfied: anyio in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from httpx>=0.14.0->httpx[http2]>=0.14.0->qdrant-client) (4.2.0)\n", + "Requirement already satisfied: httpcore==1.* in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from httpx>=0.14.0->httpx[http2]>=0.14.0->qdrant-client) (1.0.3)\n", + "Requirement already satisfied: sniffio in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from httpx>=0.14.0->httpx[http2]>=0.14.0->qdrant-client) (1.3.0)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from httpcore==1.*->httpx>=0.14.0->httpx[http2]>=0.14.0->qdrant-client) (0.14.0)\n", + "Requirement already satisfied: h2<5,>=3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from httpx[http2]>=0.14.0->qdrant-client) (4.1.0)\n", + "Requirement already satisfied: filelock in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from huggingface-hub<0.21,>=0.20->fastembed) (3.13.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from huggingface-hub<0.21,>=0.20->fastembed) (2024.2.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from huggingface-hub<0.21,>=0.20->fastembed) (4.9.0)\n", + "Requirement already satisfied: jsonpointer>=1.9 in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain) (2.4)\n", + "Requirement already satisfied: click in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from nltk>=3.8->textblob) (8.1.7)\n", + "Requirement already satisfied: joblib in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from nltk>=3.8->textblob) (1.3.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from nltk>=3.8->textblob) (2023.12.25)\n", + "Requirement already satisfied: coloredlogs in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from onnxruntime<2.0.0,>=1.17.0->fastembed) (15.0.1)\n", + "Requirement already satisfied: flatbuffers in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from onnxruntime<2.0.0,>=1.17.0->fastembed) (23.5.26)\n", + "Requirement already satisfied: sympy in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from onnxruntime<2.0.0,>=1.17.0->fastembed) (1.12)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (0.6.0)\n", + "Requirement already satisfied: pydantic-core==2.16.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (2.16.2)\n", + "Requirement already satisfied: isodate<0.7.0,>=0.6.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from rdflib<8.0,>=6.3->oxrdflib) (0.6.1)\n", + "Requirement already satisfied: pyparsing<4,>=2.1.0 in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from rdflib<8.0,>=6.3->oxrdflib) (3.1.1)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.3)\n", + "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.7.11)\n", + "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.1.4)\n", + "Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from weasel<0.4.0,>=0.1.0->spacy) (0.16.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from jinja2->spacy) (2.1.5)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from anyio->httpx>=0.14.0->httpx[http2]>=0.14.0->qdrant-client) (1.2.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from h2<5,>=3->httpx[http2]>=0.14.0->qdrant-client) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from h2<5,>=3->httpx[http2]>=0.14.0->qdrant-client) (4.0.0)\n", + "Requirement already satisfied: six in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from isodate<0.7.0,>=0.6.0->rdflib<8.0,>=6.3->oxrdflib) 
(1.16.0)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain) (1.0.0)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from coloredlogs->onnxruntime<2.0.0,>=1.17.0->fastembed) (10.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from sympy->onnxruntime<2.0.0,>=1.17.0->fastembed) (1.3.0)\n", "Collecting en-core-web-sm==3.7.1\n", " Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)\n", - "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.8/12.8 MB\u001b[0m \u001b[31m18.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: spacy<3.8.0,>=3.7.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from en-core-web-sm==3.7.1) (3.7.4)\n", - "Requirement already satisfied: setuptools in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (59.6.0)\n", - "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.6.1)\n", - "Requirement already satisfied: typer<0.10.0,>=0.3.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.9.0)\n", - "Requirement already satisfied: packaging>=20.0 in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (23.2)\n", - "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.5)\n", - "Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (6.4.0)\n", - "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.66.2)\n", - "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.12)\n", - "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.31.0)\n", - "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.10)\n", - "Requirement already satisfied: numpy>=1.19.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.26.4)\n", - "Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.2.3)\n", - "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from 
spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.3.0)\n", - "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.9)\n", - "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.2)\n", - "Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.3.4)\n", - "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.4.8)\n", - "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.10)\n", - "Requirement already satisfied: jinja2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.1.3)\n", - "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.8)\n", - "Requirement already satisfied: typing-extensions>=4.6.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.9.0)\n", - "Requirement already satisfied: pydantic-core==2.16.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.16.2)\n", 
- "Requirement already satisfied: annotated-types>=0.4.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.6.0)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.2.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.6)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2024.2.2)\n", - "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.1.4)\n", - "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.7.11)\n", - "Requirement already satisfied: click<9.0.0,>=7.1.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from typer<0.10.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.1.7)\n", - "Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from weasel<0.4.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.16.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.1.5)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.8/12.8 MB\u001b[0m \u001b[31m33.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: spacy<3.8.0,>=3.7.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from en-core-web-sm==3.7.1) (3.7.4)\n", + "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.12)\n", + "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.5)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.10)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.8)\n", + "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.9)\n", + "Requirement already satisfied: thinc<8.3.0,>=8.2.2 in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.2.3)\n", + "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.2)\n", + "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.4.8)\n", + "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.10)\n", + "Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.3.4)\n", + "Requirement already satisfied: typer<0.10.0,>=0.3.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.9.0)\n", + "Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (6.4.0)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.66.2)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.31.0)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.6.1)\n", + "Requirement already satisfied: jinja2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.1.3)\n", + "Requirement already satisfied: setuptools in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (59.6.0)\n", + "Requirement already satisfied: packaging>=20.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (23.2)\n", + "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.3.0)\n", + "Requirement already satisfied: numpy>=1.19.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.26.4)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.6.0)\n", + "Requirement already satisfied: pydantic-core==2.16.2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.16.2)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.9.0)\n", + "Requirement already 
satisfied: charset-normalizer<4,>=2 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.2.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2024.2.2)\n", + "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.7.11)\n", + "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.1.4)\n", + "Requirement already satisfied: click<9.0.0,>=7.1.1 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from typer<0.10.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.1.7)\n", + "Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from weasel<0.4.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.16.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in 
/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.1.5)\n", "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n", "You can now load the package via spacy.load('en_core_web_sm')\n" ] @@ -186,14 +186,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 1. ⛏️ Extract entities" + "## ☄️ Named Entity Recognition with Spacy\n", + "\n", + "We will use `spacy` NER to extract and categorize entities from an english text about climate change.\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -283,7 +294,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -635,7 +646,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -674,11 +685,26 @@ "print(training_data[0])" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Train the spacy `nlp` model:" + ] + }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Training NER model: 100%|██████████| 3/3 [00:47<00:00, 15.83s/it]\n" + ] + } + ], "source": [ "import random\n", "from tqdm import tqdm\n", @@ -711,43 +737,65 @@ " example = Example.from_dict(doc, annotations)\n", " nlp.update([example], drop=0.5, sgd=optimizer, losses=losses)\n", "# Save 
model\n", - "nlp.to_disk(\"./ner_model_disease\")" + "# nlp.to_disk(\"../data/ner_model_disease\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now rerun the `nlp` pipeline on the text to extract potential diseases" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
As world leaders gather on Monday for the Climate Action Summit at the UN General Assembly, it is sadly clear that the prospect of rising global temperatures and sea levels has failed to generate a sufficient sense of urgency around climate change. What \n", + "
As world leaders gather on Monday for the Climate Action Summit at the UN General Assembly, it is sadly clear that the prospect of rising global temperatures and sea levels has failed to generate a sufficient sense of \n", + "\n", + " urgency around climate change.\n", + " DISEASE\n", + "\n", + " What \n", "\n", - " might spur leaders\n", + " might spur\n", " DISEASE\n", "\n", - " to action, if it were better understood, is the enormous threat that climate change already poses to human health.
Climate change exacerbates chronic and contagious disease, worsens food and water shortages, increases the risk of pandemics, and aggravates mass displacement. The broad environmental effects of climate change have long been discussed as long-term risks; what’s clear now is that the health effects are worse than anticipated—and that they’re already being felt.
CLIMATE ILLS
The dangerous health effects of climate change begin with the emissions that cause it. Black carbon, methane, and nitrogen oxides are powerful drivers of global warming, and, along with other air pollutants such as carbon monoxide and ozone, they are responsible for over seven million deaths each year, about one in eight worldwide. The problem extends beyond cities with \n", + " leaders to action, if it were better understood, is the enormous threat that climate change already poses to human health.
Climate change exacerbates chronic and contagious disease, worsens food and water shortages, increases the risk of pandemics, and aggravates mass displacement. The broad environmental effects of climate change have long been discussed as long-term risks; what’s clear now is that the health effects are worse than anticipated—and that they’re already being felt.
CLIMATE ILLS
The dangerous health effects of climate change begin with the emissions that cause it. Black carbon, methane, and nitrogen oxides are powerful drivers of global warming, and, along with other air pollutants such as carbon monoxide and ozone, they are responsible for over seven million deaths each year, about one in eight worldwide. The problem extends beyond cities with \n", "\n", " famously\n", " DISEASE\n", "\n", - " poor air quality, such as New Delhi, Beijing, and São Paulo. Ninety percent of the world’s urban dwellers breathe air containing unsafe pollution levels, according to the World Health Organization (\n", + " poor air quality, such as New Delhi, Beijing, and São \n", + "\n", + " Paulo\n", + " DISEASE\n", + "\n", + ". Ninety percent of the world’s urban dwellers breathe air containing unsafe pollution levels, according to the World Health Organization (\n", "\n", " WHO\n", " DISEASE\n", "\n", - ").
The dangers start at the beginning of life. Toxic pollutants cross the placenta, increasing the risk of preterm birth and low birth weight, which can cause lifelong damage to multiple organ systems. Children breathe more rapidly than adults do, so they absorb more pollutants at a time when their developing organs are more vulnerable. As a result, air pollution causes an estimated 600,000 deaths each year in children under five, mostly from \n", + ").
The dangers start at the beginning of life. Toxic pollutants cross the placenta, increasing the risk of preterm birth and low birth weight, which can cause lifelong damage to multiple organ \n", + "\n", + " systems\n", + " DISEASE\n", + "\n", + ". Children breathe more rapidly than adults do, so they absorb more pollutants at a time when their developing organs are more vulnerable. As a result, air pollution causes an estimated 600,000 deaths each year in children under five, mostly from \n", "\n", " pneumonia\n", " DISEASE\n", "\n", - ". There is also emerging evidence that air pollution compromises children\n", + ". There is also emerging evidence that air pollution compromises children’s cognitive development and can increase their risk of behavioral disorders.
In adults, pollution contributes to a wide range of \n", "\n", - " ’s cognitive\n", + " respiratory\n", " DISEASE\n", "\n", - " development and can increase their risk of behavioral disorders.
In adults, pollution contributes to a wide range of respiratory and circulatory diseases, and may accelerate \n", + " and circulatory diseases, and may accelerate \n", "\n", " cognitive decline\n", " DISEASE\n", @@ -757,7 +805,12 @@ " seniors\n", " DISEASE\n", "\n", - ". Most air-pollution-related deaths are due to heart attacks and \n", + ". Most air-pollution-related deaths are due to \n", + "\n", + " heart\n", + " DISEASE\n", + "\n", + " attacks and \n", "\n", " strokes\n", " DISEASE\n", @@ -787,22 +840,37 @@ " emissions\n", " DISEASE\n", "\n", - " are responsible for rising global temperatures. These in turn lead to increased humidity and cause more frequent and intense heat waves that worsen \n", + " are responsible for rising global temperatures. These in turn lead to increased \n", + "\n", + " humidity\n", + " DISEASE\n", + "\n", + " and cause more frequent and intense heat waves that worsen \n", "\n", " hypertension\n", " DISEASE\n", "\n", - " and mental health problems, and can limit the effectiveness of certain medications. When a person’s body temperature rises to 104 degrees Fahrenheit or above, systematic organ failure occurs. Heat waves this summer killed 1,435 people in France alone, the only country to have published statistics on heat-related deaths. As many of the world’s major population centers grow hotter and more humid, more people will die from simply overheating.
Climate change also compounds the threat of communicable diseases. Increased rainfall and higher temperatures favor vector-borne diseases—those caused by parasites, viruses, and bacteria transmitted by mosquitoes, ticks, flies, and fleas. Cold-blooded insects generally prefer warmer temperatures, which not only extend their breeding seasons but accelerate their \n", + " and mental health problems, and can limit the effectiveness of certain medications. When a person’s body temperature rises to 104 degrees Fahrenheit or above, systematic organ \n", + "\n", + " failure\n", + " DISEASE\n", + "\n", + " occurs. Heat waves this summer killed 1,435 people in France alone, the only country to have published statistics on heat-related deaths. As many of the world’s major population centers grow hotter and more humid, more people will die from simply overheating.
Climate change also compounds the threat of communicable diseases. Increased rainfall and higher temperatures favor \n", "\n", - " geographical expansion\n", + " vector-borne diseases—those caused\n", " DISEASE\n", "\n", - ".
As man-made climate change has \n", + " by parasites, viruses, and bacteria transmitted by mosquitoes, ticks, flies, and fleas. Cold-blooded insects generally prefer warmer \n", "\n", - " taken hold\n", + " temperatures\n", " DISEASE\n", "\n", - " over the last four decades, dozens of new infectious diseases have emerged or begun to threaten new regions, including Zika and Ebola.
The mosquito is already the deadliest animal in the world, causing more than half a million deaths each year—438,000 of them from \n", + ", which not only extend their breeding seasons but accelerate their geographical expansion.
As man-made climate change has taken hold over the last four decades, dozens of new \n", + "\n", + " infectious\n", + " DISEASE\n", + "\n", + " diseases have emerged or begun to threaten new regions, including Zika and Ebola.
The mosquito is already the deadliest animal in the world, causing more than half a million deaths each year—438,000 of them from \n", "\n", " malaria\n", " DISEASE\n", @@ -814,27 +882,27 @@ "\n", " at higher altitudes, and may cause it to spread farther into African highlands.
Another virus likely to spread as a result of climate change is dengue, which currently infects 96 million people each year and kills 90,000 of them. Dengue virus is transmitted by two species of mosquito—Aedes aegypti and \n", "\n", - " Aedes albopictus\n", + " Aedes albopictus—that are unusually tough\n", " DISEASE\n", "\n", - "—that are unusually tough and also transmit yellow fever, Zika, West Nile, and other viruses. More than half the world’s population lives in areas where an Aedes species is already present—and that proportion is likely to grow. A. aegypti in particular thrives not only in warm and moist environments but in \n", + " and also transmit yellow fever, Zika, West Nile, and other viruses. More than half the world’s population lives in areas where an Aedes species is already present—and that proportion is likely to grow. A. aegypti in particular thrives not only in warm and moist environments but in drought-prone ones, too. Europe, North America, and high-elevation areas in the tropics may soon have to contend with dengue as well as other emerging diseases.
The steady swarm of Aedes into new regions points to the single biggest threat of all: \n", "\n", - " drought-prone\n", + " pandemic disease\n", " DISEASE\n", "\n", - " ones, too. Europe, North America, and high-elevation areas in the tropics may soon have to contend with dengue as well as other emerging diseases.
The steady swarm of Aedes into new regions points to the single biggest threat of all: pandemic disease. As man-made climate change has \n", + ". As man-made climate change has taken hold over the last four decades, dozens of new \n", "\n", - " taken hold\n", + " infectious\n", " DISEASE\n", "\n", - " over the last four decades, dozens of new infectious diseases have emerged or begun to threaten new regions, including Zika and Ebola. \n", + " diseases have emerged or begun to threaten new regions, including Zika and Ebola. \n", "\n", " Cholera\n", " DISEASE\n", "\n", - " is also becoming more difficult to control: warm, brackish waters and rising sea levels help spread the disease, which infects about four million people each year and kills about 100,000 of them. \n", + " is also becoming more difficult to control: warm, brackish waters and rising sea levels help spread the disease, which infects about four million people each year and kills about 100,000 of them. Bubonic \n", "\n", - " Bubonic plague\n", + " plague\n", " DISEASE\n", "\n", ", spread by rats and fleas, is predicted to increase with warmer springs and wetter summers. \n", @@ -842,12 +910,32 @@ " Anthrax\n", " DISEASE\n", "\n", - ", whose spores are released by thawing permafrost, could spread farther as a result of stronger winds.
And those are just the direct health effects of climate change. Rising sea levels and increased ocean acidification will reduce fishing and aquaculture, aggravating malnutrition and food insecurity. \n", + ", whose \n", + "\n", + " spores\n", + " DISEASE\n", + "\n", + " are released by thawing permafrost, could spread farther as a result of stronger winds.
And those are just the direct health effects of climate change. Rising sea levels and increased ocean acidification will reduce fishing and aquaculture, aggravating malnutrition and food \n", + "\n", + " insecurity\n", + " DISEASE\n", + "\n", + ". \n", "\n", " Contamination\n", " DISEASE\n", "\n", - " of aquifers will exacerbate water shortages. Droughts, which already kill and displace more people than any other type of weather catastrophe, are predicted to grow longer and more frequent. The World Bank estimates that by 2050, there could be one billion climate refugees from sub-Saharan Africa, South Asia, and Latin America.
A HEALTHY RESPONSE
Extreme weather also disrupts public health infrastructure and services. That is why it is imperative that countries around the world invest in adapting health-care systems to the environmental changes already underway and likely to follow. At the first high-level meeting on Universal Health Coverage (\n", + " of aquifers will exacerbate water shortages. Droughts, which already kill and displace more people than any other type of weather catastrophe, are predicted to grow longer and more frequent. The World Bank estimates that by 2050, there could be one billion climate refugees from sub-Saharan Africa, South Asia, and Latin America.
A HEALTHY RESPONSE
Extreme weather also disrupts public health infrastructure and services. That is why it is imperative that countries around the world invest in adapting health-care \n", + "\n", + " systems\n", + " DISEASE\n", + "\n", + " to the environmental changes already underway and likely to follow. At the first high-level meeting on \n", + "\n", + " Universal Health Coverage\n", + " DISEASE\n", + "\n", + " (\n", "\n", " UHC\n", " DISEASE\n", @@ -889,7 +977,11 @@ ], "source": [ "doc = nlp(annotate_text)\n", - "spacy.displacy.render(doc, jupyter=True, style=\"ent\")" + "\n", + "spacy.displacy.render(doc,jupyter=True, style = \"ent\")\n", + "\n", + "# print(\"text | type | sentence\")\n", + "# print(\"\\n\".join([f\"{ent.text} | {ent.label_} | {ent.sent.text}\" for ent in doc.ents]))" ] }, { @@ -915,9 +1007,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "266481" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from rdflib import Graph\n", "\n", @@ -928,9 +1031,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🧫 Found 11500 diseases\n", + "angiosarcoma http://purl.obolibrary.org/obo/DOID_0001816\n", + "pterygium http://purl.obolibrary.org/obo/DOID_0002116\n", + "disease of metabolism http://purl.obolibrary.org/obo/DOID_0014667\n" + ] + } + ], "source": [ "# Recursively get all subClassesOf disease (doid:4)\n", "query = \"\"\"\n", @@ -951,13 +1065,6 @@ " print(f\"{row.label} {row.diseaseUri}\")\n" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. 
🔗 Map to diseases concepts in an ontology" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -981,27 +1088,85 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/home/vemonet/dev/courses/UM_KEN4256_KnowledgeGraphs/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "\u001b[32m2024-02-19 12:28:00.502\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mfastembed.embedding\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[33m\u001b[1mDefaultEmbedding, FlagEmbedding, JinaEmbedding are deprecated.Use from fastembed import TextEmbedding instead.\u001b[0m\n" + "Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 16792.14it/s]\n" ] }, { - "ename": "NameError", - "evalue": "name 'qres' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[14], line 9\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mqdrant_client\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhttp\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 3\u001b[0m Distance,\n\u001b[1;32m 4\u001b[0m PointStruct,\n\u001b[1;32m 5\u001b[0m VectorParams,\n\u001b[1;32m 6\u001b[0m )\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfastembed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01membedding\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FlagEmbedding\n\u001b[0;32m----> 9\u001b[0m res_list 
\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[43mqres\u001b[49m)\n\u001b[1;32m 10\u001b[0m \u001b[38;5;66;03m# You can reduce the amount of diseases for the demo (it takes ~2min30s on 16 CPUs for the 11k diseases)\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# res_list = list(qres)[:5000]\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# Initialize embedding model and vector database\u001b[39;00m\n\u001b[1;32m 14\u001b[0m embedding_model \u001b[38;5;241m=\u001b[39m FlagEmbedding(model_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBAAI/bge-small-en-v1.5\u001b[39m\u001b[38;5;124m\"\u001b[39m, max_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m512\u001b[39m)\n", - "\u001b[0;31mNameError\u001b[0m: name 'qres' is not defined" + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 0.01414234 -0.013885 0.02664707 -0.00595295 0.02500878 -0.05168652\n", + " 0.05693207 0.03443561 0.03690271 0.01074176 -0.02259133 -0.02173014\n", + " 0.00696225 -0.0204661 -0.01870032 0.04507202 -0.05871626 0.02881367\n", + " -0.06865767 0.04924264 0.0039005 -0.02974666 -0.04282469 -0.05110366\n", + " 0.00613714 0.02783325 -0.01617181 -0.00422632 -0.04081199 -0.10839949\n", + " -0.0334617 0.01359641 -0.00690251 -0.0043321 -0.02209448 -0.02759799\n", + " -0.05973151 0.06467617 -0.05012893 0.04891388 0.02927759 0.03886029\n", + " -0.03951165 0.03423601 -0.0767827 -0.02751978 -0.02291217 -0.00599798\n", + " 0.09871525 0.03441411 -0.03673831 -0.06545779 0.00328765 0.0021383\n", + " -0.00782257 -0.02303196 0.03252333 -0.01079618 0.05677114 0.01385299\n", + " 0.04179419 -0.00321197 -0.13654813 0.09156476 -0.04054898 -0.04316433\n", + " -0.01886327 -0.01941874 0.03138032 0.03272972 0.00983377 -0.0179648\n", + " 0.00446826 0.05778679 -0.03451386 -0.04024907 0.04950196 -0.0144544\n", + " 0.03468213 0.04343939 -0.02649575 0.05254826 -0.01273234 -0.03309491\n", + " -0.02200587 -0.05140508 
-0.03250891 -0.11879018 0.01510269 0.04129571\n", + " -0.0444955 0.03089114 0.06763803 0.03107056 -0.06312456 -0.01755762\n", + " 0.01307746 -0.02787874 -0.00281535 0.3114548 0.02631707 0.00125276\n", + " 0.00438112 0.02391482 -0.04130033 -0.03525911 0.00880765 -0.01970966\n", + " -0.03153884 0.04207596 0.08272165 -0.05328522 -0.00600261 -0.01981342\n", + " -0.04524404 -0.0005646 0.09770197 0.02815565 -0.03996886 0.03072252\n", + " 0.04465761 -0.01168233 -0.04960685 0.00402869 0.03958668 -0.02891143\n", + " -0.01635479 0.09044147 0.0740295 -0.00851143 0.12291927 0.0336718\n", + " -0.00765157 -0.04932648 0.0827454 0.01209539 0.0329143 0.01242327\n", + " -0.08896572 0.01817499 -0.02719959 -0.12896222 -0.0062194 -0.0140607\n", + " -0.05931816 0.12108037 -0.00126646 0.09657639 -0.06887486 0.02276755\n", + " 0.01293885 0.05276014 0.00280623 0.07234164 0.00529392 0.05413717\n", + " -0.04352788 0.03776156 -0.01909566 0.01680525 0.05535909 0.0105627\n", + " -0.02102524 0.04014051 0.01383714 -0.04329155 -0.00304223 0.01914789\n", + " 0.07784131 0.05911944 -0.01047428 -0.03404262 0.00596298 -0.05576868\n", + " 0.01403596 -0.05668803 -0.04395681 -0.03470498 -0.03918268 -0.03710565\n", + " 0.04972596 -0.00653216 -0.01214456 0.07245234 0.03171008 0.0048596\n", + " -0.0191042 -0.0479681 0.06677026 -0.00329712 0.00449569 -0.00377265\n", + " -0.03683615 0.00171017 -0.05317863 -0.10200896 0.01225357 0.00133082\n", + " -0.04647435 -0.01817175 0.151367 -0.01851162 -0.00712101 0.04137385\n", + " 0.0815076 0.08932334 0.00916797 0.0674205 -0.02234688 0.01131144\n", + " 0.00917329 0.0163309 0.00902087 0.00194496 -0.0064065 0.02694527\n", + " -0.02507114 0.00899984 0.00225168 -0.03223851 0.05030023 -0.01093081\n", + " 0.01363805 -0.2755928 0.02446636 -0.01264891 -0.00848014 -0.03298567\n", + " 0.05690612 0.03493537 0.00664419 0.02979421 0.01988582 -0.0108583\n", + " 0.05488859 0.03095806 0.05268979 0.00040277 0.07285438 0.01992969\n", + " -0.01307491 -0.00736642 -0.01473349 -0.08402849 
-0.00340331 -0.02377428\n", + " -0.00515192 0.03453619 0.03327222 0.13334076 0.01851444 -0.05254203\n", + " -0.04605773 0.09612017 0.06995213 -0.03938622 -0.06177534 0.01072397\n", + " -0.02506309 -0.05003777 -0.01442132 -0.02627243 0.04329702 0.03939595\n", + " -0.02110046 -0.02675634 0.06541573 0.04956737 -0.03916068 0.00283775\n", + " 0.03694887 -0.01284113 0.01043614 0.04847318 -0.02854956 -0.00883358\n", + " 0.03537585 0.01100568 -0.01575713 -0.06137805 0.02887745 -0.06185652\n", + " -0.0469049 0.00847882 -0.03991816 0.05940219 -0.05509797 -0.01177482\n", + " -0.0065258 0.01529353 0.02008492 0.04499531 -0.07121827 -0.02772809\n", + " 0.04735106 0.01871876 0.02116814 0.05908853 0.00349959 0.03679019\n", + " 0.05816434 -0.03691323 -0.02901519 0.038678 -0.05429847 0.01851513\n", + " 0.0048438 -0.00803171 0.06613715 -0.00046515 0.0103803 -0.05596929\n", + " -0.07536671 0.01218346 0.01889269 -0.04886561 -0.0363 -0.04586501\n", + " -0.00146434 -0.28305864 0.00917302 -0.0003382 -0.0227192 0.01409691\n", + " -0.02680678 -0.04033561 0.05060802 0.02250225 -0.02339446 0.0115717\n", + " 0.04950395 -0.0030709 -0.0133867 -0.02676994 -0.0207772 0.04572217\n", + " 0.02438199 0.01673711 -0.05941632 -0.01769224 0.00639655 0.13909611\n", + " -0.08048704 -0.03021036 -0.0060039 0.00616416 0.01245894 -0.01578943\n", + " -0.02335382 0.00451832 0.02021749 -0.02965549 0.01635892 0.02286422\n", + " 0.11561577 -0.03600654 0.04829871 -0.05599661 -0.01996102 -0.04199656\n", + " 0.02535603 0.001692 -0.01472612 0.04134357 -0.03099626 -0.00754061\n", + " -0.04858302 0.05219911 0.02858651 -0.07098112 -0.00865761 0.02596696\n", + " -0.06052928 0.01566699 -0.10622156 0.01770238 -0.08028527 0.02550406\n", + " -0.11475673 0.03187792 -0.01268816 -0.07375916 0.05314633 0.06550446]\n", + "('vectors_count', 11500)\n" ] } ], @@ -1034,7 +1199,7 @@ "# Generate embeddings for the list of disease labels\n", "labels_list = [row.label for row in res_list]\n", "embeddings = 
list(embedding_model.embed(labels_list))\n", - "# print(embeddings[0])\n", + "print(embeddings[0])\n", "\n", "# Create the objects to insert in the collection with the embeddings, label and URI\n", "points = [\n", @@ -1055,11 +1220,30 @@ "print(list(collection_info)[2])" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's check if it works:" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "migraine | http://purl.obolibrary.org/obo/DOID_6364 [0.8831740617752075]\n", + "migraine with aura | http://purl.obolibrary.org/obo/DOID_10024 [0.7594956159591675]\n", + "facial neuralgia | http://purl.obolibrary.org/obo/DOID_13865 [0.7179465293884277]\n", + "migraine without aura | http://purl.obolibrary.org/obo/DOID_12783 [0.7174647450447083]\n", + "familial hemiplegic migraine | http://purl.obolibrary.org/obo/DOID_0060178 [0.7143872976303101]\n" + ] + } + ], "source": [ "search = \"headache\"\n", "\n", @@ -1080,14 +1264,52 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 3. 🕸️ Generate RDF" + "Finally let's run it on the entities extracted by spacy, and populate a RDF KG using the [Text Annotation Ontology](https://vemonet.github.io/tao).\n", + "\n", + "We use `tao:parts_of` to link each annotation to the annotated text, and `dcterms:isPartOf` to link each annotation to the sentence they are in." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pneumonia > pneumonia (http://purl.obolibrary.org/obo/DOID_0060178) [1.0000001192092896]\n", + "respiratory > respiratory failure (http://purl.obolibrary.org/obo/DOID_0060178) [0.8170928955078125]\n", + "cognitive decline > subjective cognitive decline (http://purl.obolibrary.org/obo/DOID_0060178) [0.9172177314758301]\n", + "strokes > cerebral infarction (http://purl.obolibrary.org/obo/DOID_0060178) [0.816611111164093]\n", + "pneumonia > pneumonia (http://purl.obolibrary.org/obo/DOID_0060178) [1.0000001192092896]\n", + "asthma > asthma (http://purl.obolibrary.org/obo/DOID_0060178) [1.0]\n", + "emphysema > pulmonary emphysema (http://purl.obolibrary.org/obo/DOID_0060178) [0.934086799621582]\n", + "lung cancer > lung cancer (http://purl.obolibrary.org/obo/DOID_0060178) [1.0]\n", + "hypertension > hypertension (http://purl.obolibrary.org/obo/DOID_0060178) [1.0]\n", + "infectious > viral infectious disease (http://purl.obolibrary.org/obo/DOID_0060178) [0.8203649520874023]\n", + "malaria > malaria (http://purl.obolibrary.org/obo/DOID_0060178) [1.0000001192092896]\n", + "malaria > malaria (http://purl.obolibrary.org/obo/DOID_0060178) [1.0000001192092896]\n", + "infectious > viral infectious disease (http://purl.obolibrary.org/obo/DOID_0060178) [0.8203649520874023]\n", + "Cholera > cholera (http://purl.obolibrary.org/obo/DOID_0060178) [1.0000001192092896]\n", + "plague > plague (http://purl.obolibrary.org/obo/DOID_0060178) [1.0]\n", + "Anthrax > anthrax disease (http://purl.obolibrary.org/obo/DOID_0060178) [0.9072661995887756]\n", + "smallpox > smallpox (http://purl.obolibrary.org/obo/DOID_0060178) [1.0]\n", + "AIDS > AIDS phobia (http://purl.obolibrary.org/obo/DOID_0060178) [0.8142892122268677]\n", + "malaria > malaria (http://purl.obolibrary.org/obo/DOID_0060178) 
[1.0000001192092896]\n" + ] + }, + { + "data": { + "text/plain": [ + ")>" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from rdflib import URIRef, Literal, RDFS, Namespace, RDF, XSD, DCTERMS\n", "\n", @@ -1138,6 +1360,164 @@ "g.serialize(\"diseases_annotations_kg.ttl\", format=\"ttl\")\n", "# print(g.serialize(format=\"ttl\"))" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 😐 Sentiment analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "👍 Sentiment(polarity=0.08600756239645128, subjectivity=0.4497620049703383)\n" + ] + } + ], + "source": [ + "from textblob import TextBlob\n", + "\n", + "# Sentiment Analysis\n", + "blob = TextBlob(annotate_text)\n", + "if blob.sentiment.polarity > 0:\n", + " print(f\"👍 {blob.sentiment}\")\n", + "else:\n", + " print(f\"👎 {blob.sentiment}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🤗 Use LLMs\n", + "\n", + "Use the [HuggingFace Inference API](https://huggingface.co/docs/api-inference/faq) to **query open source LLMs for free**, but with a limited context size. 
Just create an account on HuggingFace and get an access token.\n", + "\n", + "> Put the access token in the `.hf_token` file in the same folder as the notebook\n", + "\n", + "Query the Mixtral 8x7B model through HuggingFace HTTP API endpoint:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[21], line 21\u001b[0m\n\u001b[1;32m 13\u001b[0m payload\u001b[38;5;241m=\u001b[39m{\n\u001b[1;32m 14\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minputs\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[INST] From the text below, extract the diseases mentioned. Return the results as a JSON list of the extracted diseases label in the text. 
\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mTEXT: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28minput\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m?\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m[/INST]\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 15\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameters\u001b[39m\u001b[38;5;124m\"\u001b[39m: {\n\u001b[1;32m 16\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreturn_full_text\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 17\u001b[0m }\n\u001b[1;32m 18\u001b[0m }\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mpost(API_URL, headers\u001b[38;5;241m=\u001b[39mheaders, json\u001b[38;5;241m=\u001b[39mpayload)\u001b[38;5;241m.\u001b[39mjson()\n\u001b[0;32m---> 21\u001b[0m \u001b[43mquery_llm\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquestion\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[21], line 19\u001b[0m, in \u001b[0;36mquery_llm\u001b[0;34m(input)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mquery_llm\u001b[39m(\u001b[38;5;28minput\u001b[39m: \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 13\u001b[0m payload\u001b[38;5;241m=\u001b[39m{\n\u001b[1;32m 14\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minputs\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[INST] From the text below, extract the diseases mentioned. Return the results as a JSON list of the extracted diseases label in the text. 
\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mTEXT: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28minput\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m?\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m[/INST]\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 15\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameters\u001b[39m\u001b[38;5;124m\"\u001b[39m: {\n\u001b[1;32m 16\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreturn_full_text\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 17\u001b[0m }\n\u001b[1;32m 18\u001b[0m }\n\u001b[0;32m---> 19\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrequests\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpost\u001b[49m\u001b[43m(\u001b[49m\u001b[43mAPI_URL\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpayload\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mjson()\n", + "File \u001b[0;32m~/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages/requests/api.py:115\u001b[0m, in \u001b[0;36mpost\u001b[0;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(url, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, json\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 104\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Sends a POST request.\u001b[39;00m\n\u001b[1;32m 105\u001b[0m \n\u001b[1;32m 106\u001b[0m \u001b[38;5;124;03m :param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 112\u001b[0m 
\u001b[38;5;124;03m :rtype: requests.Response\u001b[39;00m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 115\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpost\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages/requests/api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# By using the 'with' statement we are sure the session is closed, thus we\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# avoid leaving sockets open which can trigger a ResourceWarning in some\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# cases, and look like a memory leak in others.\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m sessions\u001b[38;5;241m.\u001b[39mSession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[0;32m---> 59\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages/requests/sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 584\u001b[0m send_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 585\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout\u001b[39m\u001b[38;5;124m\"\u001b[39m: timeout,\n\u001b[1;32m 586\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow_redirects\u001b[39m\u001b[38;5;124m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 587\u001b[0m }\n\u001b[1;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msend_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n", + "File \u001b[0;32m~/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages/requests/sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 700\u001b[0m start \u001b[38;5;241m=\u001b[39m preferred_clock()\n\u001b[1;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43madapter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 706\u001b[0m elapsed \u001b[38;5;241m=\u001b[39m preferred_clock() \u001b[38;5;241m-\u001b[39m start\n", + "File \u001b[0;32m~/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages/requests/adapters.py:486\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 483\u001b[0m timeout \u001b[38;5;241m=\u001b[39m TimeoutSauce(connect\u001b[38;5;241m=\u001b[39mtimeout, read\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[1;32m 485\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 486\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 487\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 490\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 491\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 492\u001b[0m \u001b[43m 
\u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 493\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 494\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 495\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 496\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 497\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 498\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 500\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, request\u001b[38;5;241m=\u001b[39mrequest)\n", + "File \u001b[0;32m~/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages/urllib3/connectionpool.py:793\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 790\u001b[0m response_conn \u001b[38;5;241m=\u001b[39m conn \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m release_conn 
\u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 792\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 793\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 803\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 805\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 806\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 808\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n\u001b[1;32m 809\u001b[0m clean_exit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages/urllib3/connectionpool.py:537\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 535\u001b[0m \u001b[38;5;66;03m# Receive the response from the server\u001b[39;00m\n\u001b[1;32m 536\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 537\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 539\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_timeout(err\u001b[38;5;241m=\u001b[39me, url\u001b[38;5;241m=\u001b[39murl, timeout_value\u001b[38;5;241m=\u001b[39mread_timeout)\n", + "File \u001b[0;32m~/dev/courses/UM_KEN4256_KnowledgeGraphs_private/.venv/lib/python3.10/site-packages/urllib3/connection.py:466\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresponse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m HTTPResponse\n\u001b[1;32m 465\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 
466\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 469\u001b[0m assert_header_parsing(httplib_response\u001b[38;5;241m.\u001b[39mmsg)\n", + "File \u001b[0;32m/usr/lib/python3.10/http/client.py:1375\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1373\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1374\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1375\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1376\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n\u001b[1;32m 1377\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclose()\n", + "File \u001b[0;32m/usr/lib/python3.10/http/client.py:318\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 316\u001b[0m \u001b[38;5;66;03m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 317\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 318\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 319\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n\u001b[1;32m 320\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", + "File \u001b[0;32m/usr/lib/python3.10/http/client.py:279\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m 
\u001b[38;5;21m_read_status\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 279\u001b[0m line \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreadline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_MAXLINE\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miso-8859-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 280\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(line) \u001b[38;5;241m>\u001b[39m _MAXLINE:\n\u001b[1;32m 281\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LineTooLong(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstatus line\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m/usr/lib/python3.10/socket.py:705\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 704\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 705\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 706\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 707\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m/usr/lib/python3.10/ssl.py:1303\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1299\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m 
\u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1300\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1301\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 1302\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1303\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1304\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1305\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv_into(buffer, nbytes, flags)\n", + "File \u001b[0;32m/usr/lib/python3.10/ssl.py:1159\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1157\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1158\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1159\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1160\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1161\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
import os
import requests

# Sample passage that the disease-extraction prompts in this section are run on.
question = "The world has tended to fight such threats one disease at a time\u2014whether it is smallpox, polio, TB, HIV/AIDS, or malaria\u2014and to adopt a firefighting approach when a deadly pandemic such as Ebola emerges."

# The Hugging Face API token is read from a local `.hf_token` file
# (surrounding whitespace/newline stripped) rather than written in the notebook.
with open(".hf_token", "r") as file:
    HF_TOKEN = file.read().strip()

API_URL = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"
headers = {"Authorization": f"Bearer {HF_TOKEN}"}


def query_llm(input: str, timeout: float = 60.0):
    """Ask the hosted Mixtral model to list the diseases mentioned in a text.

    The text is wrapped in an ``[INST] ... [/INST]`` prompt and POSTed as JSON
    to ``API_URL`` with the bearer token from ``headers``.

    Args:
        input: The passage to extract disease mentions from. (The name shadows
            the ``input`` builtin inside this function; it is kept so existing
            keyword callers continue to work.)
        timeout: Seconds handed to ``requests.post`` as its ``timeout``.
            Without one, ``requests`` waits indefinitely on a stalled
            connection and the cell can only be stopped by interrupting
            the kernel.

    Returns:
        The decoded JSON body of the response. No HTTP status check is done,
        so an error payload sent by the API is returned as-is.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.RequestException: On other transport failures.
    """
    payload = {
        # The passage is inserted verbatim: it is a statement, not a question,
        # so no "?" is appended after it.
        "inputs": f"[INST] From the text below, extract the diseases mentioned. Return the results as a JSON list of the extracted diseases label in the text. \nTEXT: {input}\n[/INST]",
        "parameters": {
            # Intended to return only the generated continuation, not the prompt
            # (see the Inference API text-generation parameters).
            "return_full_text": False
        }
    }
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    return response.json()


query_llm(question)
from langchain_community.llms import HuggingFaceHub
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Prompt with a single `{question}` placeholder that receives the passage to analyse.
template = """From the text below, extract the diseases mentioned.
Return the results as a JSON list of the extracted diseases label in the text

Text: {question}"""
prompt = PromptTemplate.from_template(template)

# Expose the token (loaded earlier into HF_TOKEN) through the environment before
# the LLM wrapper is created; HuggingFaceHub is expected to read it from this
# variable -- confirm against the LangChain docs for the installed version.
os.environ.update({"HUGGINGFACEHUB_API_TOKEN": HF_TOKEN})

# Generation settings can be tuned by also passing, for example,
# model_kwargs={"temperature": 0.5, "max_length": 64}.
llm = HuggingFaceHub(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1")
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Print each streamed piece immediately, without adding newlines between pieces.
for piece in llm_chain.stream(question):
    print(piece["text"], end="", flush=True)