Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated tools (ner, nel) #10

Merged
merged 7 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[flake8]
max-line-length = 120
exclude = impresso/api_client, impresso/protobuf
exclude = impresso/api_client, impresso/protobuf, impresso/api_models.py
8 changes: 4 additions & 4 deletions impresso/api_client/api/tools/perform_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def sync_detailed(
client: AuthenticatedClient,
body: ImpressoNamedEntityRecognitionRequest,
) -> Response[Union[Error, ImpressoNamedEntityRecognitionResponse]]:
"""Perform Named Entity Recognition of a text
"""Perform named entity recognition (and optional named entity linking) of a text

Args:
body (ImpressoNamedEntityRecognitionRequest): Request body for the Impresso NER endpoint
Expand Down Expand Up @@ -109,7 +109,7 @@ def sync(
client: AuthenticatedClient,
body: ImpressoNamedEntityRecognitionRequest,
) -> Optional[Union[Error, ImpressoNamedEntityRecognitionResponse]]:
"""Perform Named Entity Recognition of a text
"""Perform named entity recognition (and optional named entity linking) of a text

Args:
body (ImpressoNamedEntityRecognitionRequest): Request body for the Impresso NER endpoint
Expand All @@ -133,7 +133,7 @@ async def asyncio_detailed(
client: AuthenticatedClient,
body: ImpressoNamedEntityRecognitionRequest,
) -> Response[Union[Error, ImpressoNamedEntityRecognitionResponse]]:
"""Perform Named Entity Recognition of a text
"""Perform named entity recognition (and optional named entity linking) of a text

Args:
body (ImpressoNamedEntityRecognitionRequest): Request body for the Impresso NER endpoint
Expand All @@ -160,7 +160,7 @@ async def asyncio(
client: AuthenticatedClient,
body: ImpressoNamedEntityRecognitionRequest,
) -> Optional[Union[Error, ImpressoNamedEntityRecognitionResponse]]:
"""Perform Named Entity Recognition of a text
"""Perform named entity recognition (and optional named entity linking) of a text

Args:
body (ImpressoNamedEntityRecognitionRequest): Request body for the Impresso NER endpoint
Expand Down
2 changes: 2 additions & 0 deletions impresso/api_client/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
from .impresso_named_entity_recognition_entity_type import ImpressoNamedEntityRecognitionEntityType
from .impresso_named_entity_recognition_entity_wikidata import ImpressoNamedEntityRecognitionEntityWikidata
from .impresso_named_entity_recognition_request import ImpressoNamedEntityRecognitionRequest
from .impresso_named_entity_recognition_request_method import ImpressoNamedEntityRecognitionRequestMethod
from .impresso_named_entity_recognition_response import ImpressoNamedEntityRecognitionResponse
from .location_specific_fields import LocationSpecificFields
from .location_specific_fields_descriptions import LocationSpecificFieldsDescriptions
Expand Down Expand Up @@ -230,6 +231,7 @@
"ImpressoNamedEntityRecognitionEntityType",
"ImpressoNamedEntityRecognitionEntityWikidata",
"ImpressoNamedEntityRecognitionRequest",
"ImpressoNamedEntityRecognitionRequestMethod",
"ImpressoNamedEntityRecognitionResponse",
"LocationSpecificFields",
"LocationSpecificFieldsDescriptions",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from typing import Any, Dict, Type, TypeVar
from typing import Any, Dict, Type, TypeVar, Union

from attrs import define as _attrs_define

from ..models.impresso_named_entity_recognition_request_method import ImpressoNamedEntityRecognitionRequestMethod
from ..types import UNSET, Unset

T = TypeVar("T", bound="ImpressoNamedEntityRecognitionRequest")


Expand All @@ -11,19 +14,29 @@ class ImpressoNamedEntityRecognitionRequest:

Attributes:
text (str): Text to be processed for named entity recognition
method (Union[Unset, ImpressoNamedEntityRecognitionRequestMethod]): NER method to be used: `ner` (default),
`ner-nel` (named entity recognition with named entity linking) and `nel` (linking only - enclose entities in
[START] [END] tags). Default: ImpressoNamedEntityRecognitionRequestMethod.NER.
"""

text: str
method: Union[Unset, ImpressoNamedEntityRecognitionRequestMethod] = ImpressoNamedEntityRecognitionRequestMethod.NER

def to_dict(self) -> Dict[str, Any]:
    """Serialize this request into a plain dictionary.

    The ``text`` entry is always present. The ``method`` entry is added only
    when a method is set (i.e. it is not ``UNSET``), and carries the enum
    member's string value.

    Returns:
        Dict[str, Any]: Dictionary with the populated request fields.
    """
    text_value = self.text

    # Resolve the enum member to its wire value, or keep the UNSET sentinel.
    method_value: Union[Unset, str] = UNSET if isinstance(self.method, Unset) else self.method.value

    serialized: Dict[str, Any] = {"text": text_value}
    if method_value is not UNSET:
        serialized["method"] = method_value

    return serialized

Expand All @@ -32,8 +45,16 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
d = src_dict.copy()
text = d.pop("text")

_method = d.pop("method", UNSET)
method: Union[Unset, ImpressoNamedEntityRecognitionRequestMethod]
if isinstance(_method, Unset):
method = UNSET
else:
method = ImpressoNamedEntityRecognitionRequestMethod(_method)

impresso_named_entity_recognition_request = cls(
text=text,
method=method,
)

return impresso_named_entity_recognition_request
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from enum import Enum
from typing import Literal


class ImpressoNamedEntityRecognitionRequestMethod(str, Enum):
    """Method selector for a named entity recognition request.

    Members also subclass ``str``, so each member compares equal to its
    string value (e.g. ``NER_NEL == "ner-nel"``).
    """

    # Linking only.
    NEL = "nel"
    # Recognition only.
    NER = "ner"
    # Recognition combined with linking.
    NER_NEL = "ner-nel"

    def __str__(self) -> str:
        """Return the member's string value instead of the ``Class.MEMBER`` form."""
        return str(self.value)


# Literal type listing the same string values as the enum members of
# ImpressoNamedEntityRecognitionRequestMethod.
ImpressoNamedEntityRecognitionRequestMethodLiteral = Literal["nel", "ner", "ner-nel"]
7 changes: 7 additions & 0 deletions impresso/api_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,13 @@ class ImpressoNerRequest(BaseModel):
min_length=1,
),
]
method: Annotated[
Optional[Literal['ner', 'ner-nel', 'nel']],
Field(
'ner',
description='NER method to be used: `ner` (default), `ner-nel` (named entity recognition with named entity linking) and `nel` (linking only - enclose entities in [START] [END] tags).',
),
]


class ImpressoNerResponse(BaseModel):
Expand Down
5 changes: 4 additions & 1 deletion impresso/data_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ def _repr_html_(self):
),
"</div>",
(
f'<div style="align-content: center;"><img src="data:image/png;base64,{preview_img}" style="max-width: 800px; width: 100%;"></div>'
(
f'<div style="align-content: center;"><img src="data:image/png;base64,{preview_img}" '
+ 'style="max-width: 800px; width: 100%;"></div>'
)
if preview_img
else None
),
Expand Down
6 changes: 4 additions & 2 deletions impresso/resources/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,15 @@ def find(
entity_id: Return only content items that mention this entity or all/any of the entities.
date_range: Return only content items that were published in this date range.
language: Return only content items that are in this language or all/any of the languages.
mention: Return only content items that mention an entity with this term or all/any of entities with the terms.
mention: Return only content items that mention an entity with this term or all/any
of entities with the terms.
topic_id: Return only content items that are about this topic or all/any of the topics.
collection_id: Return only content items that are in this collection or all/any of the collections.
country: Return only content items that are from this country or all/any of the countries.
access_rights: Return only content items with this access right or all/any of the access rights.
partner_id: Return only content items that are from this partner or all/any of the partners.
text_reuse_cluster_id: Return only content items that are in this text reuse cluster or all/any of the clusters.
text_reuse_cluster_id: Return only content items that are in this text reuse cluster
or all/any of the clusters.

Returns:
_type_: _description_
Expand Down
59 changes: 58 additions & 1 deletion impresso/resources/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
from impresso.api_client.models.impresso_named_entity_recognition_request import (
ImpressoNamedEntityRecognitionRequest,
)
from impresso.api_client.models.impresso_named_entity_recognition_request_method import (
ImpressoNamedEntityRecognitionRequestMethod,
)
from impresso.api_models import ImpressoNerResponse
from impresso.data_container import DataContainer
from impresso.resources.base import Resource
Expand Down Expand Up @@ -55,6 +58,58 @@ class ToolsResource(Resource):
def ner(self, text: str) -> NerContainer:
    """Named Entity Recognition

    Sends the text with the `ner` method selected. This method is faster
    than `ner_nel` but does not provide any linking to external resources.

    Args:
        text (str): Text to process

    Returns:
        NerContainer: List of named entities
    """
    request_body = ImpressoNamedEntityRecognitionRequest(
        text=text,
        method=ImpressoNamedEntityRecognitionRequestMethod.NER,
    )
    response = perform_ner.sync(client=self._api_client, body=request_body)

    # Check the response for an error before wrapping it in a container.
    raise_for_error(response)

    return NerContainer(response, ImpressoNerSchema, web_app_search_result_url=None)

def ner_nel(self, text: str) -> NerContainer:
    """Named Entity Recognition and Named Entity Linking

    Sends the text with the `ner-nel` method selected. This method is slower
    than `ner` but provides linking to external resources.

    Args:
        text (str): Text to process

    Returns:
        NerContainer: List of named entities
    """
    request_body = ImpressoNamedEntityRecognitionRequest(
        text=text,
        method=ImpressoNamedEntityRecognitionRequestMethod.NER_NEL,
    )
    response = perform_ner.sync(client=self._api_client, body=request_body)

    # Check the response for an error before wrapping it in a container.
    raise_for_error(response)

    return NerContainer(response, ImpressoNerSchema, web_app_search_result_url=None)

def nel(self, text: str) -> NerContainer:
"""Named Entity Linking

This method requires named entities to be enclosed in tags: [START]entity[END].

Args:
text (str): Text to process

Expand All @@ -63,7 +118,9 @@ def ner(self, text: str) -> NerContainer:
"""
result = perform_ner.sync(
client=self._api_client,
body=ImpressoNamedEntityRecognitionRequest(text=text),
body=ImpressoNamedEntityRecognitionRequest(
text=text, method=ImpressoNamedEntityRecognitionRequestMethod.NEL
),
)
raise_for_error(result)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ packages = [
]
readme = "README.md"
repository = "https://github.com/impresso/impresso-py"
version = "0.9.7"
version = "0.9.8"

[tool.poetry.urls]
Endpoint = "https://impresso-project.ch/public-api"
Expand Down
Loading