From c7e87d6b5e0ace68e8b8b6f7881b2788d358ac50 Mon Sep 17 00:00:00 2001 From: Birger Schacht Date: Fri, 6 Oct 2023 09:54:49 +0200 Subject: [PATCH] refactor: get rid of apis_highlighter integration On the one hand this cleans up the codebase, on the other hand this frees up the name `apis_highlighter`, so it can be used in a new Django application. --- apis_core/api_routers.py | 17 -- apis_core/apis_entities/api_urls.py | 31 --- apis_core/apis_entities/edit_generic.py | 8 - apis_core/apis_entities/forms.py | 3 - .../apis_entities/serializers_generic.py | 39 ---- .../templates/apis_entities/edit_generic.html | 35 --- apis_core/apis_entities/views.py | 4 - apis_core/apis_metainfo/models.py | 3 - apis_core/apis_relations/forms2.py | 3 - apis_core/apis_relations/models.py | 4 +- apis_core/apis_vocabularies/models.py | 4 - .../custom_context_processors.py | 4 - apis_core/urls.py | 22 -- apis_core/utils/inter_annotator_agreement.py | 207 ------------------ 14 files changed, 1 insertion(+), 383 deletions(-) delete mode 100644 apis_core/utils/inter_annotator_agreement.py diff --git a/apis_core/api_routers.py b/apis_core/api_routers.py index 01cd225c8..56a87368f 100644 --- a/apis_core/api_routers.py +++ b/apis_core/api_routers.py @@ -31,11 +31,6 @@ from apis_core.utils import caching from apis_core.core.mixins import ListViewObjectFilterMixin -if "apis_highlighter" in getattr(settings, "INSTALLED_APPS"): - from apis_highlighter.highlighter import highlight_text_new - from apis_highlighter.models import Annotation - - try: MAX_AGE = settings.MAX_AGE except AttributeError: @@ -202,18 +197,6 @@ def add_related_entity(self, triple): ) -if "apis_highlighter" in getattr(settings, "INSTALLED_APPS"): - - class AnnotationSerializer(serializers.ModelSerializer): - related_object = VocabsBaseSerializer( - source="get_related_entity", read_only=True, many=False - ) - - class Meta: - model = Annotation - fields = ["id", "start", "end", "related_object"] - - def generic_serializer_creation_factory(): lst_cont = caching.get_all_contenttype_classes() not_allowed_filter_fields = [ diff --git a/apis_core/apis_entities/api_urls.py b/apis_core/apis_entities/api_urls.py index 1f262a77a..8c31c620a 100644 --- a/apis_core/apis_entities/api_urls.py +++ b/apis_core/apis_entities/api_urls.py @@ -20,34 +20,3 @@ # path(r'getrelatedplaces/', api_views.GetRelatedPlaces.as_view(), name="GetRelatedPlaces"), # path(r'lifepath//', api_views.LifePathViewset.as_view(), name="Lifepathviewset") ] - -if ( - "deep learning" in getattr(settings, "APIS_COMPONENTS", []) - and "apis_highlighter" in settings.INSTALLED_APPS -): - from apis_highlighter.api_views import TestDLModel - - urlpatterns.append( - path("nlp_model/", TestDLModel.as_view(), name="TestDLModel"), - ) - -if "apis_highlighter" in settings.INSTALLED_APPS: - from apis_highlighter.api_views import ( - AnnotatorAgreementView, - ShowOverlappingHighlights, - ) - - urlpatterns.extend( - [ - path( - "annotatoragreement/", - AnnotatorAgreementView.as_view(), - name="AnnotatorAgreementView", - ), - path( - "overlappinghighlights/", - ShowOverlappingHighlights.as_view(), - name="ShowOverlappingHighlights", - ), - ] - ) diff --git a/apis_core/apis_entities/edit_generic.py b/apis_core/apis_entities/edit_generic.py index 89582af71..7aace7ea8 100644 --- a/apis_core/apis_entities/edit_generic.py +++ b/apis_core/apis_entities/edit_generic.py @@ -30,9 +30,6 @@ from apis_core.utils.settings import get_entity_settings_by_modelname from apis_core.apis_entities.mixins import EntityMixin, EntityInstanceMixin -if "apis_highlighter" in settings.INSTALLED_APPS: - from apis_highlighter.forms import SelectAnnotatorAgreement - @method_decorator(login_required, name="dispatch") class GenericEntitiesEditView(EntityInstanceMixin, View): @@ -89,10 +86,6 @@ def get(self, request, *args, **kwargs): ) form = get_entities_form(self.entity.title()) form = form(instance=self.instance) - if "apis_highlighter" in settings.INSTALLED_APPS: - form_ann_agreement = SelectAnnotatorAgreement() - else: - form_ann_agreement = False if "apis_bibsonomy" in settings.INSTALLED_APPS: apis_bibsonomy = getattr(settings, "APIS_BIBSONOMY_FIELDS", []) apis_bibsonomy_texts = getattr(settings, "APIS_BIBSONOMY_TEXTS", False) @@ -127,7 +120,6 @@ def get(self, request, *args, **kwargs): "right_card": side_bar, "object_revisions": object_revisions, "object_lod": object_lod, - "form_ann_agreement": form_ann_agreement, "apis_bibsonomy": apis_bibsonomy, } form_merge_with = GenericEntitiesStanbolForm(self.entity, ent_merge_pk=self.pk) diff --git a/apis_core/apis_entities/forms.py b/apis_core/apis_entities/forms.py index d72ed971b..393abf87b 100644 --- a/apis_core/apis_entities/forms.py +++ b/apis_core/apis_entities/forms.py @@ -17,9 +17,6 @@ from apis_core.utils import DateParser, caching, settings as apis_settings from .fields import ListSelect2, Select2Multiple -if "apis_highlighter" in settings.INSTALLED_APPS: - from apis_highlighter.models import AnnotationProject - class SearchForm(forms.Form): search = forms.CharField(label="Search") diff --git a/apis_core/apis_entities/serializers_generic.py b/apis_core/apis_entities/serializers_generic.py index 69be8e0a5..47c3bb50c 100644 --- a/apis_core/apis_entities/serializers_generic.py +++ b/apis_core/apis_entities/serializers_generic.py @@ -176,7 +176,6 @@ class RelationEntitySerializer(serializers.Serializer): start_date_written = serializers.DateField() end_date_written = serializers.DateField() relation_type = serializers.SerializerMethodField(method_name="add_relation_label") - annotation = serializers.SerializerMethodField(method_name="add_annotations") revisions = serializers.SerializerMethodField(method_name="add_revisions") def add_revisions(self, obj): @@ -197,44 +196,6 @@ def add_revisions(self, obj): ) return res - def add_annotations(self, obj): - if "apis_highlighter" in settings.INSTALLED_APPS: - res = [] - offs = 50 - for an in obj.annotation_set.all(): - r1 = dict() - r1["id"] = an.pk - r1["user"] = an.user_added.username - text = an.text.text - if offs < an.start: - s = an.start - offs - else: - s = 0 - if offs + an.end < len(text): - e = an.end + offs - else: - e = len(text) - r1["annotation"] = text[an.start : an.end] - r1["text"] = text[s:e] - r1["text"] = "{}{}{}".format( - r1["text"][: an.start - s], - r1["text"][an.start - s : an.end - s], - r1["text"][an.end - s :], - ) - r1["text"] = r1["text"].replace("\r\n", "
") - r1["text"] = r1["text"].replace("\r", "
") - r1["text"] = r1["text"].replace("\n", "
") - - r1["string_offset"] = "{}-{}".format(an.start, an.end) - # r1["text_url"] = self.context["request"].build_absolute_uri( - # reverse("apis_core:apis_api:text-detail", kwargs={"pk": an.text_id}) - # ) - r1[ - "text_url" - ] = f"{base_uri}{reverse('apis_core:apis_api:text-detail', kwargs={'pk': an.text_id})}" - res.append(r1) - return res - def add_entity(self, obj): return EntitySerializer( getattr(obj, "related_{}".format(self.entity_type)), depth_ent=0 diff --git a/apis_core/apis_entities/templates/apis_entities/edit_generic.html b/apis_core/apis_entities/templates/apis_entities/edit_generic.html index 0cd75a5e7..9c3414a0c 100644 --- a/apis_core/apis_entities/templates/apis_entities/edit_generic.html +++ b/apis_core/apis_entities/templates/apis_entities/edit_generic.html @@ -4,13 +4,6 @@ {% block scriptHeader %} {{ block.super }} - {% if highlighter_active %} - - - {% endif %} - {% if apis_bibsonomy %} {% include 'apis_bibsonomy/apis_bibsonomy_include.html' %} {% endif %} @@ -126,7 +119,6 @@

{% endblock editbuttons %} - @@ -205,20 +197,6 @@

{% endif %} {% if object_texts %} - - {% if user.is_superuser %} - {% if form_ann_agreement %} -
-
-
- {% crispy form_ann_agreement form_ann_agreement.helper %} -
-
-
-
- {% endif %} - {% endif %} -
@@ -426,19 +404,6 @@

GetFormAjax("{{obj.2}}"); //unbind_ajax_forms(); {% endfor %} -{% if highlighter_active %} - init_apis_highlighter(1, {{instance.pk}}); - if (typeof $.ApisHigh.vars == 'undefined') { - $.ApisHigh.vars = {}; }; - $.ApisHigh.vars.entity_type = '{{entity_type}}'; - $.ApisHigh.vars.instance_pk = '{{instance.pk}}'; - $.ApisHigh.vars.urls = {}; - $.ApisHigh.vars.urls.get_form_ajax = "{% url 'apis:apis_relations:get_form_ajax' %}"; - $.ApisHigh.vars.urls.annotatoragreementview = "{% url 'apis:apis_api2:AnnotatorAgreementView' %}"; - $.ApisHigh.vars.urls.showoverlappinghighlights = "{% url 'apis:apis_api2:ShowOverlappingHighlights' %}"; - activate_context_menu_highlighter(); - $('body').on("click", 'mark.highlight', highlight_detail); - {% endif %} }; function deactivate_editing(){ $('.reldelete').addClass("disabled"); diff --git a/apis_core/apis_entities/views.py b/apis_core/apis_entities/views.py index 458d4a3f9..ff77f18ec 100644 --- a/apis_core/apis_entities/views.py +++ b/apis_core/apis_entities/views.py @@ -30,10 +30,6 @@ ) from .tables import get_entities_table -if "apis_highlighter" in settings.INSTALLED_APPS: - from apis_highlighter.forms import SelectAnnotationProject - from apis_highlighter.highlighter import highlight_text_new - if "charts" in settings.INSTALLED_APPS: from charts.models import ChartConfig from charts.views import create_payload diff --git a/apis_core/apis_metainfo/models.py b/apis_core/apis_metainfo/models.py index 84876f549..a80f63826 100644 --- a/apis_core/apis_metainfo/models.py +++ b/apis_core/apis_metainfo/models.py @@ -41,9 +41,6 @@ NEXT_PREV = getattr(settings, "APIS_NEXT_PREV", True) -if "apis_highlighter" in settings.INSTALLED_APPS: - from apis_highlighter.models import Annotation - @reversion.register() class RootObject(models.Model): diff --git a/apis_core/apis_relations/forms2.py b/apis_core/apis_relations/forms2.py index 1717e7188..4c58979e9 100644 --- a/apis_core/apis_relations/forms2.py +++ b/apis_core/apis_relations/forms2.py @@ -29,9 +29,6 @@ # from dal.autocomplete import ListSelect2 -if "apis_highlighter" in settings.INSTALLED_APPS: - pass - class GenericTripleForm(forms.ModelForm): # TODO RDF : Add Notes and references diff --git a/apis_core/apis_relations/models.py b/apis_core/apis_relations/models.py index a232e4377..f91473773 100644 --- a/apis_core/apis_relations/models.py +++ b/apis_core/apis_relations/models.py @@ -41,9 +41,7 @@ def filter_ann_proj(self, request=None, ann_proj=1, include_all=True): ) def filter_for_user(self): - if hasattr( - settings, "APIS_SHOW_ONLY_PUBLISHED" - ) or "apis_highlighter" in getattr(settings, "INSTALLED_APPS"): + if hasattr(settings, "APIS_SHOW_ONLY_PUBLISHED"): return self.get_queryset().filter_for_user() else: return self.get_queryset() diff --git a/apis_core/apis_vocabularies/models.py b/apis_core/apis_vocabularies/models.py index 4941f482d..726ac52c0 100644 --- a/apis_core/apis_vocabularies/models.py +++ b/apis_core/apis_vocabularies/models.py @@ -48,10 +48,6 @@ class VocabsBaseClass(RootObject): vocab_name = models.ForeignKey( VocabNames, blank=True, null=True, on_delete=models.SET_NULL ) - if "apis_highlighter" in settings.INSTALLED_APPS: - from apis_highlighter.models import Annotation - - annotation_set = GenericRelation(Annotation) def __str__(self): return self.label diff --git a/apis_core/context_processors/custom_context_processors.py b/apis_core/context_processors/custom_context_processors.py index 81a09a656..ee498ddce 100644 --- a/apis_core/context_processors/custom_context_processors.py +++ b/apis_core/context_processors/custom_context_processors.py @@ -33,10 +33,6 @@ def list_apis_settings(request): "request": request, "basetemplate": getattr(settings, "BASE_TEMPLATE", "base.html"), } - if "apis_highlighter" in settings.INSTALLED_APPS: - res["highlighter_active"] = True - else: - res["highlighter_active"] = False if "apis_bibsonomy" in settings.INSTALLED_APPS: res["bibsonomy_active"] = True else: diff --git a/apis_core/urls.py b/apis_core/urls.py index 13402e075..6faad5779 100644 --- a/apis_core/urls.py +++ b/apis_core/urls.py @@ -45,23 +45,6 @@ additional_serializer.name, ) -if "apis_highlighter" in settings.INSTALLED_APPS: - from apis_highlighter.api_views import ( - HighlighterProjectViewSet, - HighlighterTextHighViewSet, - HighlighterMenuEntryViewSet, - HighlighterAnnotationViewSet, - ) - - router.register(r"HLProjects", HighlighterProjectViewSet) - router.register(r"HLTextHigh", HighlighterTextHighViewSet) - router.register(r"HLMenuEntry", HighlighterMenuEntryViewSet) - # router.register( - # r"HLTextHighlighter", HighlighterHighlightTextViewSet, "HLTextHighlighter" - # ) - # router.register(r"HLVocabularyAPI", HighlighterVocabularyAPIViewSet) - router.register(r"HLAnnotation", HighlighterAnnotationViewSet) - # router.register(r"users", UserViewSet) # router.register(r"GeoJsonPlace", PlaceGeoJsonViewSet, "PlaceGeoJson") # router.register(r"NetJson", NetJsonViewSet, "NetJson") @@ -187,11 +170,6 @@ def build_apis_mock_request(method, path, view, original_request, **kwargs): # url(r'^accounts/', include('registration.backends.simple.urls')), ] -if "apis_highlighter" in settings.INSTALLED_APPS: - urlpatterns.append( - path("highlighter/", include("apis_highlighter.urls", namespace="highlighter")) - ) - if "apis_fulltext_download" in settings.INSTALLED_APPS: urlpatterns.append( path( diff --git a/apis_core/utils/inter_annotator_agreement.py b/apis_core/utils/inter_annotator_agreement.py deleted file mode 100644 index 4abd47d9e..000000000 --- a/apis_core/utils/inter_annotator_agreement.py +++ /dev/null @@ -1,207 +0,0 @@ -from django.conf import settings -from django.contrib.auth.models import User -from django.contrib.contenttypes.models import ContentType -from django.db.models.query import QuerySet - -from apis_core.apis_metainfo.models import Text -from apis_core.apis_entities.models import TempEntityClass -from apis_highlighter.models import Annotation - -if "annotator agreement" in getattr(settings, "APIS_COMPONENTS", []): - from nltk.metrics import AnnotationTask - import pandas as pd - from sklearn.metrics import precision_recall_fscore_support - import numpy as np - - -class InternalDataAgreement(object): - @staticmethod - def internal_data_agreement_calc( - texts, ann1, ann2, anno_proj=None, format_string="start_end_text" - ): - if type(texts) == QuerySet: - q = {"text__in": texts} - else: - q = {"text": texts} - lst_user_ann = [] - if anno_proj: - q["annotation_project_id"] = anno_proj - lst_ann_fin = dict() - for v in [ann1, ann2]: - lst_ann = [] - q["user_added_id"] = v - for an in Annotation.objects.filter(**q).order_by("start"): - lst_ann.append(an.annotation_hash(format_string=format_string)) - if an.user_added_id not in lst_user_ann: - lst_user_ann.append(an.user_added_id) - lst_ann_fin[v] = lst_ann - ann_task = [] - for ann in lst_ann_fin[ann1]: - ann_task.append((ann1, ann, 1)) - if ann in lst_ann_fin[ann2]: - ann_task.append((ann2, ann, 1)) - else: - ann_task.append((ann2, ann, 0)) - for ann in lst_ann_fin[ann2]: - if ann not in lst_ann_fin[ann1]: - ann_task.append((ann2, ann, 1)) - ann_task.append((ann1, ann, 0)) - - if len(ann_task) == 0: - return None - else: - return AnnotationTask(data=ann_task) - - def precision_recall_calc( - self, - texts, - gold_standard, - user_group, - anno_proj=None, - format_string="start_end_text", - ): - if type(texts) == QuerySet: - q = {"text__in": texts} - else: - q = {"text": texts} - if anno_proj: - q["annotation_project_id"] = anno_proj - lst_ann_fin = dict() - if not format_string: - format_string = "start_end_text" - user_list = [gold_standard] - user_list.extend( - User.objects.filter(groups__pk=user_group) - .exclude(pk=gold_standard) - .values_list("pk", flat=True) - ) - df = pd.DataFrame() - for v in user_list: - lst_ann = [] - q["user_added_id"] = v - for an in Annotation.objects.filter(**q).order_by("start"): - lst_ann.append(an.annotation_hash(format_string=format_string)) - lst_ann_fin[v] = lst_ann - gold_username = User.objects.get(pk=gold_standard).username - for u in lst_ann_fin.keys(): - ann_username = User.objects.get(pk=u).username - if u == gold_standard: - continue - gold_list = [] - ann_list = [] - for ann in lst_ann_fin[u]: - if ann in lst_ann_fin[gold_standard]: - gold_list.append(1) - ann_list.append(1) - else: - gold_list.append(0) - ann_list.append(1) - for ann in lst_ann_fin[gold_standard]: - if ann not in lst_ann_fin[u]: - gold_list.append(1) - ann_list.append(0) - prec_res = precision_recall_fscore_support( - np.array(gold_list), np.array(ann_list), average="binary", pos_label=1 - ) - for idx, k in enumerate(["precission", "recall", "fbeta_score", "support"]): - df.loc[ann_username, k] = prec_res[idx] - return df - - def get_html_table(self): - css_class = "table table-bordered table-hover" - self.html_tables = dict() - self.html_tables_gold = None - if isinstance(self.texts, dict): - for txt_id in self.texts.keys(): - self.html_tables[txt_id] = self.texts[txt_id].to_html(classes=css_class) - if self.texts_gold is not None: - self.html_tables_gold = dict() - for txt_id in self.texts_gold.keys(): - self.html_tables_gold[txt_id] = self.texts_gold[txt_id].to_html( - classes=css_class - ) - elif isinstance(self.texts, pd.DataFrame): - self.html_tables = self.texts.to_html(classes=css_class) - if self.texts_gold is not None: - self.html_tables_gold = self.texts_gold.to_html(classes=css_class) - return self.html_tables, self.html_tables_gold - - def __init__( - self, - texts, - anno_proj, - user_group, - metrics="Do_alpha", - format_string="start_end_text", - combine=False, - gold_standard=False, - ): - if not type(texts) == QuerySet: - m_name = ContentType.objects.get_for_model(texts).name - if ( - m_name == "person" - or m_name == "place" - or m_name == "institution" - or m_name == "event" - or m_name == "work" - ): - texts = Text.objects.filter(tempentityclass=texts).distinct() - elif m_name == "collection": - t = TempEntityClass.objects.filter(collection=texts) - texts = Text.objects.filter(tempentityclass__in=t).distinct() - self.texts_gold = None - self._list_users = dict() - if combine: - self.texts = pd.DataFrame() - else: - self.texts = dict() - if combine: - texts = [texts] - if gold_standard: - self.texts_gold = dict() - if not user_group: - user_qs = dict() - else: - user_qs = {"groups__pk": user_group} - for txt in texts: - df = pd.DataFrame() - test = False - for ann1 in User.objects.filter(**user_qs): - for ann2 in User.objects.filter(**user_qs): - if ann1 == ann2: - t = None - continue - try: - t = getattr( - self.internal_data_agreement_calc( - txt, - ann1.pk, - ann2.pk, - anno_proj=anno_proj, - format_string=format_string, - ), - metrics, - )() - if pd.notnull(t): - test = True - except AttributeError as ex: - t = None - df.loc[ann1.username, ann2.username] = t - if ann1.pk not in self._list_users.keys(): - self._list_users[ann1.pk] = ann1 - if ann2.pk not in self._list_users.keys(): - self._list_users[ann2.pk] = ann2 - if test: - if combine: - self.texts = df - else: - self.texts[txt.pk] = df - if gold_standard: - if combine: - self.texts_gold = self.precision_recall_calc( - txt, gold_standard, user_group, anno_proj=anno_proj - ) - else: - self.texts_gold[txt.pk] = self.precision_recall_calc( - txt, gold_standard, user_group, anno_proj=anno_proj - )