diff --git a/apis_ontology/jelinek_api_filters.py b/apis_ontology/jelinek_api_filters.py
index 4a199c1..7bf5a8d 100644
--- a/apis_ontology/jelinek_api_filters.py
+++ b/apis_ontology/jelinek_api_filters.py
@@ -94,11 +94,11 @@ def build_filter_method(queryset, name, value):
         entities = []
         # get internal id of entity with the given entity_id
         if is_chapter:
-            entities = [c.id for c in Chapter.objects.filter(chapter_number__in=value)]
+            entities = Chapter.objects.filter(chapter_number__in=value).values_list("id", flat=True)
         elif is_country:
-            entities = [c.id for c in F9_Place.objects.filter(country__in=value)]
+            entities = F9_Place.objects.filter(country__in=value).values_list("id", flat=True)
         else:
-            entities = [e.id for e in E1_Crm_Entity.objects.filter(entity_id__in=value)]
+            entities = E1_Crm_Entity.objects.filter(entity_id__in=value).values_list("id", flat=True)
 
         disjunction = Q()
         for (idx, entry) in enumerate(criteria_to_join):
@@ -214,12 +214,27 @@ def build_filter_method(queryset, name, value):
         return queryset.filter(disjunction).distinct("id")
     return build_filter_method
 
+def search_in_work_and_its_manifestations(role, entity_class, lookup_name="entity_id__in"):
+    """Build a filter method keeping F1 works linked via `role` to the looked-up entities, plus the F3 objects of triples whose subject is such a work."""
+    def build_filter_method(queryset, name, value):
+        entities = entity_class.objects.filter(Q(**{lookup_name: value})).values_list("id", flat=True)
+        f1_results = F1_Work.objects.filter(triple_set_from_subj__obj__id__in=entities, triple_set_from_subj__prop__name=role).distinct().values_list("id")
+        f3_results = F3_Manifestation_Product_Type.objects.filter(Q(triple_set_from_obj__subj_id__in=f1_results) & (Q(f3_manifestation_product_type__isnull=False) | Q(f31_performance__isnull=False))).distinct().values_list("id")
+        # QuerySets cannot be concatenated with "+" (TypeError); OR the two id subqueries instead.
+        return queryset.filter(Q(id__in=f1_results) | Q(id__in=f3_results))
+    return build_filter_method
+
+def filter_on_related_work(queryset, name, value):
+    """Keep rows whose `f1_work__genre` is in `value` or whose `related_work` holds an item with a matching "genre"."""
+    matches = [q.id for q in queryset if next((item for item in q.related_work if item["genre"] in value), None)]
+    res = queryset.filter(Q(id__in=matches) | Q(f1_work__genre__in=value))
+    return res
+
+
 class SearchFilter2(django_filters.FilterSet):
     class TextInFilter(django_filters.BaseInFilter, django_filters.CharFilter):
         pass
 
-
-
     searchTerm = django_filters.CharFilter(method=search_in_vectors(cols_to_check=["f10", "dump", "note", "e1", "e40"]))
     person = django_filters.CharFilter(method=search_in_vectors(cols_to_check=["e1", "f10", "dump", "note"]))
     person_id = TextInFilter(method=search_in_vectors(cols_to_check=["f10", "dump", "note"]))
@@ -230,7 +245,6 @@ class TextInFilter(django_filters.BaseInFilter, django_filters.CharFilter):
     work_id = TextInFilter(method=filter_by_entity_id(["triple_set_from_obj__subj"], or_self=True))
     bibl_id = TextInFilter(field_name="f3_manifestation_product_type__entity_id", lookup_expr="in")
     honour_id = TextInFilter(field_name="honour__entity_id", lookup_expr="in")
-    genre = TextInFilter(field_name="f1_work__genre", lookup_expr="in")
     textLang = TextInFilter(field_name="f3_manifestation_product_type__text_language", lookup_expr="in")
     startDate = django_filters.DateFilter(method='start_date_filter')
     endDate = django_filters.DateFilter(method='end_date_filter')
@@ -288,10 +302,7 @@ def exclude_null_values(queryset, name, value):
     filter_name = "{}__isnull".format(name)
     return queryset.exclude(Q(**{filter_name: True}))
 
-def filter_on_related_work(queryset, name, value):
-    matches = [q.id for q in queryset if next((item for item in q.related_work if item["genre"] in value), None)]
-    res = queryset.filter(Q(id__in=matches) | Q(f1_work__genre__in=value))
-    return res
+
 
 class FacetFilter(django_filters.FilterSet):
     class TextInFilter(django_filters.BaseInFilter, django_filters.CharFilter):
diff --git a/apis_ontology/jelinek_api_views.py b/apis_ontology/jelinek_api_views.py
index 46000a8..2ab9c50 100644
--- a/apis_ontology/jelinek_api_views.py
+++ b/apis_ontology/jelinek_api_views.py
@@ -205,8 +205,6 @@ def get_queryset(self):
             "filter_endDate", "filter_persons", "filter_institutions", "filter_personRoles", "filter_institutionRoles"]
         work_only = set(i[0] for i in self.request.GET.items() if i[1] is not None and i[1] != "").issubset(work_only_fields)
 
-
-        print(work_only)
         person_contenttype = ContentType.objects.get_for_model(model=F10_Person)
         institution_contenttype = ContentType.objects.get_for_model(model=E40_Legal_Body)
         person_subquery = F10_Person.objects.filter(triple_set_from_subj__obj_id=OuterRef("pk")).values(json=JSONObject(name="name", entity_id="entity_id"))
diff --git a/apis_ontology/migrations/0036_e1_crm_entity_vector_search_speedup_set.py b/apis_ontology/migrations/0036_e1_crm_entity_vector_search_speedup_set.py
new file mode 100644
index 0000000..7f56cc6
--- /dev/null
+++ b/apis_ontology/migrations/0036_e1_crm_entity_vector_search_speedup_set.py
@@ -0,0 +1,19 @@
+# Generated by Django 4.2.7 on 2023-11-28 09:57
+
+import django.contrib.postgres.search
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('apis_ontology', '0035_e40_legal_body_institution_type'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='e1_crm_entity',
+            name='vector_search_speedup_set',
+            field=django.contrib.postgres.search.SearchVectorField(null=True),
+        ),
+    ]
diff --git a/apis_ontology/models.py b/apis_ontology/models.py
index 72c8625..6cbc06a 100644
--- a/apis_ontology/models.py
+++ b/apis_ontology/models.py
@@ -30,12 +30,13 @@ class E1_Crm_Entity(TempEntityClass):
     vector_related_E40_set = SearchVectorField(null=True)
     vector_related_xml_content_dump_set = SearchVectorField(null=True)
     vector_related_xml_note_set = SearchVectorField(null=True)
+    vector_search_speedup_set = SearchVectorField(null=True)
 
     def get_entity_list_filter():
         class AdHocEntityListFilter(django_filters.FilterSet):
             class Meta:
                 model = E1_Crm_Entity
-                exclude = ["vector_column_e1_set", "vector_related_f10_set", "vector_related_E40_set", "vector_related_xml_content_dump_set", "vector_related_xml_note_set"]
+                exclude = ["vector_column_e1_set", "vector_related_f10_set", "vector_related_E40_set", "vector_related_xml_content_dump_set", "vector_related_xml_note_set", "vector_search_speedup_set"]
         return AdHocEntityListFilter
 
     def save(self, *args, **kwargs):
diff --git a/apis_ontology/ontology_specific_scripts/populate_indexes.py b/apis_ontology/ontology_specific_scripts/populate_indexes.py
index b03bd6b..06f08ab 100644
--- a/apis_ontology/ontology_specific_scripts/populate_indexes.py
+++ b/apis_ontology/ontology_specific_scripts/populate_indexes.py
@@ -1,8 +1,8 @@
-from apis_ontology.models import E1_Crm_Entity, E40_Legal_Body, F10_Person, XMLNote, Xml_Content_Dump
+from apis_ontology.models import Chapter, E1_Crm_Entity, E40_Legal_Body, F10_Person, F1_Work, F31_Performance, F3_Manifestation_Product_Type, Honour, Keyword, XMLNote, Xml_Content_Dump
 
 from django.contrib.postgres.search import SearchVector
 from django.contrib.contenttypes.models import ContentType
-from django.db.models import Value
+from django.db.models import Value, Q
 
 
 def populate_indexes():
@@ -12,6 +12,10 @@ def populate_indexes():
     contenttype_e40 = ContentType.objects.get_for_model(model=E40_Legal_Body)
     contenttype_content_dump = ContentType.objects.get_for_model(model=Xml_Content_Dump)
     contenttype_note = ContentType.objects.get_for_model(model=XMLNote)
+    contenttype_f1 = ContentType.objects.get_for_model(model=F1_Work)
+    contenttype_f3 = ContentType.objects.get_for_model(model=F3_Manifestation_Product_Type)
+    contenttype_honour = ContentType.objects.get_for_model(model=Honour)
+    contenttype_f31 = ContentType.objects.get_for_model(model=F31_Performance)
     for ent in E1_Crm_Entity.objects_inheritance.select_subclasses("f1_work", "f3_manifestation_product_type", "honour", "f31_performance").all():
         count += 1
         print("Processing entity {} of {}".format(count, total))
@@ -62,11 +66,36 @@ def populate_indexes():
         if len(txt_xml_note) > 0:
             check = True
             ent.vector_related_xml_note_set = SearchVector(Value(txt_xml_note), config='german')
+
+        txt_search_speedup = ""
+        related_work = [ent]
+        if ent.self_contenttype in [contenttype_f31, contenttype_f3]:
+            related_work = F1_Work.objects.filter(Q(triple_set_from_subj__obj=ent) | Q(triple_set_from_subj__obj__triple_set_from_subj__obj=ent, triple_set_from_subj__obj__triple_set_from_subj__prop__name="has host")).distinct()
+        # Chapters: one token per chapter the related works are in / are about
+        is_in_chapters = Chapter.objects.filter(triple_set_from_obj__subj__in=related_work, triple_set_from_obj__prop__name="is in chapter")
+        is_about_chapters = Chapter.objects.filter(triple_set_from_obj__subj__in=related_work, triple_set_from_obj__prop__name="is about")
+        for chapter in is_in_chapters:
+            txt_search_speedup += "isinchapter{} ".format(chapter.chapter_number)
+        for chapter in is_about_chapters:
+            txt_search_speedup += "isaboutchapter{} ".format(chapter.chapter_number)
+        # Work: one token per entity the related works are about
+        is_about_work = E1_Crm_Entity.objects.filter(triple_set_from_obj__subj__in=related_work, triple_set_from_obj__prop__name="is about")
+        for work in is_about_work:
+            txt_search_speedup += "isaboutentity{} ".format(work.entity_id)
+        # Keyword: one token per keyword attached to the related works
+        has_keyword = Keyword.objects.filter(triple_set_from_obj__subj__in=related_work, triple_set_from_obj__prop__name="has keyword")
+        for kw in has_keyword:
+            txt_search_speedup += "haskeyword{} ".format(kw.entity_id)
+
+        if len(txt_search_speedup) > 0:
+            check = True
+            ent.vector_search_speedup_set = SearchVector(Value(txt_search_speedup))
+
         if check:
             ent.save()
 
 
 def run(*args, **options):
     def main_run():
         populate_indexes()
     main_run()
\ No newline at end of file