Skip to content

Commit

Permalink
Add index for relations to chapter, work & keyword
Browse files Browse the repository at this point in the history
  • Loading branch information
katharinawuensche committed Nov 28, 2023
1 parent e00e988 commit e1f772c
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 15 deletions.
31 changes: 21 additions & 10 deletions apis_ontology/jelinek_api_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,11 @@ def build_filter_method(queryset, name, value):
entities = []
# get internal id of entity with the given entity_id
if is_chapter:
entities = [c.id for c in Chapter.objects.filter(chapter_number__in=value)]
entities = Chapter.objects.filter(chapter_number__in=value).values_list("id", flat=True)
elif is_country:
entities = [c.id for c in F9_Place.objects.filter(country__in=value)]
entities = F9_Place.objects.filter(country__in=value).values_list("id", flat=True)
else:
entities = [e.id for e in E1_Crm_Entity.objects.filter(entity_id__in=value)]
entities = E1_Crm_Entity.objects.filter(entity_id__in=value).values_list("id", flat=True)

disjunction = Q()
for (idx, entry) in enumerate(criteria_to_join):
Expand Down Expand Up @@ -214,12 +214,27 @@ def build_filter_method(queryset, name, value):
return queryset.filter(disjunction).distinct("id")
return build_filter_method

def search_in_work_and_its_manifestations(role, entity_class, lookup_name="entity_id__in"):
    """Build a django-filter ``method`` callback that matches works and their manifestations.

    Args:
        role: Property name a triple must carry (``prop__name``) to link a
            work (as subject) to one of the looked-up entities (as object).
        entity_class: Model whose rows are looked up from the filter value.
        lookup_name: Field lookup applied to ``entity_class`` with the filter
            value; defaults to ``"entity_id__in"``.

    Returns:
        A ``build_filter_method(queryset, name, value)`` callable that
        narrows ``queryset`` to rows whose ``id`` belongs either to a matching
        ``F1_Work`` or to an ``F3_Manifestation_Product_Type`` that is the
        object of a triple whose subject is one of those works.
    """
    def build_filter_method(queryset, name, value):
        # Internal ids of the entities selected by the filter value.
        entities = entity_class.objects.filter(Q(**{lookup_name: value})).values_list("id", flat=True)

        # Works that are the subject of a `role` triple pointing at one of the entities.
        f1_results = (
            F1_Work.objects
            .filter(
                triple_set_from_subj__obj__id__in=entities,
                triple_set_from_subj__prop__name=role,
            )
            .distinct()
            .values_list("id", flat=True)
        )

        # Rows that are the object of a triple whose subject is one of those
        # works and that have a non-null f3_manifestation_product_type or
        # f31_performance relation.
        # NOTE(review): presumably this restricts to concrete manifestation /
        # performance subclasses - confirm against the model hierarchy.
        f3_results = (
            F3_Manifestation_Product_Type.objects
            .filter(
                Q(triple_set_from_obj__subj_id__in=f1_results)
                & (Q(f3_manifestation_product_type__isnull=False) | Q(f31_performance__isnull=False))
            )
            .distinct()
            .values_list("id", flat=True)
        )

        # QuerySets cannot be concatenated with "+" (TypeError), and "|" is
        # not allowed across different base models, so OR two id__in lookups.
        return queryset.filter(Q(id__in=f1_results) | Q(id__in=f3_results))
    return build_filter_method

def filter_on_related_work(queryset, name, value):
    """Keep rows whose own work genre, or a related work's genre, is in ``value``.

    Each row's ``related_work`` is scanned in Python for an entry whose
    ``"genre"`` is contained in ``value``; the ids of such rows are then
    OR-ed with a database-side ``f1_work__genre__in`` lookup.
    """
    matching_ids = []
    for row in queryset:
        # First related-work entry with a requested genre, or None if absent.
        hit = next((entry for entry in row.related_work if entry["genre"] in value), None)
        if hit:
            matching_ids.append(row.id)

    by_related_work = Q(id__in=matching_ids)
    by_own_genre = Q(f1_work__genre__in=value)
    return queryset.filter(by_related_work | by_own_genre)




class SearchFilter2(django_filters.FilterSet):
class TextInFilter(django_filters.BaseInFilter, django_filters.CharFilter):
pass



searchTerm = django_filters.CharFilter(method=search_in_vectors(cols_to_check=["f10", "dump", "note", "e1", "e40"]))
person = django_filters.CharFilter(method=search_in_vectors(cols_to_check=["e1", "f10", "dump", "note"]))
person_id = TextInFilter(method=search_in_vectors(cols_to_check=["f10", "dump", "note"]))
Expand All @@ -230,7 +245,6 @@ class TextInFilter(django_filters.BaseInFilter, django_filters.CharFilter):
work_id = TextInFilter(method=filter_by_entity_id(["triple_set_from_obj__subj"], or_self=True))
bibl_id = TextInFilter(field_name="f3_manifestation_product_type__entity_id", lookup_expr="in")
honour_id = TextInFilter(field_name="honour__entity_id", lookup_expr="in")
genre = TextInFilter(field_name="f1_work__genre", lookup_expr="in")
textLang = TextInFilter(field_name="f3_manifestation_product_type__text_language", lookup_expr="in")
startDate = django_filters.DateFilter(method='start_date_filter')
endDate = django_filters.DateFilter(method='end_date_filter')
Expand Down Expand Up @@ -288,10 +302,7 @@ def exclude_null_values(queryset, name, value):
filter_name = "{}__isnull".format(name)
return queryset.exclude(Q(**{filter_name: True}))

def filter_on_related_work(queryset, name, value):
    """Keep rows whose own work genre, or a related work's genre, is in ``value``.

    Each row's ``related_work`` is scanned in Python for an entry whose
    ``"genre"`` is contained in ``value``; the ids of such rows are then
    OR-ed with a database-side ``f1_work__genre__in`` lookup.
    """
    matching_ids = []
    for row in queryset:
        # First related-work entry with a requested genre, or None if absent.
        hit = next((entry for entry in row.related_work if entry["genre"] in value), None)
        if hit:
            matching_ids.append(row.id)

    by_related_work = Q(id__in=matching_ids)
    by_own_genre = Q(f1_work__genre__in=value)
    return queryset.filter(by_related_work | by_own_genre)


class FacetFilter(django_filters.FilterSet):
class TextInFilter(django_filters.BaseInFilter, django_filters.CharFilter):
Expand Down
2 changes: 0 additions & 2 deletions apis_ontology/jelinek_api_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,6 @@ def get_queryset(self):
"filter_endDate", "filter_persons", "filter_institutions", "filter_personRoles", "filter_institutionRoles"]
work_only = set(i[0] for i in self.request.GET.items() if i[1] is not None and i[1] != "").issubset(work_only_fields)


print(work_only)
person_contenttype = ContentType.objects.get_for_model(model=F10_Person)
institution_contenttype = ContentType.objects.get_for_model(model=E40_Legal_Body)
person_subquery = F10_Person.objects.filter(triple_set_from_subj__obj_id=OuterRef("pk")).values(json=JSONObject(name="name", entity_id="entity_id"))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 4.2.7 on 2023-11-28 09:57

import django.contrib.postgres.search
from django.db import migrations


class Migration(migrations.Migration):
    """Add the nullable ``vector_search_speedup_set`` search-vector column to ``e1_crm_entity``."""

    # Must run after the migration that precedes it in the apis_ontology app.
    dependencies = [('apis_ontology', '0035_e40_legal_body_institution_type')]

    operations = [
        migrations.AddField(
            field=django.contrib.postgres.search.SearchVectorField(null=True),
            name='vector_search_speedup_set',
            model_name='e1_crm_entity',
        ),
    ]
3 changes: 2 additions & 1 deletion apis_ontology/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@ class E1_Crm_Entity(TempEntityClass):
vector_related_E40_set = SearchVectorField(null=True)
vector_related_xml_content_dump_set = SearchVectorField(null=True)
vector_related_xml_note_set = SearchVectorField(null=True)
vector_search_speedup_set = SearchVectorField(null=True)

def get_entity_list_filter():
    """Return a ``FilterSet`` class for ``E1_Crm_Entity`` that omits the search-vector columns."""
    class AdHocEntityListFilter(django_filters.FilterSet):
        class Meta:
            model = E1_Crm_Entity
            # Every SearchVectorField declared on the model is left out of the
            # generated filters; a single assignment replaces the earlier
            # duplicate that was immediately overwritten.
            # NOTE(review): presumably django-filter cannot auto-generate a
            # filter for SearchVectorField - confirm.
            exclude = [
                "vector_column_e1_set",
                "vector_related_f10_set",
                "vector_related_E40_set",
                "vector_related_xml_content_dump_set",
                "vector_related_xml_note_set",
                "vector_search_speedup_set",
            ]
    return AdHocEntityListFilter

def save(self, *args, **kwargs):
Expand Down
66 changes: 64 additions & 2 deletions apis_ontology/ontology_specific_scripts/populate_indexes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

from apis_ontology.models import E1_Crm_Entity, E40_Legal_Body, F10_Person, XMLNote, Xml_Content_Dump
from apis_ontology.models import Chapter, E1_Crm_Entity, E40_Legal_Body, F10_Person, F1_Work, F31_Performance, F3_Manifestation_Product_Type, Honour, Keyword, XMLNote, Xml_Content_Dump
from django.contrib.postgres.search import SearchVector
from django.contrib.contenttypes.models import ContentType
from django.db.models import Value
from django.db.models import Value, Q


def populate_indexes():
Expand All @@ -12,6 +12,10 @@ def populate_indexes():
contenttype_e40 = ContentType.objects.get_for_model(model=E40_Legal_Body)
contenttype_content_dump = ContentType.objects.get_for_model(model=Xml_Content_Dump)
contenttype_note = ContentType.objects.get_for_model(model=XMLNote)
contenttype_f1 = ContentType.objects.get_for_model(model=F1_Work)
contenttype_f3 = ContentType.objects.get_for_model(model=F3_Manifestation_Product_Type)
contenttype_honour = ContentType.objects.get_for_model(model=Honour)
contenttype_f31 = ContentType.objects.get_for_model(model=F31_Performance)
for ent in E1_Crm_Entity.objects_inheritance.select_subclasses("f1_work", "f3_manifestation_product_type", "honour", "f31_performance").all():
count += 1
print("Processing entity {} of {}".format(count, total))
Expand Down Expand Up @@ -62,11 +66,69 @@ def populate_indexes():
if len(txt_xml_note) > 0:
check = True
ent.vector_related_xml_note_set = SearchVector(Value(txt_xml_note), config='german')

txt_search_speedup = ""
related_work = [ent]
if ent.self_contenttype in [contenttype_f31, contenttype_f3]:
related_work = F1_Work.objects.filter(Q(triple_set_from_subj__obj=ent) | Q(triple_set_from_subj__obj__triple_set_from_subj__obj=ent, triple_set_from_subj__obj__triple_set_from_subj__prop__name="has host")).distinct()
# Chapters
is_in_chapters = Chapter.objects.filter(triple_set_from_obj__subj__in=related_work, triple_set_from_obj__prop__name="is in chapter")
is_about_chapters = Chapter.objects.filter(triple_set_from_obj__subj__in=related_work, triple_set_from_obj__prop__name="is about")
for chapter in is_in_chapters:
txt_search_speedup += "isinchapter{} ".format(chapter.chapter_number)
for chapter in is_about_chapters:
txt_search_speedup += "isaboutchapter{} ".format(chapter.chapter_number)
# Work
is_about_work = E1_Crm_Entity.objects.filter(triple_set_from_obj__subj__in=related_work, triple_set_from_obj__prop__name="is about")
for work in is_about_work:
txt_search_speedup += "isaboutentity{} ".format(work.entity_id)
# Keyword
has_keyword = Keyword.objects.filter(triple_set_from_obj__subj__in=related_work, triple_set_from_obj__prop__name="has keyword")
for kw in has_keyword:
txt_search_speedup += "haskeyword{} ".format(kw.entity_id)

if len(txt_search_speedup) > 0:
check = True
ent.vector_search_speedup_set = SearchVector(Value(txt_search_speedup))

if check:
ent.save()

# def populate_f3_indexes():
# count=0
# total=F3_Manifestation_Product_Type.objects.count()
# for ent in F3_Manifestation_Product_Type.objects.all():
# check = False
# count += 1
# print("Processing F3 {}/{}".format(count, total))
# txt_search_speedup = ""
# related_work = F1_Work.objects.filter(Q(triple_set_from_subj__obj=ent) | Q(triple_set_from_subj__obj__triple_set_from_subj__obj=ent, triple_set_from_subj__obj__triple_set_from_subj__prop__name="has host")).distinct()
# # Chapters
# is_in_chapters = Chapter.objects.filter(triple_set_from_obj__subj__in=related_work, triple_set_from_obj__prop__name="is in chapter")
# is_about_chapters = Chapter.objects.filter(triple_set_from_obj__subj__in=related_work, triple_set_from_obj__prop__name="is about")
# for chapter in is_in_chapters:
# txt_search_speedup += "isinchapter{} ".format(chapter.chapter_number)
# for chapter in is_about_chapters:
# txt_search_speedup += "isaboutchapter{} ".format(chapter.chapter_number)
# # Work
# is_about_work = E1_Crm_Entity.objects.filter(triple_set_from_obj__subj__in=related_work, triple_set_from_obj__prop__name="is about")
# for work in is_about_work:
# txt_search_speedup += "isaboutentity{} ".format(work.entity_id)
# # Keyword
# has_keyword = Keyword.objects.filter(triple_set_from_obj__subj__in=related_work, triple_set_from_obj__prop__name="has keyword")
# for kw in has_keyword:
# txt_search_speedup += "haskeyword{} ".format(kw.entity_id)

# if len(txt_search_speedup) > 0:
# check = True
# ent.vector_search_speedup_set = SearchVector(Value(txt_search_speedup))
# if check:
# ent.save()



def run(*args, **options):
    """Script entry point: rebuild the search indexes via ``populate_indexes``.

    ``args`` and ``options`` are accepted but not used.
    """
    def _rebuild():
        # Only the main index population runs; populate_f3_indexes() is disabled.
        populate_indexes()

    _rebuild()

0 comments on commit e1f772c

Please sign in to comment.