From 2015e4f2f55350a01f44de1dee36586c5ff33560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matja=C5=BE=20Horvat?= Date: Mon, 11 Mar 2024 09:18:39 +0100 Subject: [PATCH] Optimize filters by introducing Resource.order (#3130) String filters can be slow, especially because ordering entities by Resource.path is slow. It turns out that: - Adding DB index to Resource.path doesn't bring performance gains. - Sorting entities by any other Resource.field is fast. The reason for slowness is probably that Resource.path can be a pretty long text field. So we're now introducing Resource.order, which to represent the alphabetic order of resources in the project. We reorder resources when new resources are added to the project. --- .../base/migrations/0054_resource_order.py | 18 +++++++++ .../0055_populate_resource_order.py | 38 +++++++++++++++++++ pontoon/base/models.py | 27 ++++++++++++- pontoon/base/tests/models/test_entity.py | 1 + pontoon/sync/core.py | 1 + pontoon/sync/tests/test_core.py | 16 ++++++++ 6 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 pontoon/base/migrations/0054_resource_order.py create mode 100644 pontoon/base/migrations/0055_populate_resource_order.py diff --git a/pontoon/base/migrations/0054_resource_order.py b/pontoon/base/migrations/0054_resource_order.py new file mode 100644 index 0000000000..622f2bf5af --- /dev/null +++ b/pontoon/base/migrations/0054_resource_order.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.24 on 2024-03-08 14:27 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("base", "0053_alter_translation_index_together"), + ] + + operations = [ + migrations.AddField( + model_name="resource", + name="order", + field=models.PositiveIntegerField(default=0), + ), + ] diff --git a/pontoon/base/migrations/0055_populate_resource_order.py b/pontoon/base/migrations/0055_populate_resource_order.py new file mode 100644 index 0000000000..cd3d62a483 --- /dev/null +++ b/pontoon/base/migrations/0055_populate_resource_order.py @@ -0,0 +1,38 @@ +# Generated by Django 3.2.24 on 2024-03-08 14:29 + +from django.db import migrations + + +def populate_resource_order(apps, schema_editor): + Project = apps.get_model("base", "Project") + Resource = apps.get_model("base", "Resource") + projects = Project.objects.all() + + ordered_resources = [] + + for p in projects.prefetch_related("resources"): + resources = p.resources.all().order_by("path") + for idx, r in enumerate(resources): + r.order = idx + ordered_resources.append(r) + + Resource.objects.bulk_update(ordered_resources, ["order"]) + + +def reset_resource_order(apps, schema_editor): + Resource = apps.get_model("base", "Resource") + Resource.objects.update(order=0) + + +class Migration(migrations.Migration): + + dependencies = [ + ("base", "0054_resource_order"), + ] + + operations = [ + migrations.RunPython( + code=populate_resource_order, + reverse_code=reset_resource_order, + ), + ] diff --git a/pontoon/base/models.py b/pontoon/base/models.py index 8d64be837d..9aa3d23f89 100644 --- a/pontoon/base/models.py +++ b/pontoon/base/models.py @@ -1575,6 +1575,22 @@ def available_locales_list(self): """Get a list of available locale codes.""" return list(self.locales.all().values_list("code", flat=True)) + def reset_resource_order(self): + """ + Sorting resources by path is a heavy operation, so we use the Resource.order field + to represent the alphabetic order of resources in the project. + + This method resets the order field, and should be called when new resources are + added to or removed from the project. + """ + ordered_resources = [] + + for idx, r in enumerate(self.resources.order_by("path")): + r.order = idx + ordered_resources.append(r) + + Resource.objects.bulk_update(ordered_resources, ["order"]) + class ProjectSlugHistory(models.Model): project = models.ForeignKey("Project", on_delete=models.CASCADE) @@ -2147,6 +2163,15 @@ def asymmetric(self): class Resource(models.Model): project = models.ForeignKey(Project, models.CASCADE, related_name="resources") path = models.TextField() # Path to localization file + + """ + Index in the alphabetically sorted list of resources + + Sorting resources by path is a heavy operation, so we use this field + to represent the alphabetic order of resources in the project. + """ + order = models.PositiveIntegerField(default=0) + total_strings = models.PositiveIntegerField(default=0) obsolete = models.BooleanField(default=False) @@ -3038,7 +3063,7 @@ def for_project_locale( if exclude_entities: entities = entities.exclude(pk__in=exclude_entities) - order_fields = ("resource__path", "order") + order_fields = ("resource__order", "order") if project.slug == "all-projects": order_fields = ("resource__project__name",) + order_fields diff --git a/pontoon/base/tests/models/test_entity.py b/pontoon/base/tests/models/test_entity.py index f895f79f96..ae0e3583c2 100644 --- a/pontoon/base/tests/models/test_entity.py +++ b/pontoon/base/tests/models/test_entity.py @@ -30,6 +30,7 @@ def entity_test_models(translation_a, locale_b): resourceX = ResourceFactory( project=project_a, path="resourceX.po", + order=1, ) entity_a.string = "Entity zero" entity_a.key = entity_a.string diff --git a/pontoon/sync/core.py b/pontoon/sync/core.py index 4954ffc41d..17b015305d 100644 --- a/pontoon/sync/core.py +++ b/pontoon/sync/core.py @@ -133,6 +133,7 @@ def update_resources(db_project, vcs_project): resource.save() if created: + db_project.reset_resource_order() added_paths.append(relative_path) log.debug("Added files: {}".format(", ".join(added_paths) or "None")) diff --git a/pontoon/sync/tests/test_core.py b/pontoon/sync/tests/test_core.py index e3cb00fbec..08052f9c7f 100644 --- a/pontoon/sync/tests/test_core.py +++ b/pontoon/sync/tests/test_core.py @@ -141,6 +141,22 @@ def test_basic(self): other_db_resource = Resource.objects.get(path=self.other_vcs_resource.path) assert other_db_resource.total_strings == len(self.other_vcs_resource.entities) + def test_order(self): + # Check if Resource.order gets reset for all Project resources. + self.other_db_resource.delete() + + assert self.main_db_resource.order == 0 # path="main.lang" + assert self.missing_db_resource.order == 0 # path="missing.lang" + assert self.other_db_resource.order == 0 # path="other.lang" + + update_resources(self.db_project, self.vcs_project) + self.missing_db_resource.refresh_from_db() + other_db_resource = Resource.objects.get(path=self.other_vcs_resource.path) + + assert self.main_db_resource.order == 0 + assert self.missing_db_resource.order == 1 + assert other_db_resource.order == 2 + class UpdateTranslatedResourcesTests(FakeCheckoutTestCase): @patch("pontoon.sync.core.update_translated_resources_without_config")