Skip to content

Commit

Permalink
Optimize filters by introducing Resource.order (mozilla#3130)
Browse files Browse the repository at this point in the history
String filters can be slow, especially because ordering entities by Resource.path is slow. It turns out that:
- Adding DB index to Resource.path doesn't bring performance gains.
- Sorting entities by any other Resource.field is fast.

The reason for slowness is probably that Resource.path can be a pretty long text field.

So we're now introducing Resource.order, which to represent the alphabetic order of resources in the project. We reorder resources when new resources are added to the project.
  • Loading branch information
mathjazz authored Mar 11, 2024
1 parent f47fa67 commit 2015e4f
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 1 deletion.
18 changes: 18 additions & 0 deletions pontoon/base/migrations/0054_resource_order.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.24 on 2024-03-08 14:27

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("base", "0053_alter_translation_index_together"),
]

operations = [
migrations.AddField(
model_name="resource",
name="order",
field=models.PositiveIntegerField(default=0),
),
]
38 changes: 38 additions & 0 deletions pontoon/base/migrations/0055_populate_resource_order.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 3.2.24 on 2024-03-08 14:29

from django.db import migrations


def populate_resource_order(apps, schema_editor):
Project = apps.get_model("base", "Project")
Resource = apps.get_model("base", "Resource")
projects = Project.objects.all()

ordered_resources = []

for p in projects.prefetch_related("resources"):
resources = p.resources.all().order_by("path")
for idx, r in enumerate(resources):
r.order = idx
ordered_resources.append(r)

Resource.objects.bulk_update(ordered_resources, ["order"])


def reset_resource_order(apps, schema_editor):
Resource = apps.get_model("base", "Resource")
Resource.objects.update(order=0)


class Migration(migrations.Migration):

dependencies = [
("base", "0054_resource_order"),
]

operations = [
migrations.RunPython(
code=populate_resource_order,
reverse_code=reset_resource_order,
),
]
27 changes: 26 additions & 1 deletion pontoon/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1575,6 +1575,22 @@ def available_locales_list(self):
"""Get a list of available locale codes."""
return list(self.locales.all().values_list("code", flat=True))

def reset_resource_order(self):
"""
Sorting resources by path is a heavy operation, so we use the Resource.order field
to represent the alphabetic order of resources in the project.
This method resets the order field, and should be called when new resources are
added to or removed from the project.
"""
ordered_resources = []

for idx, r in enumerate(self.resources.order_by("path")):
r.order = idx
ordered_resources.append(r)

Resource.objects.bulk_update(ordered_resources, ["order"])


class ProjectSlugHistory(models.Model):
project = models.ForeignKey("Project", on_delete=models.CASCADE)
Expand Down Expand Up @@ -2147,6 +2163,15 @@ def asymmetric(self):
class Resource(models.Model):
project = models.ForeignKey(Project, models.CASCADE, related_name="resources")
path = models.TextField() # Path to localization file

"""
Index in the alphabetically sorted list of resources
Sorting resources by path is a heavy operation, so we use this field
to represent the alphabetic order of resources in the project.
"""
order = models.PositiveIntegerField(default=0)

total_strings = models.PositiveIntegerField(default=0)
obsolete = models.BooleanField(default=False)

Expand Down Expand Up @@ -3038,7 +3063,7 @@ def for_project_locale(
if exclude_entities:
entities = entities.exclude(pk__in=exclude_entities)

order_fields = ("resource__path", "order")
order_fields = ("resource__order", "order")
if project.slug == "all-projects":
order_fields = ("resource__project__name",) + order_fields

Expand Down
1 change: 1 addition & 0 deletions pontoon/base/tests/models/test_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def entity_test_models(translation_a, locale_b):
resourceX = ResourceFactory(
project=project_a,
path="resourceX.po",
order=1,
)
entity_a.string = "Entity zero"
entity_a.key = entity_a.string
Expand Down
1 change: 1 addition & 0 deletions pontoon/sync/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def update_resources(db_project, vcs_project):
resource.save()

if created:
db_project.reset_resource_order()
added_paths.append(relative_path)

log.debug("Added files: {}".format(", ".join(added_paths) or "None"))
Expand Down
16 changes: 16 additions & 0 deletions pontoon/sync/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,22 @@ def test_basic(self):
other_db_resource = Resource.objects.get(path=self.other_vcs_resource.path)
assert other_db_resource.total_strings == len(self.other_vcs_resource.entities)

def test_order(self):
# Check if Resource.order gets reset for all Project resources.
self.other_db_resource.delete()

assert self.main_db_resource.order == 0 # path="main.lang"
assert self.missing_db_resource.order == 0 # path="missing.lang"
assert self.other_db_resource.order == 0 # path="other.lang"

update_resources(self.db_project, self.vcs_project)
self.missing_db_resource.refresh_from_db()
other_db_resource = Resource.objects.get(path=self.other_vcs_resource.path)

assert self.main_db_resource.order == 0
assert self.missing_db_resource.order == 1
assert other_db_resource.order == 2


class UpdateTranslatedResourcesTests(FakeCheckoutTestCase):
@patch("pontoon.sync.core.update_translated_resources_without_config")
Expand Down

0 comments on commit 2015e4f

Please sign in to comment.