feat: WIP: refactor views to use new get_filtered_tags()

openedx · Oct 18, 2023 · b578edf · b578edf
1 parent 3eafff7
commit b578edf
Show file tree

Hide file tree

Showing 3 changed files with 168 additions and 141 deletions.
diff --git a/openedx_tagging/core/tagging/rest_api/v1/serializers.py b/openedx_tagging/core/tagging/rest_api/v1/serializers.py
@@ -5,6 +5,7 @@
 from rest_framework import serializers
 from rest_framework.reverse import reverse
 
+from openedx_tagging.core.tagging.data import TagData
 from openedx_tagging.core.tagging.models import ObjectTag, Tag, Taxonomy
 
 
@@ -93,6 +94,39 @@ class ObjectTagUpdateQueryParamsSerializer(serializers.Serializer):  # pylint: d
     )
 
 
+class TagDataSerializer(serializers.Serializer):
+    """
+    Serializer for TagData
+
+    Adds a link to get the sub tags
+    """
+    value = serializers.CharField()
+    external_id = serializers.CharField(allow_null=True)
+    child_count = serializers.IntegerField()
+    depth = serializers.IntegerField()
+    parent_value = serializers.CharField(allow_null=True)
+    usage_count = serializers.IntegerField(required=False)
+    # Internal database ID, if any. Generally should not be used; prefer 'value' which is unique within each taxonomy.
+    # Free text taxonomies never have '_id' for their tags.
+    _id = serializers.IntegerField(allow_null=True)
+
+    sub_tags_url = serializers.SerializerMethodField()
+
+    def get_sub_tags_url(self, obj: TagData):
+        """
+        Returns URL for the list of child tags of the current tag.
+        """
+        if obj["child_count"] > 0 and "taxonomy_id" in self.context:
+            query_params = f"?parent_tag={obj['value']}"
+            url = (
+                reverse("oel_tagging:taxonomy-tags", args=[str(self.context["taxonomy_id"])])
+                + query_params
+            )
+            request = self.context.get("request")
+            return request.build_absolute_uri(url)
+        return None
+
+
 class TagsSerializer(serializers.ModelSerializer):
     """
     Serializer for Tags

diff --git a/openedx_tagging/core/tagging/rest_api/v1/views.py b/openedx_tagging/core/tagging/rest_api/v1/views.py
@@ -27,6 +27,7 @@
 from ...import_export.api import export_tags
 from ...import_export.parsers import ParserFormat
 from ...models import Taxonomy
+from ...data import TagData
 from ...rules import ObjectTagPermissionItem
 from ..paginators import SEARCH_TAGS_THRESHOLD, TAGS_THRESHOLD, DisabledTagsPagination, TagsPagination
 from .permissions import ObjectTagObjectPermissions, TagListPermissions, TaxonomyObjectPermissions
@@ -35,6 +36,7 @@
     ObjectTagSerializer,
     ObjectTagUpdateBodySerializer,
     ObjectTagUpdateQueryParamsSerializer,
+    TagDataSerializer,
     TagsForSearchSerializer,
     TagsSerializer,
     TagsWithSubTagsSerializer,
@@ -408,9 +410,15 @@ class TaxonomyTagsView(ListAPIView):
     """
     View to list tags of a taxonomy.
 
+    If you specify ?root_only or ?parent_tag_value=..., only one "level" of the
+    hierachy will be returned. Otherwise, several levels will be returned, in
+    tree order, up to the maximum supported depth. Additional levels/depth can
+    be retrieved by using ?parent_tag_value to load more data.
+
     **List Query Parameters**
         * pk (required) - The pk of the taxonomy to retrieve tags.
-        * parent_tag_id (optional) - Id of the tag to retrieve children tags.
+        * parent_tag_value (optional) - Id of the tag to retrieve children tags.
+        * root_only (optional) - If specified, only root tags are returned.
         * page (optional) - Page number (default: 1)
         * page_size (optional) - Number of items per page (default: 10)
 
@@ -426,22 +434,8 @@ class TaxonomyTagsView(ListAPIView):
     """
 
     permission_classes = [TagListPermissions]
-    pagination_enabled = True
-
-    def __init__(self):
-        # Initialized here to avoid errors on type hints
-        self.serializer_class = TagsSerializer
-
-    def get_pagination_class(self):
-        """
-        Get the corresponding class depending if the pagination is enabled.
-
-        It is necessary to call this function before returning the data.
-        """
-        if self.pagination_enabled:
-            return TagsPagination
-        else:
-            return DisabledTagsPagination
+    pagination_class = TagsPagination
+    serializer_class = TagDataSerializer
 
     def get_taxonomy(self, pk: int) -> Taxonomy:
         """
@@ -453,130 +447,40 @@ def get_taxonomy(self, pk: int) -> Taxonomy:
         self.check_object_permissions(self.request, taxonomy)
         return taxonomy
 
-    def _build_search_tree(self, tags: list[Tag]) -> list[Tag]:
-        """
-        Builds a tree with the result tags for a search.
-
-        The retult is a pruned tree that contains
-        the path from root tags to tags that match the search.
-        """
-        tag_ids = [tag.id for tag in tags]
-
-        # Get missing parents.
-        # Not all parents are in the search result.
-        # This occurs when a child tag is on the search result, but its parent not,
-        # we need to add the parent to show the tree from the root to the child.
-        for tag in tags:
-            if tag.parent and tag.parent_id and tag.parent_id not in tag_ids:
-                tag_ids.append(tag.parent_id)
-                tags.append(tag.parent)  # Our loop will iterate over this new parent tag too.
-
-        groups: dict[int, list[Tag]] = {}
-        roots: list[Tag] = []
-
-        # Group tags by parent
-        for tag in tags:
-            if tag.parent_id is not None:
-                if tag.parent_id not in groups:
-                    groups[tag.parent_id] = []
-                groups[tag.parent_id].append(tag)
-            else:
-                roots.append(tag)
-
-        for tag in tags:
-            # Used to serialize searched childrens
-            tag.sub_tags = groups.get(tag.id, [])  # type: ignore[attr-defined]
+    def get_serializer_context(self):
+        context = super().get_serializer_context()
+        context.update({
+            "request": self.request,
+            "taxonomy_id": int(self.kwargs["pk"]),
+        })
+        return context
 
-        return roots
-
-    def get_matching_tags(
-        self,
-        taxonomy_id: int,
-        parent_tag_id: str | None = None,
-        search_term: str | None = None,
-    ) -> list[Tag]:
+    def get_queryset(self) -> models.QuerySet[TagData]:
         """
-        Returns a list of tags for the given taxonomy.
-
-        The pagination can be enabled or disabled depending of the taxonomy size.
-        You can read the desicion '0014_*' to more info about this logic.
-        Also, determines the serializer to be used.
-
-        Use `parent_tag_id` to get the children of the given tag.
-
-        Use `search_term` to filter tags values that contains the given term.
+        Builds and returns the queryset to be paginated.
         """
+        taxonomy_id = int(self.kwargs.get("pk"))
         taxonomy = self.get_taxonomy(taxonomy_id)
-        if parent_tag_id:
-            # Get children of a tag.
+        parent_tag_value = self.request.query_params.get("parent_tag", None)
+        root_only = "root_only" in self.request.query_params
+        search_term = self.request.query_params.get("search_term", None)
 
-            # If you need to get the children, then the roots are
-            # paginated, so we need to paginate the childrens too.
+        if parent_tag_value:
+            # Fetching tags below a certain parent is always paginated and only returns the direct children
             self.pagination_enabled = True
-
-            # Normal serializer, with children link.
-            self.serializer_class = TagsSerializer
-            return get_children_tags(
-                taxonomy,
-                int(parent_tag_id),
-                search_term=search_term,
-            )
+            depth = 1
+            if root_only:
+                raise ValidationError("?root_only and ?parent_tag_value cannot be used together")
         else:
-            if search_term:
-                # Search tags
-                result = search_tags(
-                    taxonomy,
-                    search_term,
-                )
-                # Checks the result size to determine whether
-                # to turn pagination on or off.
-                self.pagination_enabled = len(result) > SEARCH_TAGS_THRESHOLD
-
-                # Use the special serializer to only show the tree
-                # of the search result.
-                self.serializer_class = TagsForSearchSerializer
-
-                result = self._build_search_tree(result)
+            if root_only or taxonomy.tag_set.count() > TAGS_THRESHOLD:
+                depth = 1  # Load only the root tags for now
             else:
-                # Get root tags of taxonomy
-
-                # Checks the taxonomy size to determine whether
-                # to turn pagination on or off.
-                self.pagination_enabled = taxonomy.tag_set.count() > TAGS_THRESHOLD
-
-                if self.pagination_enabled:
-                    # If pagination is enabled, use the normal serializer
-                    # with children link.
-                    self.serializer_class = TagsSerializer
-                else:
-                    # If pagination is disabled, use the special serializer
-                    # to show children. In this case, we return all taxonomy tags
-                    # in a tree structure.
-                    self.serializer_class = TagsWithSubTagsSerializer
-
-                result = get_root_tags(taxonomy)
-
-            return result
+                # We can load and display all the tags in the taxonomy at once:
+                self.pagination_class = DisabledTagsPagination
+                depth = None  # Maximum depth
 
-    def get_queryset(self) -> list[Tag]:  # type: ignore[override]
-        """
-        Builds and returns the queryset to be paginated.
-
-        The return type is not a QuerySet because the tagging python api functions
-        return lists, and on this point convert the list to a query set
-        is an unnecesary operation.
-        """
-        pk = self.kwargs.get("pk")
-        parent_tag_id = self.request.query_params.get("parent_tag_id", None)
-        search_term = self.request.query_params.get("search_term", None)
-
-        result = self.get_matching_tags(
-            pk,
-            parent_tag_id=parent_tag_id,
+        return taxonomy.get_filtered_tags(
+            parent_tag_value=parent_tag_value,
             search_term=search_term,
+            depth=depth,
         )
-
-        # This function is not called automatically
-        self.pagination_class = self.get_pagination_class()
-
-        return result
diff --git a/tests/openedx_tagging/core/tagging/test_views.py b/tests/openedx_tagging/core/tagging/test_views.py
@@ -19,6 +19,8 @@
 from openedx_tagging.core.tagging.rest_api.paginators import TagsPagination
 from openedx_tagging.core.tagging.rules import can_change_object_tag_objectid, can_view_object_tag_objectid
 
+from .utils import pretty_format_tags
+
 User = get_user_model()
 
 TAXONOMY_LIST_URL = "/tagging/rest_api/v1/taxonomies/"
@@ -958,9 +960,12 @@ def test_not_authorized_user(self):
 
         assert response.status_code == status.HTTP_403_FORBIDDEN
 
-    def test_small_taxonomy(self):
+    def test_small_taxonomy_root(self):
+        """
+        Test explicitly requesting only the root tags of a small taxonomy.
+        """
         self.client.force_authenticate(user=self.staff)
-        response = self.client.get(self.small_taxonomy_url)
+        response = self.client.get(self.small_taxonomy_url + "?root_only")
         assert response.status_code == status.HTTP_200_OK
 
         data = response.data
@@ -970,13 +975,20 @@ def test_small_taxonomy(self):
         root_count = self.small_taxonomy.tag_set.filter(parent=None).count()
         assert len(results) == root_count
 
-        # Checking tag fields
-        root_tag = self.small_taxonomy.tag_set.get(id=results[0].get("id"))
-        root_children_count = root_tag.children.count()
+        # Checking tag fields on the first tag returned:
+        root_tag = self.small_taxonomy.tag_set.get(id=results[0].get("_id"))
         assert results[0].get("value") == root_tag.value
-        assert results[0].get("taxonomy_id") == self.small_taxonomy.id
-        assert results[0].get("children_count") == root_children_count
-        assert len(results[0].get("sub_tags")) == root_children_count
+        assert results[0].get("child_count") == root_tag.children.count()
+        assert results[0].get("depth") == 0  # root tags always have depth 0
+        assert results[0].get("parent_value") == None
+        assert results[0].get("usage_count") == 0
+
+        # Check that we can load sub-tags of that tag:
+        sub_tags_response = self.client.get(results[0]["sub_tags_url"])
+        assert sub_tags_response.status_code == status.HTTP_200_OK
+        sub_tags_result = sub_tags_response.data["results"]
+        assert len(sub_tags_result) == root_tag.children.count()
+        assert set(t["value"] for t in sub_tags_result) == set(t.value for t in root_tag.children.all())
 
         # Checking pagination values
         assert data.get("next") is None
@@ -985,6 +997,83 @@ def test_small_taxonomy(self):
         assert data.get("num_pages") == 1
         assert data.get("current_page") == 1
 
+    def test_small_taxonomy(self):
+        """
+        Test loading all the tags of a small taxonomy at once.
+        """
+        self.client.force_authenticate(user=self.staff)
+        response = self.client.get(self.small_taxonomy_url)
+        assert response.status_code == status.HTTP_200_OK
+
+        data = response.data
+        results = data.get("results", [])
+        assert pretty_format_tags(results) == [
+            "Archaea (None) (used: 0, children: 3)",
+            "  DPANN (Archaea) (used: 0, children: 0)",
+            "  Euryarchaeida (Archaea) (used: 0, children: 0)",
+            "  Proteoarchaeota (Archaea) (used: 0, children: 0)",
+            "Bacteria (None) (used: 0, children: 2)",
+            "  Archaebacteria (Bacteria) (used: 0, children: 0)",
+            "  Eubacteria (Bacteria) (used: 0, children: 0)",
+            "Eukaryota (None) (used: 0, children: 5)",
+            "  Animalia (Eukaryota) (used: 0, children: 7)",
+            "    Arthropoda (Animalia) (used: 0, children: 0)",
+            "    Chordata (Animalia) (used: 0, children: 1)",
+            "    Cnidaria (Animalia) (used: 0, children: 0)",
+            "    Ctenophora (Animalia) (used: 0, children: 0)",
+            "    Gastrotrich (Animalia) (used: 0, children: 0)",
+            "    Placozoa (Animalia) (used: 0, children: 0)",
+            "    Porifera (Animalia) (used: 0, children: 0)",
+            "  Fungi (Eukaryota) (used: 0, children: 0)",
+            "  Monera (Eukaryota) (used: 0, children: 0)",
+            "  Plantae (Eukaryota) (used: 0, children: 0)",
+            "  Protista (Eukaryota) (used: 0, children: 0)",
+        ]
+
+        # Checking pagination values
+        assert data.get("next") is None
+        assert data.get("previous") is None
+        assert data.get("count") == len(results)
+        assert data.get("num_pages") == 1
+        assert data.get("current_page") == 1
+
+    def test_small_taxonomy_paged(self):
+        """
+        Test loading only the first few of the tags of a small taxonomy.
+        """
+        self.client.force_authenticate(user=self.staff)
+        response = self.client.get(self.small_taxonomy_url + "?page_size=5")
+        assert response.status_code == status.HTTP_200_OK
+        data = response.data
+        assert pretty_format_tags(data["results"]) == [
+            "Archaea (None) (used: 0, children: 3)",
+            "  DPANN (Archaea) (used: 0, children: 0)",
+            "  Euryarchaeida (Archaea) (used: 0, children: 0)",
+            "  Proteoarchaeota (Archaea) (used: 0, children: 0)",
+            "Bacteria (None) (used: 0, children: 2)",
+        ]
+
+        # Checking pagination values
+        assert data.get("next") is not None
+        assert data.get("previous") is None
+        assert data.get("count") == 20
+        assert data.get("num_pages") == 4
+        assert data.get("current_page") == 1
+
+        # Get the next page:
+        next_response = self.client.get(data.get("next"))
+        assert next_response.status_code == status.HTTP_200_OK
+        next_data = next_response.data
+        assert pretty_format_tags(next_data["results"]) == [
+            "  Archaebacteria (Bacteria) (used: 0, children: 0)",
+            "  Eubacteria (Bacteria) (used: 0, children: 0)",
+            "Eukaryota (None) (used: 0, children: 5)",
+            "  Animalia (Eukaryota) (used: 0, children: 7)",
+            "    Arthropoda (Animalia) (used: 0, children: 0)",
+        ]
+        assert next_data.get("current_page") == 2
+
+
     def test_small_search(self):
         search_term = 'eU'
         url = f"{self.small_taxonomy_url}?search_term={search_term}"