Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tagging: More powerful, flexible implementation of get_filtered_tags() [FC-0036] #92

Merged
merged 16 commits into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions mysql_test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
The tox targets for py38-django32 and py38-django42 will use this settings file.
For the most part, you can use test_settings.py instead (that's the default if
you just run "pytest" with no arguments).

If you need a compatible MySQL server running locally, spin one up with:
docker run --rm \
-e MYSQL_DATABASE=test_oel_db \
-e MYSQL_USER=test_oel_user \
-e MYSQL_PASSWORD=test_oel_pass \
-e MYSQL_RANDOM_ROOT_PASSWORD=true \
-p 3306:3306 mysql:8
"""

from test_settings import *
Expand Down
119 changes: 29 additions & 90 deletions openedx_tagging/core/tagging/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
from __future__ import annotations

from django.db import transaction
from django.db.models import F, QuerySet
from django.db.models import QuerySet
from django.utils.translation import gettext as _

from .data import TagDataQuerySet
from .models import ObjectTag, Tag, Taxonomy

# Export this as part of the API
Expand Down Expand Up @@ -70,54 +71,57 @@ def get_taxonomies(enabled=True) -> QuerySet[Taxonomy]:
return queryset.filter(enabled=enabled)


def get_tags(taxonomy: Taxonomy) -> list[Tag]:
def get_tags(taxonomy: Taxonomy) -> TagDataQuerySet:
"""
Returns a list of predefined tags for the given taxonomy.
Returns a QuerySet of all the tags in the given taxonomy.

Note that if the taxonomy allows free-text tags, then the returned list will be empty.
Note that if the taxonomy is dynamic or free-text, only tags that have
already been applied to some object will be returned.
"""
return taxonomy.cast().get_tags()
return taxonomy.cast().get_filtered_tags()


def get_root_tags(taxonomy: Taxonomy) -> list[Tag]:
def get_root_tags(taxonomy: Taxonomy) -> TagDataQuerySet:
"""
Returns a list of the root tags for the given taxonomy.

Note that if the taxonomy allows free-text tags, then the returned list will be empty.
"""
return list(taxonomy.cast().get_filtered_tags())
return taxonomy.cast().get_filtered_tags(depth=1)


def search_tags(taxonomy: Taxonomy, search_term: str) -> list[Tag]:
def search_tags(taxonomy: Taxonomy, search_term: str, exclude_object_id: str | None = None) -> TagDataQuerySet:
"""
Returns a list of all tags that contains `search_term` of the given taxonomy.
Returns a list of all tags that contains `search_term` of the given
taxonomy, as well as their ancestors (so they can be displayed in a tree).

Note that if the taxonomy allows free-text tags, then the returned list will be empty.
If exclude_object_id is set, any tags applied to that object will be
excluded from the results, e.g. to power an autocomplete search when adding
additional tags to an object.
"""
return list(
taxonomy.cast().get_filtered_tags(
search_term=search_term,
search_in_all=True,
excluded_values = None
if exclude_object_id:
# Fetch tags that the object already has to exclude them from the result.
# Note: this adds a fair bit of complexity. In the future, maybe we can just do this filtering on the frontend?
excluded_values = list(
taxonomy.objecttag_set.filter(object_id=exclude_object_id).values_list(
"_value", flat=True
)
)
)
qs = taxonomy.cast().get_filtered_tags(search_term=search_term, excluded_values=excluded_values)
return qs


def get_children_tags(
taxonomy: Taxonomy,
parent_tag_id: int,
search_term: str | None = None,
) -> list[Tag]:
parent_tag_value: str,
) -> TagDataQuerySet:
"""
Returns a list of children tags for the given parent tag.
Returns a QuerySet of children tags for the given parent tag.

Note that if the taxonomy allows free-text tags, then the returned list will be empty.
"""
return list(
taxonomy.cast().get_filtered_tags(
parent_tag_id=parent_tag_id,
search_term=search_term,
)
)
return taxonomy.cast().get_filtered_tags(parent_tag_value=parent_tag_value, depth=1)


def resync_object_tags(object_tags: QuerySet | None = None) -> int:
Expand Down Expand Up @@ -250,68 +254,3 @@ def _check_new_tag_count(new_tag_count: int) -> None:
for object_tag in updated_tags:
object_tag.full_clean() # Run validation
object_tag.save()


# TODO: return tags from closed taxonomies as well as the count of how many times each is used.
def autocomplete_tags(
taxonomy: Taxonomy,
search: str,
object_id: str | None = None,
object_tags_only=True,
) -> QuerySet:
"""
Provides auto-complete suggestions by matching the `search` string against existing
ObjectTags linked to the given taxonomy. A case-insensitive search is used in order
to return the highest number of relevant tags.

If `object_id` is provided, then object tag values already linked to this object
are omitted from the returned suggestions. (ObjectTag values must be unique for a
given object + taxonomy, and so omitting these suggestions helps users avoid
duplication errors.).

Returns a QuerySet of dictionaries containing distinct `value` (string) and
`tag` (numeric ID) values, sorted alphabetically by `value`.
The `value` is what should be shown as a suggestion to users,
and if it's a free-text taxonomy, `tag` will be `None`: we include the `tag` ID
in anticipation of the second use case listed below.

Use cases:
* This method is useful for reducing tag variation in free-text taxonomies by showing
users tags that are similar to what they're typing. E.g., if the `search` string "dn"
shows that other objects have been tagged with "DNA", "DNA electrophoresis", and "DNA fingerprinting",
this encourages users to use those existing tags if relevant, instead of creating new ones that
look similar (e.g. "dna finger-printing").
* It could also be used to assist tagging for closed taxonomies with a list of possible tags which is too
large to return all at once, e.g. a user model taxonomy that dynamically creates tags on request for any
registered user in the database. (Note that this is not implemented yet, but may be as part of a future change.)
"""
if not object_tags_only:
raise NotImplementedError(
_(
"Using this would return a query set of tags instead of object tags."
"For now we recommend fetching all of the taxonomy's tags "
"using get_tags() and filtering them on the frontend."
)
)
# Fetch tags that the object already has to exclude them from the result
excluded_tags: list[str] = []
if object_id:
excluded_tags = list(
taxonomy.objecttag_set.filter(object_id=object_id).values_list(
"_value", flat=True
)
)
return (
# Fetch object tags from this taxonomy whose value contains the search
taxonomy.objecttag_set.filter(_value__icontains=search)
# omit any tags whose values match the tags on the given object
.exclude(_value__in=excluded_tags)
# alphabetical ordering
.order_by("_value")
# Alias the `_value` field to `value` to make it nicer for users
.annotate(value=F("_value"))
# obtain tag values
.values("value", "tag_id")
# remove repeats
.distinct()
)
39 changes: 39 additions & 0 deletions openedx_tagging/core/tagging/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""
Data models used by openedx-tagging
"""
from __future__ import annotations

from typing import TYPE_CHECKING, Any, TypedDict

from django.db.models import QuerySet
from typing_extensions import TypeAlias


class TagData(TypedDict):
"""
Data about a single tag. Many of the tagging API methods return Django
QuerySets that resolve to these dictionaries.

Even though the data will be in this same format, it will not necessarily
be an instance of this class but rather a plain dictionary. This is more a
type than a class.
"""
value: str
external_id: str | None
child_count: int
depth: int
parent_value: str | None
# Note: usage_count may not actually be present but there's no way to indicate that w/ python types at the moment
usage_count: int
pomegranited marked this conversation as resolved.
Show resolved Hide resolved
# Internal database ID, if any. Generally should not be used; prefer 'value' which is unique within each taxonomy.
_id: int | None


if TYPE_CHECKING:
from django_stubs_ext import ValuesQuerySet
TagDataQuerySet: TypeAlias = ValuesQuerySet[Any, TagData]
# The following works better for pyright (provides proper VS Code autocompletions),
# but I can't find any way to specify different types for pyright vs mypy :/
# TagDataQuerySet: TypeAlias = QuerySet[TagData]
else:
TagDataQuerySet = QuerySet[TagData]
15 changes: 8 additions & 7 deletions openedx_tagging/core/tagging/import_export/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

from django.utils.translation import gettext as _

from ..api import get_tags
from ..models import Taxonomy
from .exceptions import EmptyCSVField, EmptyJSONField, FieldJSONError, InvalidFormat, TagParserError
from .import_plan import TagItem
Expand Down Expand Up @@ -166,17 +165,19 @@ def _load_tags_for_export(cls, taxonomy: Taxonomy) -> list[dict]:
with required and optional fields

The tags are ordered by hierarchy, first, parents and then children.
`get_tags` is in charge of returning this in a hierarchical way.
`get_filtered_tags` is in charge of returning this in a hierarchical
way.
"""
tags = get_tags(taxonomy)
tags = taxonomy.get_filtered_tags().all()
result = []
for tag in tags:
result_tag = {
"id": tag.external_id or tag.id,
"value": tag.value,
"id": tag["external_id"] or tag["_id"],
"value": tag["value"],
}
if tag.parent:
result_tag["parent_id"] = tag.parent.external_id or tag.parent.id
if tag["parent_value"]:
parent_tag = next(t for t in tags if t["value"] == tag["parent_value"])
result_tag["parent_id"] = parent_tag["external_id"] or parent_tag["_id"]
result.append(result_tag)
return result

Expand Down
Loading
Loading