Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/openedx_tagging/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,11 +200,12 @@ def get_object_tags(
.select_related("taxonomy", "tag", "tag__parent", "tag__parent__parent")
# Sort the tags within each taxonomy in "tree order". See Taxonomy._get_filtered_tags_deep for details on this:
.annotate(sort_key=Lower(Concat(
# TODO: rewrite this to use a "WITH RECURSIVE" common table expression ?
ConcatNull(F("tag__parent__parent__parent__value"), Value("\t")),
ConcatNull(F("tag__parent__parent__value"), Value("\t")),
ConcatNull(F("tag__parent__value"), Value("\t")),
Coalesce(F("tag__value"), F("_value")),
Value("\t"),
Value("\t"), # Required for MySQL to sort correctly
output_field=models.CharField(),
)))
.annotate(taxonomy_name=Coalesce(F("taxonomy__name"), F("_export_id")))
Expand Down
79 changes: 53 additions & 26 deletions src/openedx_tagging/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import logging
import re
from typing import List
from typing import List, Self

from django.core.exceptions import ValidationError
from django.db import models
Expand All @@ -14,21 +14,23 @@
from django.utils.functional import cached_property
from django.utils.module_loading import import_string
from django.utils.translation import gettext_lazy as _
from typing_extensions import Self # Until we upgrade to python 3.11

from openedx_django_lib.fields import MultiCollationTextField, case_insensitive_char_field, case_sensitive_char_field

from ..data import TagDataQuerySet
from .utils import RESERVED_TAG_CHARS, ConcatNull
from .utils import RESERVED_TAG_CHARS

log = logging.getLogger(__name__)


# Maximum depth allowed for a hierarchical taxonomy's tree of tags.
# Maximum depth of tags that can be retrieved from any single API call.
# Currently, taxonomies support unlimited depth but you can only retrieve up to three levels deep at one time.
# TODO: add an absolute limit as well.
# TODO: not all APIs work with deeper than 3 tags - e.g. get_object_tags will always return the correct
# results, but the order of tags below depth 4 won't be correct.
TAXONOMY_MAX_DEPTH = 3

# Ancestry of a given tag; the Tag.value fields of a given tag and its parents, starting from the root.
# Will contain 0...TAXONOMY_MAX_DEPTH elements.
Lineage = List[str]


Expand Down Expand Up @@ -147,20 +149,34 @@ def depth(self) -> int:
return depth

@staticmethod
def annotate_depth(qs: models.QuerySet) -> models.QuerySet:
def annotate_depth(
qs: models.QuerySet,
from_tag_id=None, # depth from None means absolute depth (default)
depth_offset=0, # If using a "from tag" (subtree depth), specify an offset here to add to each tag's depth
) -> models.QuerySet:
"""
Given a query that loads Tag objects, annotate it with the depth of
each tag.
"""
return qs.annotate(depth=models.Case(
models.When(parent_id=None, then=0),
models.When(parent__parent_id=None, then=1),
models.When(parent__parent__parent_id=None, then=2),
models.When(parent__parent__parent__parent_id=None, then=3),
# If the depth is 4 or more, currently we just "collapse" the depth
# to 4 in order not to add too many joins to this query in general.
default=4,
))

Normally this refers to the "absolute depth" (where root tags with no
parent have depth 0, but you can also get the depth relative to some
other tag, i.e. subtree depth, by passing in the ID of a tag known to be
an ancestor of all the tags in the query set and its depth.)
"""
# fmt: off
return qs.annotate(
depth=models.Case(
models.When(parent_id=from_tag_id, then=depth_offset), # e.g. when no parent, depth = 0
models.When(parent__parent_id=from_tag_id, then=depth_offset + 1), # e.g. when 1 parent, depth = 1
models.When(parent__parent__parent_id=from_tag_id, then=depth_offset + 2),
models.When(parent__parent__parent__parent_id=from_tag_id, then=depth_offset + 3),
# If the depth is 4 or more, currently we just "collapse" the depth
# to 4 in order not to add too many joins to this query in general.
# But you can call this again using a new 'from_tag' to get accurate calculations at greater depths.
default=depth_offset + 4,
)
)
# fmt: on

@cached_property
def child_count(self) -> int:
Expand Down Expand Up @@ -526,9 +542,14 @@ def _get_filtered_tags_deep(
"""
# All tags (possibly below a certain tag?) in the closed taxonomy, up to depth TAXONOMY_MAX_DEPTH
if parent_tag_value:
main_parent_id = self.tag_for_value(parent_tag_value).pk
# Get a subtree, up to three levels deep below this tag:
main_parent = self.tag_for_value(parent_tag_value, select_related=["parent", "parent__parent"])
main_parent_id = main_parent.pk
depth_offset = main_parent.depth + 1
else:
# Load the first three levels of the taxonomy.
main_parent_id = None
depth_offset = 0

assert TAXONOMY_MAX_DEPTH == 3 # If we change TAXONOMY_MAX_DEPTH we need to change this query code:
qs: models.QuerySet = self.tag_set.filter(
Expand Down Expand Up @@ -574,19 +595,21 @@ def _get_filtered_tags_deep(
qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))

# Add the "depth" to each tag:
qs = Tag.annotate_depth(qs)
qs = Tag.annotate_depth(qs, from_tag_id=main_parent_id, depth_offset=depth_offset)
# Add the "lineage" as a field called "sort_key" to sort them in order correctly:
# For a root tag, we want sort_key="RootValue" and for a depth=1 tag, we want sort_key="RootValue\tValue", and
# so on. To make this work with very deep trees, we generate the lineage of a subtree only (below main_parent).
# fmt: off
qs = qs.annotate(sort_key=Lower(Concat(
# For a root tag, we want sort_key="RootValue" and for a depth=1 tag
# we want sort_key="RootValue\tValue". The following does that, since
# ConcatNull(...) returns NULL if any argument is NULL.
ConcatNull(F("parent__parent__parent__value"), Value("\t")),
ConcatNull(F("parent__parent__value"), Value("\t")),
ConcatNull(F("parent__value"), Value("\t")),
# TODO: update this to use a "WITH RECURSIVE" common table expression?
models.Case(models.When(depth__gte=3 + depth_offset, then=Concat(F("parent__"*3+"value"), Value("\t")))),
models.Case(models.When(depth__gte=2 + depth_offset, then=Concat(F("parent__"*2+"value"), Value("\t")))),
models.Case(models.When(depth__gte=1 + depth_offset, then=Concat(F("parent__"*1+"value"), Value("\t")))),
F("value"),
Value("\t"), # We also need the '\t' separator character at the end, or MySQL will sort things wrong
output_field=models.CharField(),
)))
# fmt: on
# Add the parent value
qs = qs.annotate(parent_value=F("parent__value"))
qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID
Expand Down Expand Up @@ -715,7 +738,7 @@ def validate_value(self, value: str) -> bool:
return value != "" and isinstance(value, str)
return self.tag_set.filter(value__iexact=value).exists()

def tag_for_value(self, value: str) -> Tag:
def tag_for_value(self, value: str, select_related: list[str] | None = None) -> Tag:
"""
Get the Tag object for the given value.
Some Taxonomies may auto-create the Tag at this point, e.g. a User
Expand All @@ -726,7 +749,11 @@ def tag_for_value(self, value: str) -> Tag:
self.check_casted()
if self.allow_free_text:
raise ValueError("tag_for_value() doesn't work for free text taxonomies. They don't use Tag instances.")
return self.tag_set.get(value__iexact=value)
if select_related:
qs = self.tag_set.select_related(*select_related)
else:
qs = self.tag_set
return qs.get(value__iexact=value)

def validate_external_id(self, external_id: str) -> bool:
"""
Expand Down
102 changes: 102 additions & 0 deletions tests/openedx_tagging/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1070,3 +1070,105 @@ def test_unmark_copied_tags(self) -> None:
tags = tagging_api.get_object_tags(obj2)
assert tags.filter(is_copied=False).count() == 3
assert tags.filter(is_copied=True).count() == 0

def test_deep_taxonomy(self) -> None:
"""
Test that a taxonomy's tags can be nested deeply
"""
taxonomy = tagging_api.create_taxonomy(name="Deep Taxonomy")

tag_0 = tagging_api.add_tag_to_taxonomy(taxonomy, "Root - depth 0")
assert tag_0.depth == 0

tag_1 = tagging_api.add_tag_to_taxonomy(taxonomy, "Child - depth 1", parent_tag_value=tag_0.value)
assert tag_1.depth == 1

tag_2 = tagging_api.add_tag_to_taxonomy(taxonomy, "Grandchild - depth 2", parent_tag_value=tag_1.value)
assert tag_2.depth == 2

# Up to this point is the level at which all the APIs are guaranteed to work correctly.

tag_3 = tagging_api.add_tag_to_taxonomy(taxonomy, "Great-Grandchild - depth 3", parent_tag_value=tag_2.value)
assert tag_3.depth == 3

tag_4 = tagging_api.add_tag_to_taxonomy(taxonomy, "Great-Great-Grandchild - depth 4", parent_tag_value=tag_3.value)
assert tag_4.depth == 4

tag_5 = tagging_api.add_tag_to_taxonomy(taxonomy, "Great-Great-Great-Grandchild - depth 5", parent_tag_value=tag_4.value)
assert tag_5.depth == 5

# APIs like `get_tags()` will return only one level at a time:
assert pretty_format_tags(
tagging_api.get_tags(taxonomy)
) == [
'Root - depth 0 (None) (children: 1 + 2)',
' Child - depth 1 (Root - depth 0) (children: 1 + 2)',
' Grandchild - depth 2 (Child - depth 1) (children: 1 + 2)',
]
# But we can always load deeper levels one level at a time:
assert pretty_format_tags(
tagging_api.get_children_tags(taxonomy, parent_tag_value=tag_3.value)
) == [
' Great-Great-Grandchild - depth 4 (Great-Grandchild - depth 3) (children: 1)',
]
assert pretty_format_tags(
tagging_api.get_children_tags(taxonomy, parent_tag_value=tag_4.value)
) == [
' Great-Great-Great-Grandchild - depth 5 (Great-Great-Grandchild - depth 4) (children: 0)',
]
# Or even up to three levels at a time:
deep_result = taxonomy.get_filtered_tags(depth=None, parent_tag_value=tag_2.value)
assert pretty_format_tags(deep_result, parent=False) == [
' Great-Grandchild - depth 3 (children: 1 + 1)',
' Great-Great-Grandchild - depth 4 (children: 1)',
' Great-Great-Great-Grandchild - depth 5 (children: 0)',
]
# Let's check that all the fields on the last entry, especially 'depth':
assert deep_result[2]["value"] == "Great-Great-Great-Grandchild - depth 5"
assert deep_result[2]["parent_value"] == "Great-Great-Grandchild - depth 4"
assert deep_result[2]["depth"] == 5
assert deep_result[2]["child_count"] == 0

# Also check that Tag.get_lineage() works correctly for super deep tags:
assert tag_5.get_lineage() == [
'Root - depth 0',
'Child - depth 1',
'Grandchild - depth 2',
'Great-Grandchild - depth 3',
'Great-Great-Grandchild - depth 4',
'Great-Great-Great-Grandchild - depth 5',
]


def test_object_tags_deep(self) -> None:
"""
Test that a deep taxonomy's tags can be used to tag objects
"""
taxonomy = tagging_api.create_taxonomy(name="Deep Taxonomy")
tag_0 = tagging_api.add_tag_to_taxonomy(taxonomy, "Bob - depth 0")
tag_1 = tagging_api.add_tag_to_taxonomy(taxonomy, "Janet - depth 1", parent_tag_value=tag_0.value)
tag_2 = tagging_api.add_tag_to_taxonomy(taxonomy, "Alice - depth 2", parent_tag_value=tag_1.value)
tag_3 = tagging_api.add_tag_to_taxonomy(taxonomy, "Fred - depth 3", parent_tag_value=tag_2.value)
tag_4 = tagging_api.add_tag_to_taxonomy(taxonomy, "Patty - depth 4", parent_tag_value=tag_3.value)
tag_5 = tagging_api.add_tag_to_taxonomy(taxonomy, "Clara - depth 5", parent_tag_value=tag_4.value)
assert tag_5.depth == 5

object_id = "some object"
tagging_api.tag_object(object_id, taxonomy, tags=[tag_5.value])

assert [ot.tag for ot in tagging_api.get_object_tags(object_id)] == [tag_5]

# Now test the sort order of many tags:
# tagging_api.tag_object(object_id, taxonomy, tags=[
# tag_0.value, tag_1.value, tag_2.value, tag_3.value, tag_4.value, tag_5.value,
# ])
# assert [ot.tag for ot in tagging_api.get_object_tags(object_id)] == [
# tag_0,
# tag_1,
# tag_2,
# tag_3,
# tag_4,
# tag_5,
# ]
# Note: the above isn't quite working yet - tag_5 is out of order.
# See TODO note in get_object_tags()
Loading