Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 38 additions & 15 deletions src/openedx_tagging/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,8 @@ def depth(self) -> int:
def annotate_depth(qs: models.QuerySet) -> models.QuerySet:
"""
Given a query that loads Tag objects, annotate it with the depth of
each tag.
each tag. Depth is 0-indexed: root tags have depth=0, their children have depth=1, and so on.
That means that a taxonomy consisting of 4 levels have depths 0, 1, 2, and 3.
"""
return qs.annotate(depth=models.Case(
models.When(parent_id=None, then=0),
Expand Down Expand Up @@ -513,6 +514,28 @@ def _get_filtered_tags_one_level(
qs = qs.annotate(usage_count=models.Subquery(obj_tags.values('count')))
return qs # type: ignore[return-value]

def _descendants_filter(self, tag_id: int) -> Q:
"""
Helper function to get all descendant tags of a given parent tag
up to the supported maximum depth.
"""
assert TAXONOMY_MAX_DEPTH == 3 # If we change TAXONOMY_MAX_DEPTH we need to change this query code
children = Q(parent_id=tag_id)
grandchildren = Q(parent__parent_id=tag_id)
great_grandchildren = Q(parent__parent__parent_id=tag_id)
return (
children |
grandchildren |
great_grandchildren
)

def _max_depth_filter(self) -> Q:
"""
Helper function to get all tags up to the supported maximum depth.
"""
assert TAXONOMY_MAX_DEPTH == 3 # If we change TAXONOMY_MAX_DEPTH we need to change this query code
return Q(parent__parent__parent__parent_id=None)

def _get_filtered_tags_deep(
self,
parent_tag_value: str | None,
Expand All @@ -525,17 +548,13 @@ def _get_filtered_tags_deep(
we're including tags from multiple levels of the hierarchy.
"""
# All tags (possibly below a certain tag?) in the closed taxonomy, up to depth TAXONOMY_MAX_DEPTH
if parent_tag_value:
main_parent_id = self.tag_for_value(parent_tag_value).pk
else:
main_parent_id = None
main_parent_id = self.tag_for_value(parent_tag_value).pk if parent_tag_value else None

assert TAXONOMY_MAX_DEPTH == 3 # If we change TAXONOMY_MAX_DEPTH we need to change this query code:
qs: models.QuerySet = self.tag_set.filter(
Q(parent_id=main_parent_id) |
Q(parent__parent_id=main_parent_id) |
Q(parent__parent__parent_id=main_parent_id)
)
qs: models.QuerySet = self.tag_set.filter(self._max_depth_filter())

# If a main parent is specified, we only want to include its descendants, otherwise, all tags.
if main_parent_id is not None:
qs = qs.filter(self._descendants_filter(tag_id=main_parent_id))

if search_term:
# We need to do an additional query to find all the tags that match the search term, then limit the
Expand All @@ -551,7 +570,7 @@ def _get_filtered_tags_deep(
if pk is not None:
matching_ids.append(pk)
qs = qs.filter(pk__in=matching_ids)
qs = qs.annotate(
qs = qs.annotate( # type: ignore[no-redef]
child_count=models.Count("children", filter=Q(children__pk__in=matching_ids), distinct=True),
grandchild_count=models.Count(
"children__children", filter=Q(children__children__pk__in=matching_ids), distinct=True,
Expand All @@ -561,17 +580,21 @@ def _get_filtered_tags_deep(
filter=Q(children__children__children__pk__in=matching_ids),
),
)
qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))
qs = qs.annotate( # type: ignore[no-redef]
descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count")
)
elif excluded_values:
raise NotImplementedError("Using excluded_values without search_term is not currently supported.")
# We could implement this in the future but I'd prefer to get rid of the "excluded_values" API altogether.
# It remains to be seen if it's useful to do that on the backend, or if we can do it better/simpler on the
# frontend.
else:
qs = qs.annotate(child_count=models.Count("children", distinct=True))
qs = qs.annotate(child_count=models.Count("children", distinct=True)) # type: ignore[no-redef]
qs = qs.annotate(grandchild_count=models.Count("children__children", distinct=True))
qs = qs.annotate(great_grandchild_count=models.Count("children__children__children"))
qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))
qs = qs.annotate( # type: ignore[no-redef]
descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count")
)

# Add the "depth" to each tag:
qs = Tag.annotate_depth(qs)
Expand Down
5 changes: 2 additions & 3 deletions tests/openedx_tagging/import_export/test_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,8 @@ def test_import_template(self, template_file, parser_format):
' Hi-hat (HI-HAT) (Idiophone) (children: 0)',
' Membranophone (DRUMS) (Percussion instruments) (children: 2 + 1)',
' Cajón (CAJÓN) (Membranophone) (children: 1)',
# This tag is present in the import files, but it will be omitted from get_tags()
# because it sits beyond TAXONOMY_MAX_DEPTH.
# Pyle Stringed Jam Cajón (PYLE) (Cajón) (children: 0)
# With TAXONOMY_MAX_DEPTH=3 and 0-indexed depths, this depth=3 tag is included.
' Pyle Stringed Jam Cajón (PYLE) (Cajón) (children: 0)',
' Tabla (TABLA) (Membranophone) (children: 0)',
'String instruments (STRINGS) (None) (children: 2 + 5)',
' Bowed strings (BOW) (String instruments) (children: 2)',
Expand Down
12 changes: 7 additions & 5 deletions tests/openedx_tagging/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ def test_get_tags(self) -> None:
"Eukaryota (children: 5 + 8)",
" Animalia (children: 7 + 1)",
" Arthropoda (children: 0)",
" Chordata (children: 1)", # The child of this is excluded due to depth limit
" Chordata (children: 1)",
" Mammalia (children: 0)",
" Cnidaria (children: 0)",
" Ctenophora (children: 0)",
" Gastrotrich (children: 0)",
Expand Down Expand Up @@ -784,11 +785,12 @@ def get_object_tags():
"Bacteria (used: 0, children: 2)",
" Archaebacteria (used: 1, children: 0)",
" Eubacteria (used: 0, children: 0)",
"Eukaryota (used: 0, children: 4 + 7)",
" Animalia (used: 1, children: 7)",
"Eukaryota (used: 0, children: 4 + 8)",
" Animalia (used: 1, children: 7 + 1)",
" Arthropoda (used: 1, children: 0)",
" Chordata (used: 0, children: 0)", # <<< Chordata has a matching child but we only support searching
" Cnidaria (used: 0, children: 0)", # 3 levels deep at once for now.
" Chordata (used: 0, children: 1)",
" Mammalia (used: 0, children: 0)",
" Cnidaria (used: 0, children: 0)",
" Ctenophora (used: 0, children: 0)",
" Gastrotrich (used: 1, children: 0)",
" Placozoa (used: 1, children: 0)",
Expand Down
26 changes: 25 additions & 1 deletion tests/openedx_tagging/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,8 @@ def test_get_all(self) -> None:
"Eukaryota (None) (children: 5 + 8)",
" Animalia (Eukaryota) (children: 7 + 1)",
" Arthropoda (Animalia) (children: 0)",
" Chordata (Animalia) (children: 1)", # note this has a child but the child is not included
" Chordata (Animalia) (children: 1)",
" Mammalia (Chordata) (children: 0)",
Comment on lines -446 to +447
Copy link
Contributor

@bradenmacdonald bradenmacdonald Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My impression here is that the TAXONOMY_MAX_DEPTH constant means that no APIs should ever return more than 3 levels deep at once, which is what was happening here. Now the get_filtered_tags() API is returning 4 levels deep, which violates the constant.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought this at first too, but max_depth is zero-indexed.
A tag's depth at root or first level is 0. A tag's depth at third level is 2, and at 4th level is 3. It is an explicit attribute in the data returned by the API. So we have a max depth of 3, but max levels of 4.

Many of the tests and code explicitly refer to great-grandchildren, which are 4 levels deep, and the logic in other places always expects 4 max levels. For example,

qs = qs.annotate(great_grandchild_count=models.Count("children__children__children"))
.

I assume the test comment was added due to a similar misunderstanding.

This inconsistency meant that you can create tags 4 levels deep - including when you import taxonomies and such - but then just cut off the deepest level, not displaying it, thus misleading the viewer.

Copy link
Author

@jesperhodge jesperhodge Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code, as I see now, deals with taxonomies that may be more deeply nested (because of taxonomy imports). But it annotates taxonomy tags with a depth, which is "collapsed" to 5 levels.
Parts of the code expect the resulting taxonomy the API returns to have up to 5 levels, parts of the code expect it to have up to 4 levels, and parts of the code expect it to have up to 3.
This PR fixes the returned taxonomy not to cut tags off below the 3rd level, but below the 4th level, because that is what is defined by TAXONOMY_MAX_DEPTH.

Here is the code that sets the depth levels.

def annotate_depth(qs: models.QuerySet) -> models.QuerySet:

This makes it even worse:

  • Depth 3 (Level 4): Great-Grandchildren — This is where TAXONOMY_MAX_DEPTH caps out.
  • Depth 4 (Level 5): This is the default depth set in the method. This is the depth that any more deeply nested taxonomy tags are rolled up into - for example after a taxonomy import -, making the TAXONOMY_MAX_DEPTH insufficient.

However I consider this part of the bug out of scope for this PR.

(Maybe the method linked here should better say ... default=TAXONOMY_MAX_DEPTH?), rolling everything up to 4 levels instead of 5?

" Cnidaria (Animalia) (children: 0)",
" Ctenophora (Animalia) (children: 0)",
" Gastrotrich (Animalia) (children: 0)",
Expand Down Expand Up @@ -622,8 +623,31 @@ def test_descendant_counts(self) -> None:
"Interests (None) (used: 0, children: 1 + 7)",
" Holland Codes (Interests) (used: 0, children: 1 + 6)",
" Interests - Holland Codes (Holland Codes) (used: 0, children: 6)",
" Artistic (Interests - Holland Codes) (used: 0, children: 0)",
" Conventional (Interests - Holland Codes) (used: 0, children: 0)",
" Enterprising (Interests - Holland Codes) (used: 0, children: 0)",
" Investigative (Interests - Holland Codes) (used: 0, children: 0)",
" Realistic (Interests - Holland Codes) (used: 0, children: 0)",
" Social (Interests - Holland Codes) (used: 0, children: 0)",
]

def test_parent_query_depth_limit(self) -> None:
"""
Verify that filtering below a specific parent still respects the
taxonomy max supported depth from the root (depth <= 3).
"""
taxonomy = api.create_taxonomy("Deep Parent Filter")
root = api.add_tag_to_taxonomy(taxonomy, "Root")
depth1 = api.add_tag_to_taxonomy(taxonomy, "Depth1", parent_tag_value=root.value)
depth2 = api.add_tag_to_taxonomy(taxonomy, "Depth2", parent_tag_value=depth1.value)
depth3 = api.add_tag_to_taxonomy(taxonomy, "Depth3", parent_tag_value=depth2.value)
api.add_tag_to_taxonomy(taxonomy, "Depth4", parent_tag_value=depth3.value)

result = list(taxonomy.get_filtered_tags(depth=None, parent_tag_value=depth1.value))
result_values = [row["value"] for row in result]

assert result_values == ["Depth2", "Depth3"]


class TestFilteredTagsFreeTextTaxonomy(TestCase):
"""
Expand Down
1 change: 1 addition & 0 deletions tests/openedx_tagging/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1459,6 +1459,7 @@ def test_small_taxonomy(self):
" Animalia (Eukaryota) (children: 7 + 1)",
" Arthropoda (Animalia) (children: 0)",
" Chordata (Animalia) (children: 1)",
" Mammalia (Chordata) (children: 0)",
" Cnidaria (Animalia) (children: 0)",
" Ctenophora (Animalia) (children: 0)",
" Gastrotrich (Animalia) (children: 0)",
Expand Down