Skip to content

Commit 88fdc7f

Browse files
committed
feat: container collections support
1 parent 0120179 commit 88fdc7f

19 files changed

Lines changed: 233 additions & 158 deletions

File tree

openedx/core/djangoapps/content/search/api.py

Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,35 +17,37 @@
1717
from meilisearch import Client as MeilisearchClient
1818
from meilisearch.errors import MeilisearchApiError, MeilisearchError
1919
from meilisearch.models.task import TaskInfo
20+
from opaque_keys import OpaqueKey
2021
from opaque_keys.edx.keys import UsageKey
21-
from opaque_keys.edx.locator import LibraryContainerLocator, LibraryLocatorV2, LibraryCollectionLocator
22+
from opaque_keys.edx.locator import LibraryCollectionLocator, LibraryContainerLocator, LibraryLocatorV2
2223
from openedx_learning.api import authoring as authoring_api
23-
from common.djangoapps.student.roles import GlobalStaff
2424
from rest_framework.request import Request
25+
2526
from common.djangoapps.student.role_helpers import get_course_roles
27+
from common.djangoapps.student.roles import GlobalStaff
2628
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
27-
from openedx.core.djangoapps.content.search.models import get_access_ids_for_request, IncrementalIndexCompleted
2829
from openedx.core.djangoapps.content.search.index_config import (
2930
INDEX_DISTINCT_ATTRIBUTE,
3031
INDEX_FILTERABLE_ATTRIBUTES,
31-
INDEX_SEARCHABLE_ATTRIBUTES,
32-
INDEX_SORTABLE_ATTRIBUTES,
3332
INDEX_RANKING_RULES,
33+
INDEX_SEARCHABLE_ATTRIBUTES,
34+
INDEX_SORTABLE_ATTRIBUTES
3435
)
36+
from openedx.core.djangoapps.content.search.models import IncrementalIndexCompleted, get_access_ids_for_request
3537
from openedx.core.djangoapps.content_libraries import api as lib_api
3638
from xmodule.modulestore.django import modulestore
3739

3840
from .documents import (
3941
Fields,
4042
meili_id_from_opaque_key,
41-
searchable_doc_for_course_block,
43+
searchable_doc_collections,
4244
searchable_doc_for_collection,
4345
searchable_doc_for_container,
46+
searchable_doc_for_course_block,
4447
searchable_doc_for_library_block,
4548
searchable_doc_for_usage_key,
46-
searchable_doc_collections,
4749
searchable_doc_tags,
48-
searchable_doc_tags_for_collection,
50+
searchable_doc_tags_for_collection
4951
)
5052

5153
log = logging.getLogger(__name__)
@@ -488,6 +490,7 @@ def index_container_batch(batch, num_done, library_key) -> int:
488490
doc = searchable_doc_for_container(container_key)
489491
# TODO: when we add container tags
490492
# doc.update(searchable_doc_tags_for_container(container_key))
493+
doc.update(searchable_doc_collections(container_key))
491494
docs.append(doc)
492495
except Exception as err: # pylint: disable=broad-except
493496
status_cb(f"Error indexing container {container.key}: {err}")
@@ -710,15 +713,15 @@ def upsert_library_collection_index_doc(library_key: LibraryLocatorV2, collectio
710713
If the Collection is not found or disabled (i.e. soft-deleted), then delete it from the search index.
711714
"""
712715
doc = searchable_doc_for_collection(library_key, collection_key)
713-
update_components = False
716+
update_items = False
714717

715718
# Soft-deleted/disabled collections are removed from the index
716719
# and their components updated.
717720
if doc.get('_disabled'):
718721

719722
_delete_index_doc(doc[Fields.id])
720723

721-
update_components = True
724+
update_items = True
722725

723726
# Hard-deleted collections are also deleted from the index,
724727
# but their components are automatically updated as part of the deletion process, so we don't have to.
@@ -736,10 +739,12 @@ def upsert_library_collection_index_doc(library_key: LibraryLocatorV2, collectio
736739
_update_index_docs([doc])
737740

738741
# Asynchronously update the collection's components "collections" field
739-
if update_components:
740-
from .tasks import update_library_components_collections as update_task
742+
if update_items:
743+
from .tasks import update_library_components_collections as update_components_task
744+
from .tasks import update_library_components_collections as update_containers_task
741745

742-
update_task.delay(str(library_key), collection_key)
746+
update_components_task.delay(str(library_key), collection_key)
747+
update_containers_task.delay(str(library_key), collection_key)
743748

744749

745750
def update_library_components_collections(
@@ -774,6 +779,38 @@ def update_library_components_collections(
774779
_update_index_docs(docs)
775780

776781

782+
def update_library_containers_collections(
783+
library_key: LibraryLocatorV2,
784+
collection_key: str,
785+
batch_size: int = 1000,
786+
) -> None:
787+
"""
788+
Updates the "collections" field for all containers associated with a given Library Collection.
789+
790+
Because there may be a lot of containers, we send these updates to Meilisearch in batches.
791+
"""
792+
library = lib_api.get_library(library_key)
793+
containers = authoring_api.get_collection_containers(library.learning_package_id, collection_key)
794+
795+
paginator = Paginator(containers, batch_size)
796+
for page in paginator.page_range:
797+
docs = []
798+
799+
for container in paginator.page(page).object_list:
800+
container_key = lib_api.library_container_locator(
801+
library_key,
802+
container,
803+
)
804+
doc = searchable_doc_collections(container_key)
805+
docs.append(doc)
806+
807+
log.info(
808+
f"Updating document.collections for library {library_key} containers"
809+
f" page {page} / {paginator.num_pages}"
810+
)
811+
_update_index_docs(docs)
812+
813+
777814
def upsert_library_container_index_doc(container_key: LibraryContainerLocator) -> None:
778815
"""
779816
Creates, updates, or deletes the document for the given Library Container in the search index.
@@ -820,12 +857,12 @@ def upsert_block_tags_index_docs(usage_key: UsageKey):
820857
_update_index_docs([doc])
821858

822859

823-
def upsert_block_collections_index_docs(usage_key: UsageKey):
860+
def upsert_item_collections_index_docs(opaque_key: OpaqueKey):
824861
"""
825-
Updates the collections data in documents for the given Course/Library block
862+
Updates the collections data in documents for the given Course/Library block, or Container
826863
"""
827-
doc = {Fields.id: meili_id_from_opaque_key(usage_key)}
828-
doc.update(searchable_doc_collections(usage_key))
864+
doc = {Fields.id: meili_id_from_opaque_key(opaque_key)}
865+
doc.update(searchable_doc_collections(opaque_key))
829866
_update_index_docs([doc])
830867

831868

openedx/core/djangoapps/content/search/documents.py

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from django.core.exceptions import ObjectDoesNotExist
1010
from django.utils.text import slugify
11+
from opaque_keys import OpaqueKey
1112
from opaque_keys.edx.keys import LearningContextKey, UsageKey
1213
from opaque_keys.edx.locator import LibraryContainerLocator, LibraryLocatorV2
1314
from openedx_learning.api import authoring as authoring_api
@@ -113,7 +114,7 @@ class PublishStatus:
113114
modified = "modified"
114115

115116

116-
def meili_id_from_opaque_key(usage_key: UsageKey) -> str:
117+
def meili_id_from_opaque_key(opaque_key: OpaqueKey) -> str:
117118
"""
118119
Meilisearch requires each document to have a primary key that's either an
119120
integer or a string composed of alphanumeric characters (a-z A-Z 0-9),
@@ -124,7 +125,7 @@ def meili_id_from_opaque_key(usage_key: UsageKey) -> str:
124125
we could use PublishableEntity's primary key / UUID instead.
125126
"""
126127
# The slugified key _may_ not be unique so we append a hashed string to make it unique:
127-
key_str = str(usage_key)
128+
key_str = str(opaque_key)
128129
key_bin = key_str.encode()
129130

130131
suffix = blake2b(key_bin, digest_size=4, usedforsecurity=False).hexdigest()
@@ -309,7 +310,7 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
309310
return {Fields.tags: result}
310311

311312

312-
def _collections_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
313+
def _collections_for_content_object(object_id: OpaqueKey) -> dict:
313314
"""
314315
Given an XBlock, course, library, etc., get the collections for its index doc.
315316
@@ -340,11 +341,21 @@ def _collections_for_content_object(object_id: UsageKey | LearningContextKey) ->
340341
# Gather the collections associated with this object
341342
collections = None
342343
try:
343-
component = lib_api.get_component_from_usage_key(object_id)
344-
collections = authoring_api.get_entity_collections(
345-
component.learning_package_id,
346-
component.key,
347-
)
344+
if isinstance(object_id, UsageKey):
345+
component = lib_api.get_component_from_usage_key(object_id)
346+
collections = authoring_api.get_entity_collections(
347+
component.learning_package_id,
348+
component.key,
349+
)
350+
elif isinstance(object_id, LibraryContainerLocator):
351+
container = lib_api.get_container_from_key(object_id)
352+
collections = authoring_api.get_entity_collections(
353+
container.publishable_entity.learning_package_id,
354+
container.key,
355+
)
356+
else:
357+
return result
358+
348359
except ObjectDoesNotExist:
349360
log.warning(f"No component found for {object_id}")
350361

@@ -438,13 +449,13 @@ def searchable_doc_tags(usage_key: UsageKey) -> dict:
438449
return doc
439450

440451

441-
def searchable_doc_collections(usage_key: UsageKey) -> dict:
452+
def searchable_doc_collections(opaque_key: OpaqueKey) -> dict:
442453
"""
443454
Generate a dictionary document suitable for ingestion into a search engine
444455
like Meilisearch or Elasticsearch, with the collections data for the given content object.
445456
"""
446-
doc = searchable_doc_for_usage_key(usage_key)
447-
doc.update(_collections_for_content_object(usage_key))
457+
doc = searchable_doc_for_usage_key(opaque_key)
458+
doc.update(_collections_for_content_object(opaque_key))
448459

449460
return doc
450461

openedx/core/djangoapps/content/search/handlers.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from django.dispatch import receiver
99
from opaque_keys import InvalidKeyError
1010
from opaque_keys.edx.keys import UsageKey
11-
from opaque_keys.edx.locator import LibraryCollectionLocator
11+
from opaque_keys.edx.locator import LibraryCollectionLocator, LibraryContainerLocator
1212
from openedx_events.content_authoring.data import (
1313
ContentLibraryData,
1414
ContentObjectChangedData,
@@ -40,7 +40,7 @@
4040

4141
from .api import (
4242
only_if_meilisearch_enabled,
43-
upsert_block_collections_index_docs,
43+
upsert_item_collections_index_docs,
4444
upsert_block_tags_index_docs,
4545
upsert_collection_tags_index_docs,
4646
)
@@ -211,25 +211,30 @@ def content_object_associations_changed_handler(**kwargs) -> None:
211211
return
212212

213213
try:
214-
# Check if valid if course or library block
215-
usage_key = UsageKey.from_string(str(content_object.object_id))
214+
# Check if valid course or library block
215+
opaque_key = UsageKey.from_string(str(content_object.object_id))
216216
except InvalidKeyError:
217217
try:
218-
# Check if valid if library collection
219-
usage_key = LibraryCollectionLocator.from_string(str(content_object.object_id))
218+
# Check if valid library collection
219+
opaque_key = LibraryCollectionLocator.from_string(str(content_object.object_id))
220220
except InvalidKeyError:
221-
log.error("Received invalid content object id")
222-
return
221+
try:
222+
# Check if valid library container
223+
opaque_key = LibraryContainerLocator.from_string(str(content_object.object_id))
224+
except InvalidKeyError:
225+
log.error("Received invalid content object id")
226+
log.error(str(content_object.object_id))
227+
return
223228

224229
# This event's changes may contain both "tags" and "collections", but this will happen rarely, if ever.
225230
# So we allow a potential double "upsert" here.
226231
if not content_object.changes or "tags" in content_object.changes:
227-
if isinstance(usage_key, LibraryCollectionLocator):
228-
upsert_collection_tags_index_docs(usage_key)
232+
if isinstance(opaque_key, LibraryCollectionLocator):
233+
upsert_collection_tags_index_docs(opaque_key)
229234
else:
230-
upsert_block_tags_index_docs(usage_key)
235+
upsert_block_tags_index_docs(opaque_key)
231236
if not content_object.changes or "collections" in content_object.changes:
232-
upsert_block_collections_index_docs(usage_key)
237+
upsert_item_collections_index_docs(opaque_key)
233238

234239

235240
@receiver(LIBRARY_CONTAINER_CREATED)

openedx/core/djangoapps/content/search/tasks.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,19 @@ def update_library_components_collections(library_key_str: str, collection_key:
112112
api.update_library_components_collections(library_key, collection_key)
113113

114114

115+
@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
116+
@set_code_owner_attribute
117+
def update_library_containers_collections(library_key_str: str, collection_key: str) -> None:
118+
"""
119+
Celery task to update the "collections" field for containers in the given content library collection.
120+
"""
121+
library_key = LibraryLocatorV2.from_string(library_key_str)
122+
123+
log.info("Updating document.collections for library %s collection %s containers", library_key, collection_key)
124+
125+
api.update_library_containers_collections(library_key, collection_key)
126+
127+
115128
@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
116129
@set_code_owner_attribute
117130
def update_library_container_index_doc(library_key_str: str, container_key_str: str) -> None:

openedx/core/djangoapps/content/search/tests/test_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch):
581581
description="Second Collection",
582582
)
583583

584-
# Add Problem1 to both Collections (these internally call `upsert_block_collections_index_docs` and
584+
# Add Problem1 to both Collections (these internally call `upsert_item_collections_index_docs` and
585585
# `upsert_library_collection_index_doc`)
586586
# (adding in reverse order to test sorting of collection tag)
587587
updated_date = datetime(2023, 6, 7, 8, 9, 10, tzinfo=timezone.utc)

openedx/core/djangoapps/content_libraries/api/blocks.py

Lines changed: 15 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3,66 +3,51 @@
33
44
These methods don't enforce permissions (only the REST APIs do).
55
"""
6-
from dataclasses import dataclass
7-
from datetime import datetime, timezone
86
import logging
97
import mimetypes
8+
from dataclasses import dataclass
9+
from datetime import datetime, timezone
1010

1111
from django.conf import settings
1212
from django.core.exceptions import ObjectDoesNotExist
1313
from django.core.validators import validate_unicode_slug
14-
from django.db.models import QuerySet
1514
from django.db import transaction
16-
from django.utils.translation import gettext as _
15+
from django.db.models import QuerySet
1716
from django.urls import reverse
17+
from django.utils.translation import gettext as _
1818
from lxml import etree
19-
from opaque_keys.edx.locator import LibraryLocatorV2, LibraryUsageLocatorV2
2019
from opaque_keys.edx.keys import UsageKeyV2
20+
from opaque_keys.edx.locator import LibraryLocatorV2, LibraryUsageLocatorV2
2121
from openedx_events.content_authoring.data import (
22+
ContentObjectChangedData,
2223
LibraryBlockData,
2324
LibraryCollectionData,
24-
LibraryContainerData,
25-
ContentObjectChangedData,
25+
LibraryContainerData
2626
)
2727
from openedx_events.content_authoring.signals import (
28+
CONTENT_OBJECT_ASSOCIATIONS_CHANGED,
2829
LIBRARY_BLOCK_CREATED,
2930
LIBRARY_BLOCK_DELETED,
3031
LIBRARY_BLOCK_UPDATED,
3132
LIBRARY_COLLECTION_UPDATED,
32-
LIBRARY_CONTAINER_UPDATED,
33-
CONTENT_OBJECT_ASSOCIATIONS_CHANGED,
33+
LIBRARY_CONTAINER_UPDATED
3434
)
35-
from xblock.core import XBlock
36-
3735
from openedx_learning.api import authoring as authoring_api
38-
from openedx_learning.api.authoring_models import (
39-
Component,
40-
ComponentVersion,
41-
LearningPackage,
42-
MediaType,
43-
)
36+
from openedx_learning.api.authoring_models import Component, ComponentVersion, LearningPackage, MediaType
37+
from xblock.core import XBlock
4438

39+
from openedx.core.djangoapps.content_libraries import api as lib_api
4540
from openedx.core.djangoapps.xblock.api import (
4641
get_component_from_usage_key,
4742
get_xblock_app_config,
48-
xblock_type_display_name,
43+
xblock_type_display_name
4944
)
5045
from openedx.core.types import User as UserType
51-
from openedx.core.djangoapps.content_libraries import api as lib_api
5246

5347
from ..models import ContentLibrary
5448
from ..permissions import CAN_EDIT_THIS_CONTENT_LIBRARY
55-
from .exceptions import (
56-
BlockLimitReachedError,
57-
ContentLibraryBlockNotFound,
58-
InvalidNameError,
59-
LibraryBlockAlreadyExists,
60-
)
61-
from .libraries import (
62-
library_component_usage_key,
63-
require_permission_for_library_key,
64-
PublishableItem,
65-
)
49+
from .exceptions import BlockLimitReachedError, ContentLibraryBlockNotFound, InvalidNameError, LibraryBlockAlreadyExists
50+
from .libraries import PublishableItem, library_component_usage_key, require_permission_for_library_key
6651

6752
log = logging.getLogger(__name__)
6853

0 commit comments

Comments
 (0)