diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py index e84267f1..60b0a4af 100644 --- a/openedx_learning/__init__.py +++ b/openedx_learning/__init__.py @@ -2,4 +2,4 @@ Open edX Learning ("Learning Core"). """ -__version__ = "0.29.1" +__version__ = "0.30.0" diff --git a/openedx_learning/apps/authoring/publishing/admin.py b/openedx_learning/apps/authoring/publishing/admin.py index 7b144fa8..36711130 100644 --- a/openedx_learning/apps/authoring/publishing/admin.py +++ b/openedx_learning/apps/authoring/publishing/admin.py @@ -6,7 +6,7 @@ import functools from django.contrib import admin -from django.db.models import Count +from django.db.models import Count, F from django.utils.html import format_html from django.utils.safestring import SafeText @@ -21,6 +21,7 @@ EntityListRow, LearningPackage, PublishableEntity, + PublishableEntityVersion, PublishLog, PublishLogRecord, ) @@ -48,6 +49,7 @@ class PublishLogRecordTabularInline(admin.TabularInline): "title", "old_version_num", "new_version_num", + "dependencies_hash_digest", ) readonly_fields = fields @@ -89,28 +91,89 @@ class PublishLogAdmin(ReadOnlyModelAdmin): list_filter = ["learning_package"] +class PublishableEntityVersionTabularInline(admin.TabularInline): + """ + Tabular inline for a single Draft change. + """ + model = PublishableEntityVersion + + fields = ( + "version_num", + "title", + "created", + "created_by", + "dependencies_list", + ) + readonly_fields = fields + + def dependencies_list(self, version: PublishableEntityVersion): + identifiers = sorted( + [str(dep.key) for dep in version.dependencies.all()] + ) + return "\n".join(identifiers) + + def get_queryset(self, request): + queryset = super().get_queryset(request) + return ( + queryset + .order_by('-version_num') + .select_related('created_by', 'entity') + .prefetch_related('dependencies') + ) + + +class PublishStatusFilter(admin.SimpleListFilter): + """ + Custom filter for entities that have unpublished changes. 
+ """ + title = "publish status" + parameter_name = "publish_status" + + def lookups(self, request, model_admin): + return [ + ("unpublished_changes", "Has unpublished changes"), + ] + + def queryset(self, request, queryset): + if self.value() == "unpublished_changes": + return ( + queryset + .exclude( + published__version__isnull=True, + draft__version__isnull=True, + ) + .exclude( + published__version=F("draft__version"), + published__dependencies_hash_digest=F("draft__dependencies_hash_digest") + ) + ) + return queryset + + @admin.register(PublishableEntity) class PublishableEntityAdmin(ReadOnlyModelAdmin): """ Read-only admin view for Publishable Entities """ + inlines = [PublishableEntityVersionTabularInline] + list_display = [ "key", - "draft_version", "published_version", + "draft_version", "uuid", "learning_package", "created", "created_by", "can_stand_alone", ] - list_filter = ["learning_package"] + list_filter = ["learning_package", PublishStatusFilter] search_fields = ["key", "uuid"] fields = [ "key", - "draft_version", "published_version", + "draft_version", "uuid", "learning_package", "created", @@ -120,8 +183,8 @@ class PublishableEntityAdmin(ReadOnlyModelAdmin): ] readonly_fields = [ "key", - "draft_version", "published_version", + "draft_version", "uuid", "learning_package", "created", @@ -130,21 +193,55 @@ class PublishableEntityAdmin(ReadOnlyModelAdmin): "can_stand_alone", ] - def draft_version(self, entity: PublishableEntity): - return entity.draft.version.version_num if entity.draft.version else None - - def published_version(self, entity: PublishableEntity): - return entity.published.version.version_num if entity.published and entity.published.version else None - def get_queryset(self, request): queryset = super().get_queryset(request) return queryset.select_related( - "learning_package", "published__version", + "learning_package", "published__version", "draft__version", "created_by" ) def see_also(self, entity): return 
one_to_one_related_model_html(entity) + def draft_version(self, entity: PublishableEntity): + """ + Version num + dependency hash if applicable, e.g. "5" or "5 (825064c2)" + + If the version info is different from the published version, we + italicize the text for emphasis. + """ + if hasattr(entity, "draft") and entity.draft.version: + draft_log_record = entity.draft.draft_log_record + if draft_log_record and draft_log_record.dependencies_hash_digest: + version_str = ( + f"{entity.draft.version.version_num} " + f"({draft_log_record.dependencies_hash_digest})" + ) + else: + version_str = str(entity.draft.version.version_num) + + if version_str == self.published_version(entity): + return version_str + else: + return format_html("{}", version_str) + + return None + + def published_version(self, entity: PublishableEntity): + """ + Version num + dependency hash if applicable, e.g. "5" or "5 (825064c2)" + """ + if hasattr(entity, "published") and entity.published.version: + publish_log_record = entity.published.publish_log_record + if publish_log_record.dependencies_hash_digest: + return ( + f"{entity.published.version.version_num} " + f"({publish_log_record.dependencies_hash_digest})" + ) + else: + return str(entity.published.version.version_num) + + return None + @admin.register(Published) class PublishedAdmin(ReadOnlyModelAdmin): @@ -197,6 +294,7 @@ class DraftChangeLogRecordTabularInline(admin.TabularInline): "title", "old_version_num", "new_version_num", + "dependencies_hash_digest", ) readonly_fields = fields diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 5a0dcbdd..72eccd42 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -13,9 +13,11 @@ from typing import ContextManager, Optional, TypeVar from django.core.exceptions import ObjectDoesNotExist, ValidationError -from django.db.models import F, Q, QuerySet +from 
django.db.models import F, Prefetch, Q, QuerySet from django.db.transaction import atomic +from openedx_learning.lib.fields import create_hash_digest + from .contextmanagers import DraftChangeLogContext from .models import ( Container, @@ -31,9 +33,11 @@ PublishableEntity, PublishableEntityMixin, PublishableEntityVersion, + PublishableEntityVersionDependency, PublishableEntityVersionMixin, PublishLog, PublishLogRecord, + PublishSideEffect, ) from .models.publish_log import Published @@ -213,6 +217,8 @@ def create_publishable_entity_version( title: str, created: datetime, created_by: int | None, + *, + dependencies: list[int] | None = None, # PublishableEntity IDs ) -> PublishableEntityVersion: """ Create a PublishableEntityVersion. @@ -220,7 +226,7 @@ def create_publishable_entity_version( You'd typically want to call this right before creating your own content version model that points to it. """ - with atomic(): + with atomic(savepoint=False): version = PublishableEntityVersion.objects.create( entity_id=entity_id, version_num=version_num, @@ -228,16 +234,82 @@ def create_publishable_entity_version( created=created, created_by_id=created_by, ) + if dependencies: + set_version_dependencies(version.id, dependencies) + set_draft_version( entity_id, version.id, set_at=created, set_by=created_by, ) - return version +def set_version_dependencies( + version_id: int, # PublishableEntityVersion.id, + /, + dependencies: list[int] # List of PublishableEntity.id +) -> None: + """ + Set the dependencies of a publishable entity version. + + In general, callers should not modify dependencies after creation (i.e. use + the optional param in create_publishable_entity_version() instead of using + this function). **This function is not atomic.** If you're doing backfills, + you must wrap calls to this function with a transaction.atomic() call. + + The idea behind dependencies is that a PublishableEntity's Versions may + be declared to reference unpinned PublishableEntities. 
Changes to those + referenced PublishableEntities still affect the draft or published state of + the referent PublishableEntity, even if the referent entity's version is not + incremented. + + For example, we only create a new UnitVersion when there are changes to the + metadata of the Unit itself. So this would happen when the name of the Unit + is changed, or if a child Component is added, removed, or reordered. No new + UnitVersion is created when a child Component of that Unit is modified or + published. Yet we still consider a Unit to be affected when one of its child + Components is modified or published. Therefore, we say that the child + Components are dependencies of the UnitVersion. + + Dependencies vs. Container Rows/Children + + Dependencies overlap with the concept of container child rows, but the two + are not exactly the same. For instance: + + * Dependencies have no sense of ordering. + * If a row is declared to be pinned to a specific version of a child, then + it is NOT a dependency. For example, if U1.v1 is declared to have a pinned + reference to ComponentVersion C1.v1, then future changes to C1 do not + affect U1.v1 because U1.v1 will just ignore those new ComponentVersions. + + Guidelines: + + 1. Only declare one level of dependencies, e.g. immediate parent-child + relationships. The publishing app will calculate transitive dependencies + like "all descendants" based on this. This is important for saving space, + because the DAG of trasitive dependency relationships can explode out to + tens of thousands of nodes per version. + 2. Declare dependencies from the bottom-up when possible. In other words, if + you're building an entire Subsection, set the Component dependencies for + the Units before you set the Unit dependencies for the Subsection. 
This + code will still work if you build from the top-down, but we'll end up + doing many redundant re-calculations, since every change to a lower layer + will cause recalculation to the higher levels that depend on it. + 3. Do not create circular dependencies. + """ + PublishableEntityVersionDependency.objects.bulk_create( + [ + PublishableEntityVersionDependency( + referring_version_id=version_id, + referenced_entity_id=dep_entity_id, + ) + for dep_entity_id in set(dependencies) # dependencies have no ordering + ], + ) + + def get_publishable_entity(publishable_entity_id: int, /) -> PublishableEntity: return PublishableEntity.objects.get(id=publishable_entity_id) @@ -285,7 +357,8 @@ def get_entities_with_unpublished_changes( """ Fetch entities that have unpublished changes. - By default, this excludes soft-deleted drafts but can be included using include_deleted_drafts option. + By default, this excludes soft-deleted drafts but can be included using + include_deleted_drafts option. """ entities_qs = ( PublishableEntity.objects @@ -333,65 +406,72 @@ def publish_all_drafts( Publish everything that is a Draft and is not already published. """ draft_qset = ( - Draft.objects.select_related("entity__published") - .filter(entity__learning_package_id=learning_package_id) - - # Exclude entities where the Published version already matches the - # Draft version. - .exclude(entity__published__version_id=F("version_id")) - - # Account for soft-deletes: - # NULL != NULL in SQL, so simply excluding entities where the Draft - # and Published versions match will not catch the case where a - # soft-delete has been published (i.e. both the Draft and Published - # versions are NULL). We need to explicitly check for that case - # instead, or else we will re-publish the same soft-deletes over - # and over again. 
- .exclude(Q(version__isnull=True) & Q(entity__published__version__isnull=True)) + Draft.objects + .filter(entity__learning_package_id=learning_package_id) + .with_unpublished_changes() ) return publish_from_drafts( learning_package_id, draft_qset, message, published_at, published_by ) +def _get_dependencies_with_unpublished_changes( + draft_qset: QuerySet[Draft] +) -> list[QuerySet[Draft]]: + """ + Return all dependencies to publish as a list of Draft QuerySets. + + This should only return the Drafts that have actual changes, not pure side- + effects. The side-effect calculations will happen separately. + """ + # First we have to do a full crawl of *all* dependencies, regardless of + # whether they have unpublished changes or not. This is because we might + # have a dependency-of-a-dependency that has changed somewhere down the + # line. Example: The draft_qset includes a Subsection. Even if the Unit + # versions are still the same, there might be a changed Component that needs + # to be published. + all_dependency_drafts = [] + dependency_drafts = Draft.objects.filter( + entity__affects__in=draft_qset.values_list("version_id", flat=True) + ).distinct() + + while dependency_drafts: + all_dependency_drafts.append(dependency_drafts) + dependency_drafts = Draft.objects.filter( + entity__affects__in=dependency_drafts.all().values_list("version_id", flat=True) + ).distinct() + + unpublished_dependency_drafts = [ + dependency_drafts_qset.all().with_unpublished_changes() + for dependency_drafts_qset in all_dependency_drafts + ] + return unpublished_dependency_drafts + + def publish_from_drafts( learning_package_id: int, # LearningPackage.id /, - draft_qset: QuerySet, + draft_qset: QuerySet[Draft], message: str = "", published_at: datetime | None = None, published_by: int | None = None, # User.id + *, + publish_dependencies: bool = True, ) -> PublishLog: """ Publish the rows in the ``draft_model_qsets`` args passed in. 
+ + By default, this will also publish all dependencies (e.g. unpinned children) + of the Drafts that are passed in. """ if published_at is None: published_at = datetime.now(tz=timezone.utc) with atomic(): - # If the drafts include any containers, we need to auto-publish their descendants: - # TODO: this only handles one level deep and would need to be updated to support sections > subsections > units - - # Get the IDs of the ContainerVersion for any Containers whose drafts are slated to be published. - container_version_ids = ( - Container.objects.filter(publishable_entity__draft__in=draft_qset) - .values_list("publishable_entity__draft__version__containerversion__pk", flat=True) - ) - if container_version_ids: - # We are publishing at least one container. Check if it has any child components that aren't already slated - # to be published. - unpublished_draft_children = EntityListRow.objects.filter( - entity_list__container_versions__pk__in=container_version_ids, - entity_version=None, # Unpinned entities only - ).exclude( - entity__draft__version=F("entity__published__version") # Exclude already published things - ).values_list("entity__draft__pk", flat=True) - if unpublished_draft_children: - # Force these additional child components to be published at the same time by adding them to the qset: - draft_qset = Draft.objects.filter( - Q(pk__in=draft_qset.values_list("pk", flat=True)) | - Q(pk__in=unpublished_draft_children) - ) + if publish_dependencies: + dependency_drafts_qsets = _get_dependencies_with_unpublished_changes(draft_qset) + else: + dependency_drafts_qsets = [] # One PublishLog for this entire publish operation. publish_log = PublishLog( @@ -403,32 +483,51 @@ def publish_from_drafts( publish_log.full_clean() publish_log.save(force_insert=True) - for draft in draft_qset.select_related("entity__published__version"): - try: - old_version = draft.entity.published.version - except ObjectDoesNotExist: - # This means there is no published version yet. 
- old_version = None - - # Create a record describing publishing this particular Publishable - # (useful for auditing and reverting). - publish_log_record = PublishLogRecord( - publish_log=publish_log, - entity=draft.entity, - old_version=old_version, - new_version=draft.version, - ) - publish_log_record.full_clean() - publish_log_record.save(force_insert=True) + # We're intentionally avoiding union() here because Django ORM unions + # introduce cumbersome restrictions (can only union once, can't + # select_related on it after, the extra rows must be exactly compatible + # in unioned qsets, etc.) Instead, we're going to have one queryset per + # dependency layer. + all_draft_qsets = [ + draft_qset.select_related("entity__published__version"), + *dependency_drafts_qsets, # one QuerySet per layer of dependencies + ] + published_draft_ids = set() + for qset in all_draft_qsets: + for draft in qset: + # Skip duplicates that we might get from expanding dependencies. + if draft.pk in published_draft_ids: + continue + + try: + old_version = draft.entity.published.version + except ObjectDoesNotExist: + # This means there is no published version yet. + old_version = None + + # Create a record describing publishing this particular + # Publishable (useful for auditing and reverting). 
+ publish_log_record = PublishLogRecord( + publish_log=publish_log, + entity=draft.entity, + old_version=old_version, + new_version=draft.version, + ) + publish_log_record.full_clean() + publish_log_record.save(force_insert=True) + + # Update the lookup we use to fetch the published versions + Published.objects.update_or_create( + entity=draft.entity, + defaults={ + "version": draft.version, + "publish_log_record": publish_log_record, + }, + ) - # Update the lookup we use to fetch the published versions - Published.objects.update_or_create( - entity=draft.entity, - defaults={ - "version": draft.version, - "publish_log_record": publish_log_record, - }, - ) + published_draft_ids.add(draft.pk) + + _create_side_effects_for_change_log(publish_log) return publish_log @@ -505,8 +604,8 @@ def set_draft_version( from Studio's editing point of view (see ``soft_delete_draft`` for more details). - Calling this function attaches a new DraftChangeLogRecordand attaches it to a - DraftChangeLog. + Calling this function attaches a new DraftChangeLogRecord and attaches it to + a DraftChangeLog. This function will create DraftSideEffect entries and properly add any containers that may have been affected by this draft update, UNLESS it is @@ -538,7 +637,6 @@ def set_draft_version( # The actual update of the Draft model is here. Everything after this # block is bookkeeping in our DraftChangeLog. draft.version_id = publishable_entity_version_pk - draft.save() # Check to see if we're inside a context manager for an active # DraftChangeLog (i.e. 
what happens if the caller is using the public @@ -548,23 +646,52 @@ def set_draft_version( learning_package_id ) if active_change_log: - change = _add_to_existing_draft_change_log( + draft_log_record = _add_to_existing_draft_change_log( active_change_log, draft.entity_id, old_version_id=old_version_id, new_version_id=publishable_entity_version_pk, ) - # We explicitly *don't* create container side effects here because - # there may be many changes in this DraftChangeLog, some of which - # haven't been made yet. It wouldn't make sense to create a side - # effect that says, "this Unit changed because this Component in it - # changed" if we were changing that same Unit later on in the same + if draft_log_record: + # Normal case: a DraftChangeLogRecord was created or updated. + draft.draft_log_record = draft_log_record + else: + # Edge case: this change cancelled out other changes, and the + # net effect is that the DraftChangeLogRecord shouldn't exist, + # i.e. the version at the start and end of the DraftChangeLog is + # the same. In that case, _add_to_existing_draft_change_log will + # delete the log record for this Draft state, and we have to + # look for the most recently created DraftLogRecord from another + # DraftChangeLog. This value may be None. + # + # NOTE: There may be some weird edge cases here around soft- + # deletions and modifications of the same Draft entry in nested + # bulk_draft_changes_for() calls. I haven't thought that through + # yet--it might be better to just throw an error rather than try + # to accommodate it. + draft.draft_log_record = ( + DraftChangeLogRecord.objects + .filter(entity_id=draft.entity_id) + .order_by("-pk") + .first() + ) + draft.save() + + # We also *don't* create container side effects here because there + # may be many changes in this DraftChangeLog, some of which haven't + # been made yet. 
It wouldn't make sense to create a side effect that + # says, "this Unit changed because this Component in it changed" if + # we were changing that same Unit later on in the same # DraftChangeLog, because that new Unit version might not even - # include the child Component. So we'll let DraftChangeLogContext - # do that work when it exits its context. + # include that child Component. Also, calculating side-effects is + # expensive, and would result in a lot of wasted queries if we did + # it for every change will inside an active change log context. + # + # Therefore we'll let DraftChangeLogContext do that work when it + # exits its context. else: # This means there is no active DraftChangeLog, so we create our own - # and add our DraftChangeLogRecord to it. This has the minor + # and add our DraftChangeLogRecord to it. This has the very minor # optimization that we don't have to check for an existing # DraftChangeLogRecord, because we know it can't exist yet. change_log = DraftChangeLog.objects.create( @@ -572,13 +699,14 @@ def set_draft_version( changed_at=set_at, changed_by_id=set_by, ) - change = DraftChangeLogRecord.objects.create( + draft.draft_log_record = DraftChangeLogRecord.objects.create( draft_change_log=change_log, entity_id=draft.entity_id, old_version_id=old_version_id, new_version_id=publishable_entity_version_pk, ) - _create_container_side_effects_for_draft_change(change) + draft.save() + _create_side_effects_for_change_log(change_log) def _add_to_existing_draft_change_log( @@ -586,9 +714,9 @@ def _add_to_existing_draft_change_log( entity_id: int, old_version_id: int | None, new_version_id: int | None, -) -> DraftChangeLogRecord: +) -> DraftChangeLogRecord | None: """ - Create or update a DraftChangeLogRecord for the active_change_log passed in. + Create, update, or delete a DraftChangeLogRecord for the active_change_log. The an active_change_log may have many DraftChangeLogRecords already associated with it. 
A DraftChangeLog can only have one DraftChangeLogRecord @@ -602,6 +730,13 @@ def _add_to_existing_draft_change_log( and the same entity_id but with versions: (None, v1), (v1, v2), (v2, v3); we would collapse them into one DraftChangeLogrecord with old_version = None and new_version = v3. + + This also means that if we make a change that undoes the previos change, it + will delete that DraftChangeLogRecord, e.g. (None, v1) -> (None, v2) -> + (None -> None), then this entry can be deleted because it didn't change + anything. This function should never be used for creating side-effect change + log records (the only place where it's normal to have the same old and new + versions). """ try: # Check to see if this PublishableEntity has already been changed in @@ -622,8 +757,11 @@ def _add_to_existing_draft_change_log( # # It's important that we remove these cases, because we use the # old_version == new_version convention to record entities that have - # changed purely due to side-effects. + # changed purely due to side-effects. We could technically still + # differentiate those by actually looking at the DraftSideEffect and + # PublishSideEffect models, but this is less confusing overall. change.delete() + return None else: # Normal case: We update the new_version, but leave the old_version # as is. The old_version represents what the Draft was pointing to @@ -644,87 +782,418 @@ def _add_to_existing_draft_change_log( return change -def _create_container_side_effects_for_draft_change_log(change_log: DraftChangeLog): - """ - Iterate through the whole DraftChangeLog and process side-effects. - """ +def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog): + """ + Create the side-effects for a DraftChangeLog or PublishLog. + + A side-effect is created whenever a dependency of a draft or published + entity version is altered. + + For example, say we have a published Unit at version 1 (``U1.v1``). 
+ ``U1.v1`` is defined to have unpinned references to Components ``C1`` and + ``C2``, i.e. ``U1.v1 = [C1, C2]``. This means that ``U1.v1`` always shows + the latest published versions of ``C1`` and ``C2``. While we do have a more + sophisticated encoding for the ordered parent-child relationships, we + capture the basic dependency relationship using the M:M + ``PublishableEntityVersionDependency`` model. In this scenario, C1 and C2 + are dependencies of ``U1.v1`` + + In the above example, publishing a newer version of ``C1`` does *not* + increment the version for Unit ``U1``. But we still want to record that + ``U1.v1`` was affected by the change in ``C1``. We record this in the + ``DraftSideEffect`` and ``PublishSideEffect`` models. We also add an entry + for ``U1`` in the change log, saying that it went from version 1 to version + 1--i.e nothing about the Unit's defintion changed, but it was still affected + by other changes in the log. + + Only call this function after all the records have already been created. + + Note: The interface between ``DraftChangeLog`` and ``PublishLog`` is similar + enough that this function has been made to work with both. + """ + branch_cls: type[Draft] | type[Published] + change_record_cls: type[DraftChangeLogRecord] | type[PublishLogRecord] + side_effect_cls: type[DraftSideEffect] | type[PublishSideEffect] + if isinstance(change_log, DraftChangeLog): + branch_cls = Draft + change_record_cls = DraftChangeLogRecord + side_effect_cls = DraftSideEffect + log_record_rel = "draft_log_record_id" + elif isinstance(change_log, PublishLog): + branch_cls = Published + change_record_cls = PublishLogRecord + side_effect_cls = PublishSideEffect + log_record_rel = "publish_log_record_id" + else: + raise TypeError( + f"expected DraftChangeLog or PublishLog, not {type(change_log)}" + ) + + # processed_entity_ids holds the entity IDs that we've already calculated + # side-effects for. 
This is to save us from recalculating side-effects for + # the same dependency relationships over and over again. So if we're calling + # this function in a loop for all the Components in a Unit, we won't be + # recalculating the Unit's side-effect on its Subsection, and its + # Subsection's side-effect on its Section each time through the loop. + # It also guards against infinite parent-child relationship loops, though + # those aren't *supposed* to happen anyhow. processed_entity_ids: set[int] = set() - for change in change_log.records.all(): - _create_container_side_effects_for_draft_change( - change, - processed_entity_ids=processed_entity_ids, + for original_change in change_log.records.all(): + affected_by_original_change = branch_cls.objects.filter( + version__dependencies=original_change.entity ) + changes_and_affected = [ + (original_change, current) for current in affected_by_original_change + ] + # These are the Published or Draft objects where we need to repoint the + # log_record (publish_log_record or draft_change_log_record) to point to + # the side-effect changes, e.g. the DraftChangeLogRecord that says, + # "This Unit's version stayed the same, but its dependency hash changed + # because a child Component's draft version was changed." We gather them + # all up in a list so we can do a bulk_update on them. 
+ branch_objs_to_update_with_side_effects = [] + + while changes_and_affected: + change, affected = changes_and_affected.pop() + change_log_param = {} + if branch_cls == Draft: + change_log_param['draft_change_log'] = change.draft_change_log # type: ignore[union-attr] + elif branch_cls == Published: + change_log_param['publish_log'] = change.publish_log # type: ignore[union-attr] + + # Example: If the original_change is a DraftChangeLogRecord that + # represents editing a Component, the side_effect_change is the + # DraftChangeLogRecord that represents the fact that the containing + # Unit was also altered (even if the Unit version doesn't change). + side_effect_change, _created = change_record_cls.objects.get_or_create( + **change_log_param, + entity_id=affected.entity_id, + defaults={ + # If a change record already exists because the affected + # entity was separately modified, then we don't touch the + # old/new version entries. But if we're creating this change + # record as a pure side-effect, then we use the (old_version + # == new_version) convention to indicate that. + 'old_version_id': affected.version_id, + 'new_version_id': affected.version_id, + } + ) + # Update the current branch pointer (Draft or Published) for this + # entity to point to the side_effect_change (if it's not already). + if branch_cls == Published: + published_obj = affected.entity.published + if published_obj.publish_log_record != side_effect_change: + published_obj.publish_log_record = side_effect_change + branch_objs_to_update_with_side_effects.append(published_obj) + elif branch_cls == Draft: + draft_obj = affected.entity.draft + if draft_obj.draft_log_record != side_effect_change: + draft_obj.draft_log_record = side_effect_change + branch_objs_to_update_with_side_effects.append(draft_obj) + + # Create a new side effect (DraftSideEffect or PublishSideEffect) to + # record the relationship between the ``change`` and the + # corresponding ``side_effect_change``. 
We'll do this regardless of + # whether we created the ``side_effect_change`` or just pulled back + # an existing one. This addresses two things: + # + # 1. A change in multiple dependencies can generate multiple + # side effects that point to the same change log record, i.e. + # multiple changes can cause the same ``effect``. + # Example: Two draft components in a Unit are changed. Two + # DraftSideEffects will be created and point to the same Unit + # DraftChangeLogRecord. + # 2. A entity and its dependency can change at the same time. + # Example: If a Unit has a Component, and both the Unit and + # Component are edited in the same DraftChangeLog, then the Unit + # has changed in both ways (the Unit's internal metadata as well + # as the new version of the child component). The version of the + # Unit will be incremented, but we'll also create the + # DraftSideEffect. + side_effect_cls.objects.get_or_create( + cause=change, + effect=side_effect_change, + ) -def _create_container_side_effects_for_draft_change( - original_change: DraftChangeLogRecord, - processed_entity_ids: set | None = None -): - """ - Given a draft change, add side effects for all affected containers. + # Now we find the next layer up by looking at Drafts or Published + # that have ``affected.entity`` as a dependency. + next_layer_of_affected = branch_cls.objects.filter( + version__dependencies=affected.entity + ) + + # Make sure we never re-add the change we just processed when we + # queue up the next layer. + processed_entity_ids.add(change.entity_id) + + changes_and_affected.extend( + (side_effect_change, affected) + for affected in next_layer_of_affected + if affected.entity_id not in processed_entity_ids + ) + + branch_cls.objects.bulk_update( + branch_objs_to_update_with_side_effects, # type: ignore[arg-type] + [log_record_rel], + ) - This should only be run after the DraftChangeLogRecord has been otherwise - fully written out. 
We want to avoid the scenario where we create a - side-effect that a Component change affects a Unit if the Unit version is - also changed (maybe even deleted) in the same DraftChangeLog. + update_dependencies_hash_digests_for_log(change_log) - The `processed_entity_ids` set holds the entity IDs that we've already - calculated side-effects for. This is to save us from recalculating side- - effects for the same ancestor relationships over and over again. So if we're - calling this function in a loop for all the Components in a Unit, we won't - be recalculating the Unit's side-effect on its Subsection, and its - Subsection's side-effect on its Section. - TODO: This could get very expensive with the get_containers_with_entity - calls. We should measure the impact of this. +def update_dependencies_hash_digests_for_log( + change_log: DraftChangeLog | PublishLog, + backfill: bool = False, +) -> None: """ - if processed_entity_ids is None: - # An optimization, but also a guard against infinite side-effect loops. - processed_entity_ids = set() + Update dependencies_hash_digest for Drafts or Published in a change log. - changes_and_containers = [ - (original_change, container) - for container - in get_containers_with_entity(original_change.entity_id, ignore_pinned=True) - ] - while changes_and_containers: - change, container = changes_and_containers.pop() - - # If the container is not already in the DraftChangeLog, we need to - # add it. Since it's being caused as a DraftSideEffect, we're going - # add it with the old_version == new_version convention. 
- container_draft_version_pk = container.versioning.draft.pk - container_change, _created = DraftChangeLogRecord.objects.get_or_create( - draft_change_log=change.draft_change_log, - entity_id=container.pk, - defaults={ - 'old_version_id': container_draft_version_pk, - 'new_version_id': container_draft_version_pk - } + All the data for Draft/Published, DraftChangeLog/PublishLog, and + DraftChangeLogRecord/PublishLogRecord have been set at this point *except* + the dependencies_hash_digest of DraftChangeLogRecord/PublishLogRecord. Those + log records are newly created at this point, so dependencies_hash_digest are + set to their default values. + + Args: + change_log: A DraftChangeLog or PublishLog that already has all + side-effects added to it. The Draft and Published models should + already be updated to point to the post-change versions. + backfill: If this is true, we will not trust the hash values stored on + log records outside of our log, i.e. things that we would normally + expect to be pre-calculated. This will be important for the initial + data migration. + """ + if isinstance(change_log, DraftChangeLog): + branch = "draft" + log_record_relation = "draft_log_record" + record_cls = DraftChangeLogRecord + elif isinstance(change_log, PublishLog): + branch = "published" + log_record_relation = "publish_log_record" + record_cls = PublishLogRecord # type: ignore[assignment] + else: + raise TypeError( + f"expected DraftChangeLog or PublishLog, not {type(change_log)}" ) - # Mark that change in the current loop has the side effect of changing - # the parent container. We'll do this regardless of whether the - # container version itself also changed. If a Unit has a Component and - # both the Unit and Component have their versions incremented, then the - # Unit has changed in both ways (the Unit's internal metadata as well as - # the new version of the child component). 
- DraftSideEffect.objects.get_or_create(cause=change, effect=container_change) - processed_entity_ids.add(change.entity_id) - - # Now we find the next layer up of containers. So if the originally - # passed in publishable_entity_id was for a Component, then the - # ``container`` we've been creating the side effect for in this loop - # is the Unit, and ``parents_of_container`` would be any Sequences - # that contain the Unit. - parents_of_container = get_containers_with_entity(container.pk, ignore_pinned=True) - - changes_and_containers.extend( - (container_change, container_parent) - for container_parent in parents_of_container - if container_parent.pk not in processed_entity_ids + dependencies_prefetch = Prefetch( + "new_version__dependencies", + queryset=PublishableEntity.objects + .select_related( + f"{branch}__version", + f"{branch}__{log_record_relation}", + ) + .order_by(f"{branch}__version__uuid") + ) + changed_records: QuerySet[DraftChangeLogRecord] | QuerySet[PublishLogRecord] + changed_records = ( + change_log.records + .select_related("new_version", f"entity__{branch}") + .prefetch_related(dependencies_prefetch) + ) + + record_ids_to_hash_digests: dict[int, str | None] = {} + record_ids_to_live_deps: dict[int, list[PublishableEntity]] = {} + records_that_need_hashes = [] + + for record in changed_records: + # This is a soft-deletion, so the dependency hash is default/blank. We + # set this value in our record_ids_to_hash_digests cache, but we don't + # need to write it to the database because it's just the default value. + if record.new_version is None: + record_ids_to_hash_digests[record.id] = '' + continue + + # Now check to see if the new version has "live" dependencies, i.e. + # dependencies that have not been deleted. 
+ deps = list( + entity for entity in record.new_version.dependencies.all() + if hasattr(entity, branch) and getattr(entity, branch).version ) + # If there are no live dependencies, this log record also gets the + # default/blank value. + if not deps: + record_ids_to_hash_digests[record.id] = '' + continue + + # If we've gotten this far, it means that this record has dependencies + # and does need to get a hash computed for it. + records_that_need_hashes.append(record) + record_ids_to_live_deps[record.id] = deps + + if backfill: + untrusted_record_id_set = None + else: + untrusted_record_id_set = set(rec.id for rec in records_that_need_hashes) + + for record in records_that_need_hashes: + record.dependencies_hash_digest = hash_for_log_record( + record, + record_ids_to_hash_digests, + record_ids_to_live_deps, + untrusted_record_id_set, + ) + + _bulk_update_hashes(record_cls, records_that_need_hashes) + + +def _bulk_update_hashes(model_cls, records): + """ + bulk_update using the model class (PublishLogRecord or DraftChangeLogRecord) + + The only reason this function exists is because mypy 1.18.2 throws an + exception in validate_bulk_update() during "make quality" checks otherwise + (though curiously enough, not when that same version of mypy is called + directly). Given that I'm writing this on the night before the Ulmo release + cut, I'm not really interested in tracking down the underlying issue. + + The lack of type annotations on this function is very intentional. + """ + model_cls.objects.bulk_update(records, ['dependencies_hash_digest']) + + +def hash_for_log_record( + record: DraftChangeLogRecord | PublishLogRecord, + record_ids_to_hash_digests: dict, + record_ids_to_live_deps: dict, + untrusted_record_id_set: set | None, +) -> str: + """ + The hash digest for a given change log record. + + Note that this code is a little convoluted because we're working hard to + minimize the number of database requests. 
All the data we really need could + be derived from querying various relations off the record that's passed in + as the first parameter, but at a far higher cost. + + The hash calculated here will be used for the dependencies_hash_digest + attribute of DraftChangeLogRecord and PublishLogRecord. The hash is intended + to calculate the currently "live" (current draft or published) state of all + dependencies (and transitive dependencies) of the PublishableEntityVersion + pointed to by DraftChangeLogRecord.new_version/PublishLogRecord.new_version. + + The common case we have at the moment is when a container type like a Unit + has unpinned child Components as dependencies. In the data model, those + dependency relationships are represented by the "dependencies" M:M relation + on PublishableEntityVersion. Since the Unit version's references to its + child Components are unpinned, the draft Unit is always pointing to the + latest draft versions of those Components and the published Unit is always + pointing to the latest published versions of those Components. + + This means that the total draft or published state of any PublishableEntity + depends on the combination of: + + 1. The definition of the current draft/published version of that entity. + Example: Version 1 of a Unit would define that it had children [C1, C2]. + Version 2 of the same Unit might have children [C1, C2, C3]. + 2. The current draft/published versions of all dependencies. Example: What + are the current draft and published versions of C1, C2, and C3. + + This is why it makes sense to capture in a log record, since + PublishLogRecords or DraftChangeLogRecords are created whenever one of the + above two things changes. + + Here are the possible scenarios, including edge cases: + + EntityVersions with no dependencies + If record.new_version has no dependencies, dependencies_hash_digest is + set to the default value of ''. This will be the most common case. 
+
+    EntityVersions with dependencies
+        If an EntityVersion has dependencies, then its draft/published state
+        hash is based on the concatenation of, for each non-deleted dependency:
+        (i) the dependency's draft/published EntityVersion primary key, and
+        (ii) the dependency's own draft/published state hash, recursively re-
+        calculated if necessary.
+
+    Soft-deletions
+        If the record.new_version is None, that means we've just soft-deleted
+        something (or published the soft-delete of something). We adopt the
+        convention that if something is soft-deleted, its dependencies_hash_digest
+        is reset to the default value of ''. This is not strictly necessary for
+        the recursive hash calculation, but deleted entities will not have their
+        hash updated even as their non-deleted dependencies are updated underneath
+        them, so we set to '' to avoid falsely implying that the deleted entity's
+        dep hash is up to date.
+
+    EntityVersions with soft-deleted dependencies
+        A soft-deleted dependency isn't counted (it's as if the dependency were
+        removed). If all of an EntityVersion's dependencies are soft-deleted,
+        then it will go back to having the default blank string for its
+        dependencies_hash_digest.
+    """
+    # Case #1: We've already computed this, or it was bootstrapped for us in the
+    # cache because the record is a deletion or doesn't have dependencies.
+    if record.id in record_ids_to_hash_digests:
+        return record_ids_to_hash_digests[record.id]
+
+    # Case #2: The log_record is a dependency of something that was affected by
+    # a change, but the dependency itself did not change in any way (neither
+    # directly, nor as a side-effect).
+    #
+    # Example: A Unit has two Components. One of the Components changed, forcing
+    # us to recalculate the dependencies_hash_digest for that Unit. Doing that
+    # recalculation requires us to fetch the dependencies_hash_digest of the
+    # unchanged child Component as well. 
+ # + # If we aren't given an explicit untrusted_record_id_set, it means we can't + # trust anything. This would happen when we're bootstrapping things with an + # initial data migration. + if (untrusted_record_id_set is not None) and (record.id not in untrusted_record_id_set): + return record.dependencies_hash_digest + + # Normal recursive case starts here: + if isinstance(record, DraftChangeLogRecord): + branch = "draft" + elif isinstance(record, PublishLogRecord): + branch = "published" + else: + raise TypeError( + f"expected DraftChangeLogRecord or PublishLogRecord, not {type(record)}" + ) + + # This is extra work that only happens in case of a backfill, where we might + # need to compute dependency hashes for things outside of our log (because + # we don't trust them). + if record.id not in record_ids_to_live_deps: + if record.new_version is None: + record_ids_to_hash_digests[record.id] = '' + return '' + deps = list( + entity for entity in record.new_version.dependencies.all() + if hasattr(entity, branch) and getattr(entity, branch).version + ) + # If there are no live dependencies, this log record also gets the + # default/blank value. + if not deps: + record_ids_to_hash_digests[record.id] = '' + return '' + + record_ids_to_live_deps[record.id] = deps + # End special handling for backfill. 
+ + # Begin normal + dependencies: list[PublishableEntity] = sorted( + record_ids_to_live_deps[record.id], + key=lambda entity: getattr(entity, branch).log_record.new_version_id, + ) + dep_state_entries = [] + for dep_entity in dependencies: + new_version_id = getattr(dep_entity, branch).log_record.new_version_id + hash_digest = hash_for_log_record( + getattr(dep_entity, branch).log_record, + record_ids_to_hash_digests, + record_ids_to_live_deps, + untrusted_record_id_set, + ) + dep_state_entries.append(f"{new_version_id}:{hash_digest}") + summary_text = "\n".join(dep_state_entries) + + digest = create_hash_digest(summary_text.encode(), num_bytes=4) + record_ids_to_hash_digests[record.id] = digest + + return digest + def soft_delete_draft(publishable_entity_id: int, /, deleted_by: int | None = None) -> None: """ @@ -974,7 +1443,6 @@ def create_entity_list_with_rows( raise ValidationError("Container entity versions must belong to the specified entity.") with atomic(savepoint=False): - entity_list = create_entity_list() EntityListRow.objects.bulk_create( [ @@ -1012,6 +1480,11 @@ def _create_container_version( title=title, created=created, created_by=created_by, + dependencies=[ + entity_row.entity_id + for entity_row in entity_list.rows + if entity_row.is_unpinned() + ] ) container_version = container_version_cls.objects.create( publishable_entity_version=publishable_entity_version, @@ -1378,6 +1851,11 @@ def contains_unpublished_changes(container_id: int) -> bool: [ πŸ›‘ UNSTABLE ] Check recursively if a container has any unpublished changes. + Note: I've preserved the API signature for now, but we probably eventually + want to make a more general function that operates on PublishableEntities + and dependencies, once we introduce those with courses and their files, + grading policies, etc. 
+ Note: unlike this method, the similar-sounding `container.versioning.has_unpublished_changes` property only reports if the container itself has unpublished changes, not @@ -1386,50 +1864,37 @@ def contains_unpublished_changes(container_id: int) -> bool: that's in the container, it will be `False`. This method will return `True` in either case. """ - # This is similar to 'get_container(container.container_id)' but pre-loads more data. - container = Container.objects.select_related( - "publishable_entity__draft__version__containerversion__entity_list", - ).get(pk=container_id) - + container = ( + Container.objects + .select_related('publishable_entity__draft__draft_log_record') + .select_related('publishable_entity__published__publish_log_record') + .get(pk=container_id) + ) if container.versioning.has_unpublished_changes: return True - # We only care about children that are un-pinned, since published changes to pinned children don't matter - entity_list = getattr(container.versioning.draft, "entity_list", None) - if entity_list is None: - # This container has been soft-deleted, so it has no children. + draft = container.publishable_entity.draft + published = container.publishable_entity.published + + # Edge case: A container that was created and then immediately soft-deleted + # does not contain any unpublished changes. + if draft is None and published is None: return False - # This is a naive and inefficient implementation but should be correct. - # TODO: Once we have expanded the containers system to support multiple levels (not just Units and Components but - # also subsections and sections) and we have an expanded test suite for correctness, then we can optimize. - # We will likely change to a tracking-based approach rather than a "scan for changes" based approach. 
- for row in entity_list.entitylistrow_set.filter(entity_version=None).select_related( - "entity__container", - "entity__draft__version", - "entity__published__version", - ): - try: - child_container = row.entity.container - except Container.DoesNotExist: - child_container = None - if child_container: - # This is itself a container - check recursively: - if contains_unpublished_changes(child_container.pk): - return True - else: - # This is not a container: - draft_pk = row.entity.draft.version_id if row.entity.draft else None - published_pk = row.entity.published.version_id if hasattr(row.entity, "published") else None - if draft_pk != published_pk: - return True - return False + # The dependencies_hash_digest captures the state of all descendants, so we + # can do this quick comparison instead of iterating through layers of + # containers. + draft_version_hash_digest = draft.log_record.dependencies_hash_digest + published_version_hash_digest = published.log_record.dependencies_hash_digest + + return draft_version_hash_digest != published_version_hash_digest def get_containers_with_entity( publishable_entity_pk: int, *, ignore_pinned=False, + published=False, ) -> QuerySet[Container]: """ [ πŸ›‘ UNSTABLE ] @@ -1442,20 +1907,31 @@ def get_containers_with_entity( publishable_entity_pk: The ID of the PublishableEntity to search for. ignore_pinned: if true, ignore any pinned references to the entity. """ + branch = "published" if published else "draft" if ignore_pinned: - qs = Container.objects.filter( - # Note: these two conditions must be in the same filter() call, or the query won't be correct. 
- publishable_entity__draft__version__containerversion__entity_list__entitylistrow__entity_id=publishable_entity_pk, # pylint: disable=line-too-long # noqa: E501 - publishable_entity__draft__version__containerversion__entity_list__entitylistrow__entity_version_id=None, # pylint: disable=line-too-long # noqa: E501 - ) + filter_dict = { + # Note: these two conditions must be in the same filter() call, + # or the query won't be correct. + ( + f"publishable_entity__{branch}__version__" + "containerversion__entity_list__entitylistrow__entity_id" + ): publishable_entity_pk, + ( + f"publishable_entity__{branch}__version__" + "containerversion__entity_list__entitylistrow__entity_version_id" + ): None, + } + qs = Container.objects.filter(**filter_dict) else: - qs = Container.objects.filter( - publishable_entity__draft__version__containerversion__entity_list__entitylistrow__entity_id=publishable_entity_pk, # pylint: disable=line-too-long # noqa: E501 - ) - return qs.select_related( - "publishable_entity__draft__version__containerversion", - "publishable_entity__published__version__containerversion", - ).order_by("pk").distinct() # Ordering is mostly for consistent test cases. + filter_dict = { + ( + f"publishable_entity__{branch}__version__" + "containerversion__entity_list__entitylistrow__entity_id" + ): publishable_entity_pk + } + qs = Container.objects.filter(**filter_dict) + + return qs.order_by("pk").distinct() # Ordering is mostly for consistent test cases. 
def get_container_children_count( @@ -1539,6 +2015,6 @@ def bulk_draft_changes_for( changed_at=changed_at, changed_by=changed_by, exit_callbacks=[ - _create_container_side_effects_for_draft_change_log, + _create_side_effects_for_change_log, ] ) diff --git a/openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py b/openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py new file mode 100644 index 00000000..bbed19bd --- /dev/null +++ b/openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py @@ -0,0 +1,62 @@ +# Generated by Django 5.2.7 on 2025-10-29 17:59 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_publishing', '0008_alter_draftchangelogrecord_options_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='draft', + name='draft_log_record', + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='oel_publishing.draftchangelogrecord'), + ), + migrations.AddField( + model_name='draftchangelogrecord', + name='dependencies_hash_digest', + field=models.CharField(blank=True, default='', editable=False, max_length=8), + ), + migrations.AddField( + model_name='publishlogrecord', + name='dependencies_hash_digest', + field=models.CharField(blank=True, default='', editable=False, max_length=8), + ), + migrations.CreateModel( + name='PublishableEntityVersionDependency', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('referenced_entity', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentity')), + ('referring_version', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishableentityversion')), + ], + ), + migrations.AddField( + model_name='publishableentityversion', + name='dependencies', 
+ field=models.ManyToManyField(related_name='affects', through='oel_publishing.PublishableEntityVersionDependency', to='oel_publishing.publishableentity'), + ), + migrations.CreateModel( + name='PublishSideEffect', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('cause', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, related_name='causes', to='oel_publishing.publishlogrecord')), + ('effect', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, related_name='affected_by', to='oel_publishing.publishlogrecord')), + ], + options={ + 'verbose_name': 'Publish Side Effect', + 'verbose_name_plural': 'Publish Side Effects', + }, + ), + migrations.AddConstraint( + model_name='publishableentityversiondependency', + constraint=models.UniqueConstraint(fields=('referring_version', 'referenced_entity'), name='oel_pevd_uniq_rv_re'), + ), + migrations.AddConstraint( + model_name='publishsideeffect', + constraint=models.UniqueConstraint(fields=('cause', 'effect'), name='oel_pub_pse_uniq_c_e'), + ), + ] diff --git a/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py b/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py new file mode 100644 index 00000000..c274a401 --- /dev/null +++ b/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py @@ -0,0 +1,149 @@ +""" +Backfill PublishableEntityVersionDependency entries based on ContainerVersions. + +We're introducing a lower-level publishing concept of a dependency that will be +used by Containers, but this means we have to backfill that dependency info for +existing Containers in the system. +""" +from django.db import migrations +from django.db.models import F + + +def create_backfill(apps, schema_editor): + """ + Create dependency entries and update dep hashes for Draft and Published. 
+ """ + _create_dependencies(apps) + _update_drafts(apps) + _update_draft_dependencies_hashes(apps) + _update_published_dependencies_hashes(apps) + + +def _create_dependencies(apps): + """ + Populate the PublishableEntityVersion.dependencies relation. + + The only ones we should have in the system at this point are the ones from + containers, so we query ContainerVersion for that. + """ + PublishableEntityVersionDependency = apps.get_model( + "oel_publishing", "PublishableEntityVersionDependency" + ) + ContainerVersion = apps.get_model("oel_publishing", "ContainerVersion") + + for container_version in ContainerVersion.objects.all(): + # child_entity_ids is a set to de-dupe. This doesn't handle pinned + # child references yet, but you can't actually make those in a real + # library yet, so we shouldn't have that data lying around to migrate. + child_entity_ids = set( + container_version + .entity_list + .entitylistrow_set + .all() + .values_list("entity_id", flat=True) + ) + PublishableEntityVersionDependency.objects.bulk_create( + [ + PublishableEntityVersionDependency( + referring_version_id=container_version.pk, + referenced_entity_id=entity_id + ) + for entity_id in child_entity_ids + ], + ignore_conflicts=True, + ) + + +def _update_drafts(apps): + """ + Update Draft entries to point to their most recent DraftLogRecord. + + This is slow and expensive. + """ + Draft = apps.get_model("oel_publishing", "Draft") + DraftChangeLogRecord = apps.get_model("oel_publishing", "DraftChangeLogRecord") + for draft in Draft.objects.all(): + draft_log_record = ( + # Find the most recent DraftChangeLogRecord related to this Draft, + DraftChangeLogRecord.objects + .filter(entity_id=draft.pk) + .order_by('-pk') + .first() + ) + draft.draft_log_record = draft_log_record + draft.save() + + +def _update_draft_dependencies_hashes(apps): + """ + Update the dependency_hash_digest for all DraftChangeLogRecords. + + Backfill dependency state hashes. 
The important thing here is that things + without dependencies will have the default (blank) state hash, so we only + need to query for Draft entries for Containers. + + We are only backfilling the current DraftChangeLogRecords pointed to by the + Draft entries now. We are not backfilling all historical + DraftChangeLogRecords. Full historical reconstruction is probably possible, + but it's not really worth the cost and complexity. + """ + from ..api import update_dependencies_hash_digests_for_log + from ..models import DraftChangeLog + + # All DraftChangeLogs that have records that are pointed to by the current + # Draft and have a possibility of having dependencies. + change_logs = DraftChangeLog.objects.filter( + pk=F('records__entity__draft__draft_log_record__draft_change_log'), + records__entity__draft__version__isnull=False, + records__entity__container__isnull=False, + ).distinct() + for change_log in change_logs: + update_dependencies_hash_digests_for_log(change_log, backfill=True) + +def _update_published_dependencies_hashes(apps): + """ + Update all container Published.dependencies_hash_digest + + Backfill dependency state hashes. The important thing here is that things + without dependencies will have the default (blank) state hash, so we only + need to query for Published entries for Containers. + """ + from ..api import update_dependencies_hash_digests_for_log + from ..models import PublishLog + + # All PublishLogs that have records that are pointed to by the current + # Published and have a possibility of having dependencies. 
+ change_logs = PublishLog.objects.filter( + pk=F('records__entity__published__publish_log_record__publish_log'), + records__entity__published__version__isnull=False, + records__entity__container__isnull=False, + ).distinct() + for change_log in change_logs: + update_dependencies_hash_digests_for_log(change_log, backfill=True) + +def remove_backfill(apps, schema_editor): + """ + Reset all dep hash values to default ('') and remove dependencies. + """ + Draft = apps.get_model("oel_publishing", "Draft") + DraftChangeLogRecord = apps.get_model("oel_publishing", "DraftChangeLogRecord") + PublishLogRecord = apps.get_model("oel_publishing", "PublishLogRecord") + PublishableEntityVersionDependency = apps.get_model( + "oel_publishing", "PublishableEntityVersionDependency" + ) + + PublishLogRecord.objects.all().update(dependencies_hash_digest='') + DraftChangeLogRecord.objects.all().update(dependencies_hash_digest='') + PublishableEntityVersionDependency.objects.all().delete() + Draft.objects.all().update(draft_log_record=None) + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_publishing', '0009_dependencies_and_hashing'), + ] + + operations = [ + migrations.RunPython(create_backfill, reverse_code=remove_backfill) + ] diff --git a/openedx_learning/apps/authoring/publishing/models/__init__.py b/openedx_learning/apps/authoring/publishing/models/__init__.py index 7e13c426..32a73b21 100644 --- a/openedx_learning/apps/authoring/publishing/models/__init__.py +++ b/openedx_learning/apps/authoring/publishing/models/__init__.py @@ -17,11 +17,12 @@ from .draft_log import Draft, DraftChangeLog, DraftChangeLogRecord, DraftSideEffect from .entity_list import EntityList, EntityListRow from .learning_package import LearningPackage -from .publish_log import Published, PublishLog, PublishLogRecord +from .publish_log import Published, PublishLog, PublishLogRecord, PublishSideEffect from .publishable_entity import ( PublishableContentModelRegistry, PublishableEntity, 
PublishableEntityMixin, PublishableEntityVersion, + PublishableEntityVersionDependency, PublishableEntityVersionMixin, ) diff --git a/openedx_learning/apps/authoring/publishing/models/draft_log.py b/openedx_learning/apps/authoring/publishing/models/draft_log.py index 6e85a6ec..ed3eb873 100644 --- a/openedx_learning/apps/authoring/publishing/models/draft_log.py +++ b/openedx_learning/apps/authoring/publishing/models/draft_log.py @@ -3,9 +3,10 @@ """ from django.conf import settings from django.db import models +from django.db.models import F, Q from django.utils.translation import gettext_lazy as _ -from openedx_learning.lib.fields import immutable_uuid_field, manual_date_time_field +from openedx_learning.lib.fields import hash_field, immutable_uuid_field, manual_date_time_field from .learning_package import LearningPackage from .publishable_entity import PublishableEntity, PublishableEntityVersion @@ -55,6 +56,60 @@ class Draft(models.Model): null=True, blank=True, ) + # Note: this is actually a 1:1 relation in practice, but I'm keeping the + # definition more consistent with the Published model, which has an fkey + # to PublishLogRecord. Unlike PublishLogRecord, this fkey is a late + # addition to this data model, so we have to allow null values for the + # initial migration. But making this nullable also has another advantage, + # in that it allows us to set the draft_log_record to the most recent change + # while inside a bulk_draft_changes_for operation, and then delete that log + # record if it is undone in the same bulk operation. + draft_log_record = models.ForeignKey( + "DraftChangeLogRecord", + on_delete=models.SET_NULL, + null=True, + ) + + @property + def log_record(self): + return self.draft_log_record + + class DraftQuerySet(models.QuerySet): + """ + Custom QuerySet/Manager so we can chain common queries. + """ + + def with_unpublished_changes(self): + """ + Drafts with versions that are different from what is Published. 
+
+            This will not return Drafts that have unpublished changes in their
+            dependencies. Example: A Unit is published with a Component as one
+            of its children. Then someone modifies the draft of the Component. If
+            both the Unit and the Component Drafts were part of the queryset,
+            this method would return only the changed Component, and not the
+            Unit. (We can add this as an optional flag later if we want.)
+            """
+            return (
+                self.select_related("entity__published__version")
+
+                # Exclude where draft and published versions are the same
+                .exclude(entity__published__version_id=F("version_id"))
+
+                # Account for soft-deletes:
+                # NULL != NULL in SQL, so simply excluding entities where
+                # the Draft and Published versions match will not catch the
+                # case where a soft-delete has been published (i.e. both the
+                # Draft and Published versions are NULL). We need to
+                # explicitly check for that case instead, or else we will
+                # re-publish the same soft-deletes over and over again.
+                .exclude(
+                    Q(version__isnull=True) &
+                    Q(entity__published__version__isnull=True)
+                )
+            )
+
+    objects = DraftQuerySet.as_manager()
 
 
 class DraftChangeLog(models.Model):
@@ -202,6 +257,22 @@ class DraftChangeLogRecord(models.Model):
         PublishableEntityVersion, on_delete=models.RESTRICT, null=True, blank=True
     )
 
+    # The dependencies_hash_digest is used when the version alone isn't enough
+    # to let us know the full draft state of an entity. This happens any time a
+    # Draft version has dependencies (see the PublishableEntityVersionDependency
+    # model), because changes in those dependencies will cause changes to the
+    # state of the Draft. The main example of this is containers, where changing
+    # an unpinned child affects the state of the parent container, even if that
+    # container's definition (and thus version) does not change.
+    #
+    # If a Draft has no dependencies, then its entire state is captured by its
+    # version, and the dependencies_hash_digest is blank. 
(Blank is slightly + # more convenient for database comparisons than NULL.) + # + # Note: There is an equivalent of this field in the Published model and the + # the values may drift away from each other. + dependencies_hash_digest = hash_field(blank=True, default='', max_length=8) + class Meta: constraints = [ # A PublishableEntity can have only one DraftLogRecord per DraftLog. @@ -228,6 +299,11 @@ class Meta: verbose_name = _("Draft Change Log Record") verbose_name_plural = _("Draft Change Log Records") + def __str__(self): + old_version_num = None if self.old_version is None else self.old_version.version_num + new_version_num = None if self.new_version is None else self.new_version.version_num + return f"DraftChangeLogRecord: {self.entity} ({old_version_num} -> {new_version_num})" + class DraftSideEffect(models.Model): """ diff --git a/openedx_learning/apps/authoring/publishing/models/entity_list.py b/openedx_learning/apps/authoring/publishing/models/entity_list.py index 19dab09a..252ea2e3 100644 --- a/openedx_learning/apps/authoring/publishing/models/entity_list.py +++ b/openedx_learning/apps/authoring/publishing/models/entity_list.py @@ -1,6 +1,8 @@ """ Entity List models """ +from functools import cached_property + from django.db import models from .publishable_entity import PublishableEntity, PublishableEntityVersion @@ -16,6 +18,17 @@ class EntityList(models.Model): anonymous in a sense–they're pointed to by ContainerVersions and other models, rather than being looked up by their own identifiers. """ + @cached_property + def rows(self): + """ + Convenience method to iterate rows. + + I'd normally make this the reverse lookup name for the EntityListRow -> + EntityList foreign key relation, but we already have references to + entitylistrow_set in various places, and I thought this would be better + than breaking compatibility. 
+ """ + return self.entitylistrow_set.order_by("order_num") class EntityListRow(models.Model): @@ -59,6 +72,12 @@ class EntityListRow(models.Model): related_name="+", # Do we need the reverse relation? ) + def is_pinned(self): + return self.entity_version_id is not None + + def is_unpinned(self): + return self.entity_version_id is None + class Meta: ordering = ["order_num"] constraints = [ diff --git a/openedx_learning/apps/authoring/publishing/models/publish_log.py b/openedx_learning/apps/authoring/publishing/models/publish_log.py index 037f1b8b..d3bc1241 100644 --- a/openedx_learning/apps/authoring/publishing/models/publish_log.py +++ b/openedx_learning/apps/authoring/publishing/models/publish_log.py @@ -3,8 +3,14 @@ """ from django.conf import settings from django.db import models +from django.utils.translation import gettext_lazy as _ -from openedx_learning.lib.fields import case_insensitive_char_field, immutable_uuid_field, manual_date_time_field +from openedx_learning.lib.fields import ( + case_insensitive_char_field, + hash_field, + immutable_uuid_field, + manual_date_time_field, +) from .learning_package import LearningPackage from .publishable_entity import PublishableEntity, PublishableEntityVersion @@ -61,6 +67,14 @@ class PublishLogRecord(models.Model): To revert a publish, we would make a new publish that swaps ``old_version`` and ``new_version`` field values. + + If the old_version and new_version of a PublishLogRecord match, it means + that the definition of the entity itself did not change (i.e. no new + PublishableEntityVersion was created), but something else was published that + had the side-effect of changing the published state of this entity. For + instance, if a Unit has unpinned references to its child Components (which + it almost always will), then publishing one of those Components will alter + the published state of the Unit, even if the UnitVersion does not change. 
""" publish_log = models.ForeignKey( @@ -80,6 +94,23 @@ class PublishLogRecord(models.Model): PublishableEntityVersion, on_delete=models.RESTRICT, null=True, blank=True ) + # The dependencies_hash_digest is used when the version alone isn't enough + # to let us know the full draft state of an entity. This happens any time a + # Published version has dependencies (see the + # PublishableEntityVersionDependency model), because changes in those + # dependencies will cause changes to the state of the Draft. The main + # example of this is containers, where changing an unpinned child affects + # the state of the parent container, even if that container's definition + # (and thus version) does not change. + # + # If a Published version has no dependencies, then its entire state is + # captured by its version, and the dependencies_hash_digest is blank. (Blank + # is slightly more convenient for database comparisons than NULL.) + # + # Note: There is an equivalent of this field in the Draft model and the + # the values may drift away from each other. + dependencies_hash_digest = hash_field(blank=True, default='', max_length=8) + class Meta: constraints = [ # A Publishable can have only one PublishLogRecord per PublishLog. 
@@ -105,6 +136,11 @@ class Meta: verbose_name = "Publish Log Record" verbose_name_plural = "Publish Log Records" + def __str__(self): + old_version_num = None if self.old_version is None else self.old_version.version_num + new_version_num = None if self.new_version is None else self.new_version.version_num + return f"PublishLogRecord: {self.entity} ({old_version_num} -> {new_version_num})" + class Published(models.Model): """ @@ -145,6 +181,56 @@ class Published(models.Model): on_delete=models.RESTRICT, ) + @property + def log_record(self): + return self.publish_log_record + class Meta: verbose_name = "Published Entity" verbose_name_plural = "Published Entities" + + +class PublishSideEffect(models.Model): + """ + Model to track when a change in one Published entity affects others. + + Our first use case for this is that changes involving child components are + thought to affect parent Units, even if the parent's version doesn't change. + + Side-effects are recorded in a collapsed form that only captures one level. + So if Components C1 and C2 are both published and they are part of Unit U1, + which is in turn a part of Subsection SS1, then the PublishSideEffect + entries are:: + + (C1, U1) + (C2, U1) + (U1, SS1) + + We do not keep entries for (C1, SS1) or (C2, SS1). This is to make the model + simpler, so we don't have to differentiate between direct side-effects and + transitive side-effects in the model. + + .. no_pii: + """ + cause = models.ForeignKey( + PublishLogRecord, + on_delete=models.RESTRICT, + related_name='causes', + ) + effect = models.ForeignKey( + PublishLogRecord, + on_delete=models.RESTRICT, + related_name='affected_by', + ) + + class Meta: + constraints = [ + # Duplicate entries for cause & effect are just redundant. This is + # here to guard against weird bugs that might introduce this state. 
+ models.UniqueConstraint( + fields=["cause", "effect"], + name="oel_pub_pse_uniq_c_e", + ) + ] + verbose_name = _("Publish Side Effect") + verbose_name_plural = _("Publish Side Effects") diff --git a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py index d967a343..176735a4 100644 --- a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py +++ b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py @@ -1,6 +1,8 @@ """ PublishableEntity model and PublishableEntityVersion + mixins """ +from __future__ import annotations + from datetime import datetime from functools import cached_property from typing import ClassVar, Self @@ -185,6 +187,8 @@ class PublishableEntityVersion(models.Model): connect them using a OneToOneField with primary_key=True. The easiest way to do this is to inherit from PublishableEntityVersionMixin. Be sure to treat these versioned models in your app as immutable as well. + + .. no_pii: """ uuid = immutable_uuid_field() @@ -221,6 +225,14 @@ class PublishableEntityVersion(models.Model): blank=True, ) + dependencies: models.ManyToManyField[ + PublishableEntity, PublishableEntityVersionDependency + ] = models.ManyToManyField( + PublishableEntity, + through="PublishableEntityVersionDependency", + related_name="affects", + ) + def __str__(self): return f"{self.entity.key} @ v{self.version_num} - {self.title}" @@ -265,6 +277,42 @@ class Meta: verbose_name_plural = "Publishable Entity Versions" +class PublishableEntityVersionDependency(models.Model): + """ + Track the PublishableEntities that a PublishableEntityVersion depends on. + + For example, a particular version of a Unit (U1.v1) might be defined to have + unpinned references to Components C1 and C2. That means that any changes in + C1 or C2 will affect U1.v1 via DraftSideEffects and PublishedSideEffects. We + say that C1 and C2 are dependencies of U1.v1. 
+ + An important restriction is that a PublishableEntityVersion's list of + dependencies is defined when the version is created. It is not modified + after that. No matter what happens to C1 or C2 (e.g. edit, deletion, + un-deletion, reset-draft-version-to-published), they will always be + dependencies of U1.v1. + + If someone removes C2 from U1, then that requires creating a new version of + U1 (so U1.v2). + + This restriction is important because our ability to calculate and cache the + state of "this version of this publishable entity and all its dependencies + (children)" relies on this being true. + + .. no_pii: + """ + referring_version = models.ForeignKey(PublishableEntityVersion, on_delete=models.CASCADE) + referenced_entity = models.ForeignKey(PublishableEntity, on_delete=models.RESTRICT) + + class Meta: + constraints = [ + models.UniqueConstraint( + fields=["referring_version", "referenced_entity"], + name="oel_pevd_uniq_rv_re", + ) + ] + + class PublishableEntityMixin(models.Model): """ Convenience mixin to link your models against PublishableEntity. 
diff --git a/openedx_learning/apps/authoring/units/api.py b/openedx_learning/apps/authoring/units/api.py index 3c5f4a81..ca9a2468 100644 --- a/openedx_learning/apps/authoring/units/api.py +++ b/openedx_learning/apps/authoring/units/api.py @@ -194,7 +194,7 @@ def create_unit_and_version( can_stand_alone: Set to False when created as part of containers """ entity_rows = _pub_entities_for_components(components) - with atomic(): + with atomic(savepoint=False): unit = create_unit( learning_package_id, key, diff --git a/openedx_learning/lib/fields.py b/openedx_learning/lib/fields.py index 99b7d525..c4167899 100644 --- a/openedx_learning/lib/fields.py +++ b/openedx_learning/lib/fields.py @@ -19,11 +19,12 @@ from .validators import validate_utc_datetime -def create_hash_digest(data_bytes: bytes) -> str: +def create_hash_digest(data_bytes: bytes, num_bytes=20) -> str: """ - Create a 40-byte, lower-case hex string representation of a hash digest. + Create a lower-case hex string representation of a hash digest. - The hash digest itself is 20-bytes using BLAKE2b. + The hash itself is 20-bytes by default, so 40 characters when we return it + as a hex-encoded string. We use BLAKE2b for the hashing algorithm. DON'T JUST MODIFY THIS HASH BEHAVIOR!!! We use hashing for de-duplication purposes. If this hash function ever changes, that deduplication will fail @@ -32,7 +33,7 @@ def create_hash_digest(data_bytes: bytes) -> str: If we want to change this representation one day, we should create a new function for that and do the appropriate data migration. 
""" - return hashlib.blake2b(data_bytes, digest_size=20).hexdigest() + return hashlib.blake2b(data_bytes, digest_size=num_bytes).hexdigest() def case_insensitive_char_field(**kwargs) -> MultiCollationCharField: @@ -123,7 +124,7 @@ def key_field(**kwargs) -> MultiCollationCharField: return case_sensitive_char_field(max_length=500, blank=False, **kwargs) -def hash_field() -> models.CharField: +def hash_field(**kwargs) -> models.CharField: """ Holds a hash digest meant to identify a piece of content. @@ -144,12 +145,13 @@ def hash_field() -> models.CharField: didn't seem worthwhile, particularly the possibility of case-sensitivity related bugs. """ - return models.CharField( - max_length=40, - blank=False, - null=False, - editable=False, - ) + default_kwargs = { + "max_length": 40, + "blank": False, + "null": False, + "editable": False, + } + return models.CharField(**(default_kwargs | kwargs)) def manual_date_time_field() -> models.DateTimeField: diff --git a/test_settings.py b/test_settings.py index a50efb17..c4b22926 100644 --- a/test_settings.py +++ b/test_settings.py @@ -26,6 +26,21 @@ def root(*args): } } +# If you provision the 'oel'@'%' with broad permissions on your MySQL instance, +# running the tests will auto-generate a database for running tests. This is +# slower than the default sqlite3 setup above, but it's sometimes helpful for +# finding things that only break in CI. 
+# +# DATABASES = { +# "default": { +# "ENGINE": "django.db.backends.mysql", +# "USER": "oel", +# "PASSWORD": "oel-test-pass", +# "HOST": "mysql", +# "PORT": "3306", +# } +# } + INSTALLED_APPS = [ "django.contrib.auth", "django.contrib.contenttypes", diff --git a/tests/openedx_learning/apps/authoring/publishing/test_api.py b/tests/openedx_learning/apps/authoring/publishing/test_api.py index a966ce4b..f4466717 100644 --- a/tests/openedx_learning/apps/authoring/publishing/test_api.py +++ b/tests/openedx_learning/apps/authoring/publishing/test_api.py @@ -20,6 +20,7 @@ DraftSideEffect, LearningPackage, PublishableEntity, + PublishLog, ) from openedx_learning.lib.test_utils import TestCase @@ -835,6 +836,108 @@ def test_multiple_draft_changes_all_cancel_out(self) -> None: assert DraftChangeLog.objects.all().count() == 1 +class PublishLogTestCase(TestCase): + """ + Test basic operations with PublishLogs and PublishSideEffects. + """ + now: datetime + learning_package_1: LearningPackage + learning_package_2: LearningPackage + + @classmethod + def setUpTestData(cls) -> None: + cls.now = datetime(2024, 1, 28, 16, 45, 30, tzinfo=timezone.utc) + cls.learning_package_1 = publishing_api.create_learning_package( + "my_package_key_1", + "PublishLog Testing LearningPackage πŸ”₯ 1", + created=cls.now, + ) + cls.learning_package_2 = publishing_api.create_learning_package( + "my_package_key_2", + "PublishLog Testing LearningPackage πŸ”₯ 2", + created=cls.now, + ) + + def test_simple_publish_log(self) -> None: + """ + Simplest test that multiple writes make it into one PublishLog. 
+ """ + with publishing_api.bulk_draft_changes_for(self.learning_package_1.id): + entity1 = publishing_api.create_publishable_entity( + self.learning_package_1.id, + "my_entity", + created=self.now, + created_by=None, + ) + entity1_v1 = publishing_api.create_publishable_entity_version( + entity1.id, + version_num=1, + title="An Entity 🌴", + created=self.now, + created_by=None, + ) + entity2 = publishing_api.create_publishable_entity( + self.learning_package_1.id, + "my_entity2", + created=self.now, + created_by=None, + ) + entity2_v1 = publishing_api.create_publishable_entity_version( + entity2.id, + version_num=1, + title="An Entity 🌴 2", + created=self.now, + created_by=None, + ) + + # Check simplest publish of two things... + publish_log = publishing_api.publish_all_drafts(self.learning_package_1.id) + assert PublishLog.objects.all().count() == 1 + assert publish_log.records.all().count() == 2 + + record_1 = publish_log.records.get(entity=entity1) + assert record_1 is not None + assert record_1.old_version is None + assert record_1.new_version == entity1_v1 + + record_2 = publish_log.records.get(entity=entity2) + assert record_2 is not None + assert record_2.old_version is None + assert record_2.new_version == entity2_v1 + + # Check that an empty publish still creates a PublishLog, just one with + # no records. This may be useful for triggering tasks that trigger off + # of publishing events, though I'm not confident about that at the + # moment. + publish_log = publishing_api.publish_all_drafts(self.learning_package_1.id) + assert publish_log.records.count() == 0 + + # Check that we can publish a subset... 
+ entity1_v2 = publishing_api.create_publishable_entity_version( + entity1.id, + version_num=2, + title="An Entity 🌴", + created=self.now, + created_by=None, + ) + publishing_api.create_publishable_entity_version( + entity2.id, + version_num=2, + title="An Entity 🌴 2", + created=self.now, + created_by=None, + ) + publish_e1_log = publishing_api.publish_from_drafts( + self.learning_package_1.id, + Draft.objects.filter(pk=entity1.pk), + ) + assert publish_e1_log.records.count() == 1 + e1_pub_record = publish_e1_log.records.get(entity=entity1) + assert e1_pub_record is not None + assert e1_pub_record.old_version == entity1_v1 + assert e1_pub_record.new_version == entity1_v2 + + class ContainerTestCase(TestCase): """ Test basic operations with Drafts. @@ -1020,6 +1123,52 @@ def test_bulk_parent_child_side_effects(self) -> None: assert caused_by_child_1.effect == container_change assert caused_by_child_2.effect == container_change + def test_draft_dependency_multiple_parents(self): + """ + Test that a change in a draft component affects multiple parents. + + This is the scenario where one Component is contained by multiple Units. 
+ """ + # Set up a Component that lives in two Units + component = publishing_api.create_publishable_entity( + self.learning_package.id, "component_1", created=self.now, created_by=None, + ) + publishing_api.create_publishable_entity_version( + component.id, version_num=1, title="Component 1 🌴", created=self.now, created_by=None, + ) + unit_1 = publishing_api.create_container( + self.learning_package.id, "unit_1", created=self.now, created_by=None, + ) + unit_2 = publishing_api.create_container( + self.learning_package.id, "unit_2", created=self.now, created_by=None, + ) + for unit in [unit_1, unit_2]: + publishing_api.create_container_version( + unit.pk, + 1, + title="My Unit", + entity_rows=[ + publishing_api.ContainerEntityRow(entity_pk=component.pk), + ], + created=self.now, + created_by=None, + ) + + # At this point there should be no side effects because we created + # everything from the bottom-up. + assert not DraftSideEffect.objects.all().exists() + + # Now let's change the Component and make sure it created side-effects + # for both Units. + publishing_api.create_publishable_entity_version( + component.id, version_num=2, title="Component 1.2 🌴", created=self.now, created_by=None, + ) + side_effects = DraftSideEffect.objects.all() + assert side_effects.count() == 2 + assert side_effects.filter(cause__entity=component).count() == 2 + assert side_effects.filter(effect__entity=unit_1.publishable_entity).count() == 1 + assert side_effects.filter(effect__entity=unit_2.publishable_entity).count() == 1 + def test_multiple_layers_of_containers(self): """Test stacking containers three layers deep.""" # Note that these aren't real "components" and "units". 
Everything being @@ -1079,6 +1228,74 @@ def test_multiple_layers_of_containers(self): assert subsection_change.affected_by.count() == 1 assert subsection_change.affected_by.first().cause == unit_change + publish_log = publishing_api.publish_all_drafts(self.learning_package.id) + assert publish_log.records.count() == 3 + + publishing_api.create_publishable_entity_version( + component.pk, version_num=3, title="Component v2", created=self.now, created_by=None, + ) + publish_log = publishing_api.publish_from_drafts( + self.learning_package.id, + Draft.objects.filter(entity_id=component.pk), + ) + assert publish_log.records.count() == 3 + component_publish = publish_log.records.get(entity=component) + unit_publish = publish_log.records.get(entity=unit.publishable_entity) + subsection_publish = publish_log.records.get(entity=subsection.publishable_entity) + + assert not component_publish.affected_by.exists() + assert unit_publish.affected_by.count() == 1 + assert unit_publish.affected_by.first().cause == component_publish + assert subsection_publish.affected_by.count() == 1 + assert subsection_publish.affected_by.first().cause == unit_publish + + def test_publish_all_layers(self): + """Test that we can publish multiple layers from one root.""" + # Note that these aren't real "components" and "units". Everything being + # tested is confined to the publishing app, so those concepts shouldn't + # be imported here. They're just named this way to make it more obvious + # what the intended hierarchy is for testing container nesting. 
+ component = publishing_api.create_publishable_entity( + self.learning_package.id, "component_1", created=self.now, created_by=None, + ) + publishing_api.create_publishable_entity_version( + component.id, version_num=1, title="Component 1 🌴", created=self.now, created_by=None, + ) + unit = publishing_api.create_container( + self.learning_package.id, "unit_1", created=self.now, created_by=None, + ) + publishing_api.create_container_version( + unit.pk, + 1, + title="My Unit", + entity_rows=[ + publishing_api.ContainerEntityRow(entity_pk=component.pk), + ], + created=self.now, + created_by=None, + ) + subsection = publishing_api.create_container( + self.learning_package.id, "subsection_1", created=self.now, created_by=None, + ) + publishing_api.create_container_version( + subsection.pk, + 1, + title="My Subsection", + entity_rows=[ + publishing_api.ContainerEntityRow(entity_pk=unit.pk), + ], + created=self.now, + created_by=None, + ) + publish_log = publishing_api.publish_from_drafts( + self.learning_package.id, + Draft.objects.filter(pk=subsection.pk), + ) + + # The component, unit, and subsection should all be accounted for in + # the publish log records. + assert publish_log.records.count() == 3 + def test_container_next_version(self): """Test that next_version works for containers.""" child_1 = publishing_api.create_publishable_entity( diff --git a/tests/openedx_learning/apps/authoring/sections/test_api.py b/tests/openedx_learning/apps/authoring/sections/test_api.py index 427eb711..9ac49673 100644 --- a/tests/openedx_learning/apps/authoring/sections/test_api.py +++ b/tests/openedx_learning/apps/authoring/sections/test_api.py @@ -214,9 +214,9 @@ def test_create_section_queries(self): Test how many database queries are required to create a section """ # The exact numbers here aren't too important - this is just to alert us if anything significant changes. 
- with self.assertNumQueries(25): + with self.assertNumQueries(28): _empty_section = self.create_section_with_subsections([]) - with self.assertNumQueries(30): + with self.assertNumQueries(35): # And try with a non-empty section: self.create_section_with_subsections([self.subsection_1, self.subsection_2_v1], key="u2") @@ -705,12 +705,12 @@ def test_query_count_of_contains_unpublished_changes(self): section = self.create_section_with_subsections(subsections) authoring_api.publish_all_drafts(self.learning_package.id) section.refresh_from_db() - with self.assertNumQueries(6): + with self.assertNumQueries(1): assert authoring_api.contains_unpublished_changes(section.pk) is False # Modify the most recently created subsection: self.modify_subsection(subsection, title="Modified Subsection") - with self.assertNumQueries(5): + with self.assertNumQueries(1): assert authoring_api.contains_unpublished_changes(section.pk) is True def test_metadata_change_doesnt_create_entity_list(self): diff --git a/tests/openedx_learning/apps/authoring/subsections/test_api.py b/tests/openedx_learning/apps/authoring/subsections/test_api.py index 0c16ae20..577881a9 100644 --- a/tests/openedx_learning/apps/authoring/subsections/test_api.py +++ b/tests/openedx_learning/apps/authoring/subsections/test_api.py @@ -197,9 +197,9 @@ def test_create_subsection_queries(self): Test how many database queries are required to create a subsection """ # The exact numbers here aren't too important - this is just to alert us if anything significant changes. 
- with self.assertNumQueries(25): + with self.assertNumQueries(28): _empty_subsection = self.create_subsection_with_units([]) - with self.assertNumQueries(30): + with self.assertNumQueries(35): # And try with a non-empty subsection: self.create_subsection_with_units([self.unit_1, self.unit_2_v1], key="u2") @@ -716,12 +716,12 @@ def test_query_count_of_contains_unpublished_changes(self): subsection = self.create_subsection_with_units(units) authoring_api.publish_all_drafts(self.learning_package.id) subsection.refresh_from_db() - with self.assertNumQueries(6): + with self.assertNumQueries(1): assert authoring_api.contains_unpublished_changes(subsection.pk) is False # Modify the most recently created unit: self.modify_unit(unit, title="Modified Unit") - with self.assertNumQueries(5): + with self.assertNumQueries(1): assert authoring_api.contains_unpublished_changes(subsection.pk) is True def test_metadata_change_doesnt_create_entity_list(self): diff --git a/tests/openedx_learning/apps/authoring/units/test_api.py b/tests/openedx_learning/apps/authoring/units/test_api.py index 522d9b5c..ebef4810 100644 --- a/tests/openedx_learning/apps/authoring/units/test_api.py +++ b/tests/openedx_learning/apps/authoring/units/test_api.py @@ -185,9 +185,9 @@ def test_create_unit_queries(self): Test how many database queries are required to create a unit """ # The exact numbers here aren't too important - this is just to alert us if anything significant changes. 
- with self.assertNumQueries(25): + with self.assertNumQueries(26): _empty_unit = self.create_unit_with_components([]) - with self.assertNumQueries(29): + with self.assertNumQueries(32): # And try with a non-empty unit: self.create_unit_with_components([self.component_1, self.component_2_v1], key="u2") @@ -700,12 +700,12 @@ def test_query_count_of_contains_unpublished_changes(self): unit = self.create_unit_with_components(components) authoring_api.publish_all_drafts(self.learning_package.id) unit.refresh_from_db() - with self.assertNumQueries(2): + with self.assertNumQueries(1): assert authoring_api.contains_unpublished_changes(unit.pk) is False # Modify the most recently created component: self.modify_component(component, title="Modified Component") - with self.assertNumQueries(2): + with self.assertNumQueries(1): assert authoring_api.contains_unpublished_changes(unit.pk) is True def test_metadata_change_doesnt_create_entity_list(self): diff --git a/tox.ini b/tox.ini index 005ddb6c..8c5fe2fc 100644 --- a/tox.ini +++ b/tox.ini @@ -76,7 +76,7 @@ deps = -r{toxinidir}/requirements/quality.txt commands = pylint openedx_learning openedx_tagging tests test_utils manage.py setup.py - mypy + mypy --show-traceback pycodestyle openedx_learning openedx_tagging tests manage.py setup.py pydocstyle openedx_learning openedx_tagging tests manage.py setup.py isort --check-only --diff tests test_utils openedx_learning openedx_tagging manage.py setup.py test_settings.py