From 2e9cc4cd1fb0e19cfb4c14bdffb6c6bef1d3c3f5 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Fri, 18 Apr 2025 16:12:33 +0200 Subject: [PATCH 01/25] feat: track publishing dependencies and side-effects Create the PublishSideEffect model as the publishing analog for the DraftSideEffect model, and create these side effects for parent containers when one of the children is published. This allows us to efficiently query when any subtree of content was affected by a publish. Also introduces PublishableEntityVersionDependency as a way of tracking unpinned dependencies of a version, e.g. the children of a version of a Unit. Also introduces a new dependencies_hash_digest field to Draft and Published models to track dependency state. This PR adds Django admin functionality for this new data, as well as optimizing some existing calls that used to have to do more complex container -> entity list traversal. --- .../apps/authoring/publishing/admin.py | 106 ++- .../apps/authoring/publishing/api.py | 629 ++++++++++++------ ...draft_dependencies_hash_digest_and_more.py | 57 ++ .../migrations/0010_backfill_dependencies.py | 145 ++++ .../authoring/publishing/models/__init__.py | 10 +- .../authoring/publishing/models/draft_log.py | 61 +- .../publishing/models/entity_list.py | 19 + .../publishing/models/publish_log.py | 83 ++- .../publishing/models/publishable_entity.py | 46 +- openedx_learning/apps/authoring/units/api.py | 2 +- openedx_learning/lib/fields.py | 25 +- .../apps/authoring/publishing/test_api.py | 220 ++++++ .../apps/authoring/sections/test_api.py | 8 +- .../apps/authoring/subsections/test_api.py | 8 +- .../apps/authoring/units/test_api.py | 8 +- 15 files changed, 1204 insertions(+), 223 deletions(-) create mode 100644 openedx_learning/apps/authoring/publishing/migrations/0009_draft_dependencies_hash_digest_and_more.py create mode 100644 openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py diff --git 
a/openedx_learning/apps/authoring/publishing/admin.py b/openedx_learning/apps/authoring/publishing/admin.py index 7b144fa8c..e24b7dfa4 100644 --- a/openedx_learning/apps/authoring/publishing/admin.py +++ b/openedx_learning/apps/authoring/publishing/admin.py @@ -6,7 +6,7 @@ import functools from django.contrib import admin -from django.db.models import Count +from django.db.models import Count, F, Q from django.utils.html import format_html from django.utils.safestring import SafeText @@ -21,6 +21,8 @@ EntityListRow, LearningPackage, PublishableEntity, + PublishableEntityVersion, + PublishableEntityVersionDependency, PublishLog, PublishLogRecord, ) @@ -89,28 +91,84 @@ class PublishLogAdmin(ReadOnlyModelAdmin): list_filter = ["learning_package"] +class PublishableEntityVersionTabularInline(admin.TabularInline): + """ + Tabular inline for a single Draft change. + """ + model = PublishableEntityVersion + + fields = ( + "version_num", + "title", + "created", + "created_by", + "dependencies_list", + ) + readonly_fields = fields + + def dependencies_list(self, version: PublishableEntityVersion): + identifiers = sorted( + [str(dep.key) for dep in version.dependencies.all()] + ) + return "\n".join(identifiers) + + def get_queryset(self, request): + queryset = super().get_queryset(request) + return ( + queryset + .order_by('-version_num') + .select_related('created_by', 'entity') + .prefetch_related('dependencies') + ) + +class PublishStatusFilter(admin.SimpleListFilter): + title = "publish status" + parameter_name = "publish_status" + + def lookups(self, request, model_admin): + return [ + ("unpublished_changes", "Has unpublished changes"), + ] + + def queryset(self, request, queryset): + if self.value() == "unpublished_changes": + return ( + queryset + .exclude( + published__version__isnull=True, + draft__version__isnull=True, + ) + .exclude( + published__version=F("draft__version"), + published__dependencies_hash_digest=F("draft__dependencies_hash_digest") + ) + ) + + 
@admin.register(PublishableEntity) class PublishableEntityAdmin(ReadOnlyModelAdmin): """ Read-only admin view for Publishable Entities """ + inlines = [PublishableEntityVersionTabularInline] + list_display = [ "key", - "draft_version", "published_version", + "draft_version", "uuid", "learning_package", "created", "created_by", "can_stand_alone", ] - list_filter = ["learning_package"] + list_filter = ["learning_package", PublishStatusFilter] search_fields = ["key", "uuid"] fields = [ "key", - "draft_version", "published_version", + "draft_version", "uuid", "learning_package", "created", @@ -120,8 +178,8 @@ class PublishableEntityAdmin(ReadOnlyModelAdmin): ] readonly_fields = [ "key", - "draft_version", "published_version", + "draft_version", "uuid", "learning_package", "created", @@ -131,7 +189,23 @@ class PublishableEntityAdmin(ReadOnlyModelAdmin): ] def draft_version(self, entity: PublishableEntity): - return entity.draft.version.version_num if entity.draft.version else None + from django.utils.html import format_html + + if hasattr(entity, "draft") and entity.draft.version: + if entity.draft.dependencies_hash_digest: + version_str = ( + f"{entity.draft.version.version_num} " + f"({entity.draft.dependencies_hash_digest})" + ) + else: + version_str = str(entity.draft.version.version_num) + + if version_str == self.published_version(entity): + return version_str + else: + return format_html("{}", version_str) + + return None def published_version(self, entity: PublishableEntity): return entity.published.version.version_num if entity.published and entity.published.version else None @@ -139,12 +213,30 @@ def published_version(self, entity: PublishableEntity): def get_queryset(self, request): queryset = super().get_queryset(request) return queryset.select_related( - "learning_package", "published__version", + "learning_package", "published__version", "draft__version", "created_by" ) def see_also(self, entity): return one_to_one_related_model_html(entity) + def 
published_version(self, entity): + if entity.published.version: + return entity.published.version.version_num + return None + + def published_version(self, entity: PublishableEntity): + if hasattr(entity, "published") and entity.published.version: + if entity.published.dependencies_hash_digest: + return ( + f"{entity.published.version.version_num} " + f"({entity.published.dependencies_hash_digest})" + ) + else: + return str(entity.published.version.version_num) + + return None + + @admin.register(Published) class PublishedAdmin(ReadOnlyModelAdmin): diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 68183c64f..233f25664 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -13,8 +13,9 @@ from typing import ContextManager, Optional, TypeVar from django.core.exceptions import ObjectDoesNotExist, ValidationError -from django.db.models import F, Q, QuerySet +from django.db.models import F, Prefetch, Q, QuerySet from django.db.transaction import atomic +from openedx_learning.lib.fields import create_hash_digest from .contextmanagers import DraftChangeLogContext from .models import ( @@ -31,9 +32,11 @@ PublishableEntity, PublishableEntityMixin, PublishableEntityVersion, + PublishableEntityVersionDependency, PublishableEntityVersionMixin, PublishLog, PublishLogRecord, + PublishSideEffect, ) from .models.publish_log import Published @@ -212,6 +215,7 @@ def create_publishable_entity_version( title: str, created: datetime, created_by: int | None, + dependencies: list[int] | None = None, # PublishableEntity IDs ) -> PublishableEntityVersion: """ Create a PublishableEntityVersion. @@ -219,7 +223,7 @@ def create_publishable_entity_version( You'd typically want to call this right before creating your own content version model that points to it. 
""" - with atomic(): + with atomic(savepoint=False): version = PublishableEntityVersion.objects.create( entity_id=entity_id, version_num=version_num, @@ -227,16 +231,67 @@ def create_publishable_entity_version( created=created, created_by_id=created_by, ) + if dependencies: + set_version_dependencies(version.id, dependencies) + set_draft_version( entity_id, version.id, set_at=created, set_by=created_by, ) - return version +def set_version_dependencies( + version_id: int, # PublishableEntityVersion.id, + /, + dependencies: list[int] # List of PublishableEntity.id +) -> None: + """ + Set the depenencies of a publishable entity version. + + In general, callers should not modify dependencies after creation (i.e. use + the optional param in create_publishable_entity_version() instead of using + this function). **This function is not atomic.** If you're doing backfills, + you must wrap calls to this function with a transaction.atomic() call. + + The idea behind dependencies is that a PublishableEntityVersion may be + declared to reference unpinned PublishableEntities. Changes to those + referenced PublishableEntities still affect the draft or published state of + the PublisahbleEntity, even if the version of the PublishableEntity is not + incremented. + + For example, a Unit does not get a new version when unpinned child + Components are updated, but we do consider a Unit version to be changed when + that happens. So the child Components would be dependencies of the Unit + version in this case. + + Guidelines: + + 1. Only declare one level of dependencies, e.g. immediate parent-child + relationships. The publishing app will calculate transitive dependencies + like "all descendants" based on this. This is important for saving space, + because the DAG of trasitive dependency relationships can explode out to + tens of thousands of nodes per version. + 2. Declare dependencies from the bottom-up when possible. 
In other words, if + you're building an entire Subsection, set the Component dependencies for + the Units before you set the Unit dependencies for the Subsection. This + code will still work if you build from the top-down, but we'll end up + doing many redundant re-calculations, since every change to a lower layer + will cause recalculation to the higher levels that depend on it. + 3. Do not create circular dependencies. + """ + PublishableEntityVersionDependency.objects.bulk_create( + [ + PublishableEntityVersionDependency( + version_id=version_id, entity_id=dep_entity_id + ) + for dep_entity_id in set(dependencies) # dependencies have no ordering + ], + ) + + def get_publishable_entity(publishable_entity_id: int, /) -> PublishableEntity: return PublishableEntity.objects.get(id=publishable_entity_id) @@ -284,7 +339,8 @@ def get_entities_with_unpublished_changes( """ Fetch entities that have unpublished changes. - By default, this excludes soft-deleted drafts but can be included using include_deleted_drafts option. + By default, this excludes soft-deleted drafts but can be included using + include_deleted_drafts option. """ entities_qs = ( PublishableEntity.objects @@ -332,65 +388,74 @@ def publish_all_drafts( Publish everything that is a Draft and is not already published. """ draft_qset = ( - Draft.objects.select_related("entity__published") - .filter(entity__learning_package_id=learning_package_id) - - # Exclude entities where the Published version already matches the - # Draft version. - .exclude(entity__published__version_id=F("version_id")) - - # Account for soft-deletes: - # NULL != NULL in SQL, so simply excluding entities where the Draft - # and Published versions match will not catch the case where a - # soft-delete has been published (i.e. both the Draft and Published - # versions are NULL). We need to explicitly check for that case - # instead, or else we will re-publish the same soft-deletes over - # and over again. 
- .exclude(Q(version__isnull=True) & Q(entity__published__version__isnull=True)) + Draft.objects + .filter(entity__learning_package_id=learning_package_id) + .with_unpublished_changes() ) return publish_from_drafts( learning_package_id, draft_qset, message, published_at, published_by ) +def _get_dependencies_with_unpublished_changes( + draft_qset: QuerySet[Draft] +) -> list[QuerySet[Draft]]: + """ + Return all dependencies to publish as a list of Draft QuerySets. + + This should only return the Drafts that have actual changes, not pure side- + effects. The side-effect calculations will happen separately. + """ + # First we have to do a full crawl of *all* dependencies, regardless of + # whether they have unpublished changes or not. this is because we might + # have a dependency-of-a-dependency that has changed somewhere down the + # line. Example: The draft_qset includes a Subsection. Even if the Unit + # versions are still the same, there might be a changed Component that needs + # to be published. + all_dependent_drafts = [] + dependent_drafts = Draft.objects.filter( + entity__affects__in=draft_qset.all().values_list("entity_id", flat=True) + ).distinct() + + while dependent_drafts: + all_dependent_drafts.append(dependent_drafts) + dependent_drafts = Draft.objects.filter( + entity__affects__in=dependent_drafts.all().values_list("entity_id", flat=True) + ).distinct() + + if not all_dependent_drafts: + return Draft.objects.none() + + unpublished_dependent_drafts = [ + dependent_drafts_qset.with_unpublished_changes() + for dependent_drafts_qset in all_dependent_drafts + ] + return unpublished_dependent_drafts + + def publish_from_drafts( learning_package_id: int, # LearningPackage.id /, - draft_qset: QuerySet, + draft_qset: QuerySet[Draft], message: str = "", published_at: datetime | None = None, published_by: int | None = None, # User.id + publish_dependencies: bool = True, ) -> PublishLog: """ Publish the rows in the ``draft_model_qsets`` args passed in. 
+ + By default, this will also publish all dependencies (e.g. children) of the + Drafts that are passed in. """ if published_at is None: published_at = datetime.now(tz=timezone.utc) with atomic(): - # If the drafts include any containers, we need to auto-publish their descendants: - # TODO: this only handles one level deep and would need to be updated to support sections > subsections > units - - # Get the IDs of the ContainerVersion for any Containers whose drafts are slated to be published. - container_version_ids = ( - Container.objects.filter(publishable_entity__draft__in=draft_qset) - .values_list("publishable_entity__draft__version__containerversion__pk", flat=True) - ) - if container_version_ids: - # We are publishing at least one container. Check if it has any child components that aren't already slated - # to be published. - unpublished_draft_children = EntityListRow.objects.filter( - entity_list__container_versions__pk__in=container_version_ids, - entity_version=None, # Unpinned entities only - ).exclude( - entity__draft__version=F("entity__published__version") # Exclude already published things - ).values_list("entity__draft__pk", flat=True) - if unpublished_draft_children: - # Force these additional child components to be published at the same time by adding them to the qset: - draft_qset = Draft.objects.filter( - Q(pk__in=draft_qset.values_list("pk", flat=True)) | - Q(pk__in=unpublished_draft_children) - ) + if publish_dependencies: + dependency_drafts_qsets = _get_dependencies_with_unpublished_changes(draft_qset) + else: + dependency_drafts_qsets = [] # One PublishLog for this entire publish operation. publish_log = PublishLog( @@ -402,32 +467,51 @@ def publish_from_drafts( publish_log.full_clean() publish_log.save(force_insert=True) - for draft in draft_qset.select_related("entity__published__version"): - try: - old_version = draft.entity.published.version - except ObjectDoesNotExist: - # This means there is no published version yet. 
- old_version = None - - # Create a record describing publishing this particular Publishable - # (useful for auditing and reverting). - publish_log_record = PublishLogRecord( - publish_log=publish_log, - entity=draft.entity, - old_version=old_version, - new_version=draft.version, - ) - publish_log_record.full_clean() - publish_log_record.save(force_insert=True) + # We're intentionally avoiding union() here because Django ORM unions + # introduce cumbersome restrictions (can only union once, can't + # select_related on it after, the extra rows must be exactly compatible + # in unioned qsets, etc.) Instead, we're going to have one queryset per + # dependency layer. + all_draft_qsets = [ + draft_qset.select_related("entity__published__version"), + *dependency_drafts_qsets, # one QuerySet per layer of dependencies + ] + published_draft_ids = set() + for qset in all_draft_qsets: + for draft in qset: + # Skip duplicates that we might get from expanding dependencies. + if draft.pk in published_draft_ids: + continue + + try: + old_version = draft.entity.published.version + except ObjectDoesNotExist: + # This means there is no published version yet. + old_version = None + + # Create a record describing publishing this particular + # Publishable (useful for auditing and reverting). 
+ publish_log_record = PublishLogRecord( + publish_log=publish_log, + entity=draft.entity, + old_version=old_version, + new_version=draft.version, + ) + publish_log_record.full_clean() + publish_log_record.save(force_insert=True) + + # Update the lookup we use to fetch the published versions + Published.objects.update_or_create( + entity=draft.entity, + defaults={ + "version": draft.version, + "publish_log_record": publish_log_record, + }, + ) - # Update the lookup we use to fetch the published versions - Published.objects.update_or_create( - entity=draft.entity, - defaults={ - "version": draft.version, - "publish_log_record": publish_log_record, - }, - ) + published_draft_ids.add(draft.pk) + + _create_side_effects_for_change_log(publish_log) return publish_log @@ -571,14 +655,13 @@ def set_draft_version( changed_at=set_at, changed_by_id=set_by, ) - change = DraftChangeLogRecord.objects.create( + DraftChangeLogRecord.objects.create( draft_change_log=change_log, entity_id=draft.entity_id, old_version_id=old_version_id, new_version_id=publishable_entity_version_pk, ) - _create_container_side_effects_for_draft_change(change) - + _create_side_effects_for_change_log(change_log) def _add_to_existing_draft_change_log( active_change_log: DraftChangeLog, @@ -643,86 +726,270 @@ def _add_to_existing_draft_change_log( return change -def _create_container_side_effects_for_draft_change_log(change_log: DraftChangeLog): +def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog): """ - Iterate through the whole DraftChangeLog and process side-effects. + Create the side-effects for a DraftChangeLog or PublishLog. + + A side-effect is created whenever a dependency of a draft or published + entity version is altered. + + For example, say we have a published Unit at version 1 (``U1.v1``). + ``U1.v1`` is defined to have unpinned references to Components ``C1`` and + ``C2``, i.e. ``U1.v1 = [C1, C2]``. 
This means that ``U1.v1`` always shows + the latest published versions of ``C1`` and ``C2``. While we do have a more + sophisticated encoding for the ordered parent-child relationships, we + capture the basic dependency relationship using the M:M + ``PublishableEntityVersionDependency`` model. In this scenario, C1 and C2 + are dependencies of ``U1.v1`` + + In the above example, publishing a newer version of ``C1`` does *not* + increment the version for Unit ``U1``. But we still want to record that + ``U1.v1`` was affected by the change in ``C1``. We record this in the + ``DraftSideEffect`` and ``PublishSideEffect`` models. We also add an entry + for ``U1`` in the change log, saying that it went from version 1 to version + 1--i.e nothing about the Unit's defintion changed, but it was still affected + by other changes in the log. + + Only call this function after all the records have already been created. + + Note: The interface between ``DraftChangeLog`` and ``PublishLog`` is similar + enough that this function has been made to work with both. """ + if isinstance(change_log, DraftChangeLog): + branch_cls = Draft + change_record_cls = DraftChangeLogRecord + side_effect_cls = DraftSideEffect + elif isinstance(change_log, PublishLog): + branch_cls = Published + change_record_cls = PublishLogRecord + side_effect_cls = PublishSideEffect + else: + raise TypeError( + f"expected DraftChangeLog or PublishLog, not {type(change_log)}" + ) + + # processed_entity_ids holds the entity IDs that we've already calculated + # side-effects for. This is to save us from recalculating side-effects for + # the same dependency relationships over and over again. So if we're calling + # this function in a loop for all the Components in a Unit, we won't be + # recalculating the Unit's side-effect on its Subsection, and its + # Subsection's side-effect on its Section each time through the loop. 
+ # It also guards against infinite parent-child relationship loops, though + # those aren't *supposed* to be allowed anyhow. processed_entity_ids: set[int] = set() - for change in change_log.records.all(): - _create_container_side_effects_for_draft_change( - change, - processed_entity_ids=processed_entity_ids, + for original_change in change_log.records.all(): + affected_by_original_change = branch_cls.objects.filter( + version__dependencies=original_change.entity ) + changes_and_affected = [ + (original_change, current) for current in affected_by_original_change + ] + while changes_and_affected: + original_change, affected = changes_and_affected.pop() + + # If the Draft or Published that is affected by this change is not + # already in the change_log, then we have to add it. + affected_version_pk = affected.version_id + + if branch_cls == Draft: + change_log_param = {'draft_change_log': original_change.draft_change_log} + elif branch_cls == Published: + change_log_param = {'publish_log': original_change.publish_log} + + # Example: If the original_change is a DraftChangeLogRecord that + # represents editing a Component, the side_effect_change is the + # DraftChangeLogRecord that represents the fact that the containing + # Unit was also altered (even if the Unit version doesn't change). + side_effect_change, _created = change_record_cls.objects.get_or_create( + **change_log_param, + entity_id=affected.entity_id, + defaults={ + # If a change record already exists because the affected + # entity was separately modified, then we don't touch the + # old/new version entries. But if we're creating this change + # record as a pure side-effect, then we use the (old_version + # == new_version) convention to indicate that. 
+ 'old_version_id': affected_version_pk, + 'new_version_id': affected_version_pk + } + ) + # Create a new side effect (DraftSideEffect or PublishSideEffect) to + # record the relationship between the ``original_change`` and + # the corresponding ``side_effect_change``. We'll do this regardless + # of whether we created the ``side_effect_change`` or just pulled + # back an existing one. This addresses two things: + # + # 1. A change in multiple dependencies can generate multiple + # side effects that point to the same change log record, i.e. + # multiple changes can cause the same ``effect``. + # Example: Two draft components in a Unit are changed. Two + # DraftSideEffects will be created and point to the same Unit + # DraftChangeLogRecord. + # 2. A entity and its dependency can change at the same time. + # Example: If a Unit has a Component, and both the Unit and + # Component are edited in the same DraftChangeLog, then the Unit + # has changed in both ways (the Unit's internal metadata as well + # as the new version of the child component). The version of the + # Unit will be incremented, but we'll also create the + # DraftSideEffect. + side_effect_cls.objects.get_or_create( + cause=original_change, + effect=side_effect_change, + ) -def _create_container_side_effects_for_draft_change( - original_change: DraftChangeLogRecord, - processed_entity_ids: set | None = None -): - """ - Given a draft change, add side effects for all affected containers. + # Now we find the next layer up by looking at Drafts or Published + # that have ``affected.entity`` as a dependency. + next_layer_of_affected = branch_cls.objects.filter( + version__dependencies=affected.entity + ) + + # Make sure we never re-add the change we just processed when we + # queue up the next layer. + processed_entity_ids.add(original_change.entity_id) - This should only be run after the DraftChangeLogRecord has been otherwise - fully written out. 
We want to avoid the scenario where we create a - side-effect that a Component change affects a Unit if the Unit version is - also changed (maybe even deleted) in the same DraftChangeLog. + changes_and_affected.extend( + (side_effect_change, affected) + for affected in next_layer_of_affected + if affected.entity_id not in processed_entity_ids + ) - The `processed_entity_ids` set holds the entity IDs that we've already - calculated side-effects for. This is to save us from recalculating side- - effects for the same ancestor relationships over and over again. So if we're - calling this function in a loop for all the Components in a Unit, we won't - be recalculating the Unit's side-effect on its Subsection, and its - Subsection's side-effect on its Section. + _update_branch_dependencies_hash_digests(change_log) - TODO: This could get very expensive with the get_containers_with_entity - calls. We should measure the impact of this. + +def _update_branch_dependencies_hash_digests( + change_log: DraftChangeLog | PublishLog, +) -> None: """ - if processed_entity_ids is None: - # An optimization, but also a guard against infinite side-effect loops. - processed_entity_ids = set() + Update dependencies_hash_digest for Drafts or Published in a change log. - changes_and_containers = [ - (original_change, container) - for container - in get_containers_with_entity(original_change.entity_id, ignore_pinned=True) - ] - while changes_and_containers: - change, container = changes_and_containers.pop() - - # If the container is not already in the DraftChangeLog, we need to - # add it. Since it's being caused as a DraftSideEffect, we're going - # add it with the old_version == new_version convention. 
- container_draft_version_pk = container.versioning.draft.pk - container_change, _created = DraftChangeLogRecord.objects.get_or_create( - draft_change_log=change.draft_change_log, - entity_id=container.pk, - defaults={ - 'old_version_id': container_draft_version_pk, - 'new_version_id': container_draft_version_pk - } + Args: + change_log: A DraftChangeLog or PublishLog that already has all + side-effects added to it. The Draft and Published models should + already be updated to point to the post-change versions. + """ + if isinstance(change_log, DraftChangeLog): + branch = "draft" + branch_cls = Draft + elif isinstance(change_log, PublishLog): + branch = "published" + branch_cls = Published + else: + raise TypeError( + f"expected DraftChangeLog or PublishLog, not {type(change_log)}" + ) + + dependencies_versions_prefetch = Prefetch( + "new_version__dependencies", + queryset=( + PublishableEntity.objects + .select_related(f"{branch}__version") + .order_by(f"{branch}__version__uuid") ) + ) + records = ( + change_log.records + .filter(new_version__isnull=False) + .select_related("new_version", f"entity__{branch}") + .prefetch_related(dependencies_versions_prefetch) + ) + + changed_to_dependencies: dict[Draft, list[Draft]] | dict[Published, list[Published]] + changed_to_dependencies = {} + for record in records: + if record.new_version is None: + continue + + changed_branch_item = getattr(record.entity, branch, None) + if (not changed_branch_item) or (not changed_branch_item.version): + branch_item_dependencies = [] + else: + branch_item_dependencies = [ + getattr(entity, branch) + for entity in record.new_version.dependencies.all() + if hasattr(entity, branch) and getattr(entity, branch).version + ] + + changed_to_dependencies[changed_branch_item] = branch_item_dependencies - # Mark that change in the current loop has the side effect of changing - # the parent container. We'll do this regardless of whether the - # container version itself also changed. 
If a Unit has a Component and - # both the Unit and Component have their versions incremented, then the - # Unit has changed in both ways (the Unit's internal metadata as well as - # the new version of the child component). - DraftSideEffect.objects.get_or_create(cause=change, effect=container_change) - processed_entity_ids.add(change.entity_id) - - # Now we find the next layer up of containers. So if the originally - # passed in publishable_entity_id was for a Component, then the - # ``container`` we've been creating the side effect for in this loop - # is the Unit, and ``parents_of_container`` would be any Sequences - # that contain the Unit. - parents_of_container = get_containers_with_entity(container.pk, ignore_pinned=True) - - changes_and_containers.extend( - (container_change, container_parent) - for container_parent in parents_of_container - if container_parent.pk not in processed_entity_ids + branch_items_to_update = [] + already_computed_hashes = {} + for changed_branch_item in changed_to_dependencies: + new_digest = hash_for_branch_item( + changed_branch_item, changed_to_dependencies, already_computed_hashes ) + if new_digest != changed_branch_item.dependencies_hash_digest: + changed_branch_item.dependencies_hash_digest = new_digest + branch_items_to_update.append(changed_branch_item) + + branch_cls.objects.bulk_update( + branch_items_to_update, + ["dependencies_hash_digest"], + ) + + +def hash_for_branch_item( + branch_item: Draft | Published, + changed_items: dict[Draft, list[Draft]] | dict[Published, list[Published]], + already_computed_hashes: dict[Draft, str] | dict[Published, str], +) -> str: + """ + Recursively calculate the dependencies_hash_digest for Draft or Published. + + We have to be extremely careful about changing the signature or behavior of + this function, as it's used in the 0010_backfill_dependencies data migration + that will be used to upgrade people from Teak to future versions. 
+ + Args: + branch_item: The Draft or Published entry we want to recalculate the + dependencies hash for. + changed_items: A dict mapping Draft or Published to a list of Dependency + Draft or Published. The keys represent the full set of what has + changed in the change log (DraftChangeLog or PublishLog), and + includes things that were added as side-effects. The value lists + in this dict can include things that *didn't* change, if they happen + to be dependencies of things that did change. + already_computed_hashes: A cache to ensure we don't recompute the same + nodes over and over again. + """ + if branch_item in already_computed_hashes: + return already_computed_hashes[branch_item] + + # Base case #1: The branch_item is a dependency of something that was + # affected by a change, but the dependency itself did not change in any way + # (neither directly, nor as a side-effect). + # + # Example: A Unit has two Components. One of the Components changed, forcing + # us to recalculate the dependencies_hash_digest for that Unit. Doing that + # recalculation requires us to fetch the dependencies_hash_digest of the + # unchanged child as well. + # + # These are the only values for dependencies_hash_digest that we can trust, + # because we know that the change log (DraftChangeLog or PublishLog) being + # processed right now will not alter them. + if branch_item not in changed_items: + return branch_item.dependencies_hash_digest + + # Base case #2: If there are no dependencies, the hash digest is set to ''. + # Note that it's possible that something that used to have a hash digest to + # get reset to '' if those dependencies are later removed. 
+ dependencies = changed_items[branch_item] + if not dependencies: + return '' + + # Normal recursive case + dependencies.sort(key=lambda d: d.version.uuid) + dep_state_entries = [ + f"{dep_item.version.uuid}:" + f"{hash_for_branch_item(dep_item, changed_items, already_computed_hashes)}" + for dep_item in dependencies + ] + summary_text = "\n".join(dep_state_entries) + + digest = create_hash_digest(summary_text.encode(), num_bytes=4) + already_computed_hashes[branch_item] = digest + + return digest def soft_delete_draft(publishable_entity_id: int, /, deleted_by: int | None = None) -> None: @@ -973,7 +1240,6 @@ def create_entity_list_with_rows( raise ValidationError("Container entity versions must belong to the specified entity.") with atomic(savepoint=False): - entity_list = create_entity_list() EntityListRow.objects.bulk_create( [ @@ -1011,6 +1277,11 @@ def _create_container_version( title=title, created=created, created_by=created_by, + dependencies=[ + entity_row.entity_id + for entity_row in entity_list.rows + if entity_row.is_unpinned() + ] ) container_version = container_version_cls.objects.create( publishable_entity_version=publishable_entity_version, @@ -1362,6 +1633,11 @@ def contains_unpublished_changes(container_id: int) -> bool: [ 🛑 UNSTABLE ] Check recursively if a container has any unpublished changes. + Note: I've preserved the API signature for now, but we probably eventually + want to make a more general function that operates on PublishableEntities + and dependencies, once we introduce those with courses and their files, + grading policies, etc. + Note: unlike this method, the similar-sounding `container.versioning.has_unpublished_changes` property only reports if the container itself has unpublished changes, not @@ -1370,50 +1646,20 @@ def contains_unpublished_changes(container_id: int) -> bool: that's in the container, it will be `False`. This method will return `True` in either case. 
""" - # This is similar to 'get_container(container.container_id)' but pre-loads more data. - container = Container.objects.select_related( - "publishable_entity__draft__version__containerversion__entity_list", - ).get(pk=container_id) - + container = Container.objects.get(pk=container_id) if container.versioning.has_unpublished_changes: return True - # We only care about children that are un-pinned, since published changes to pinned children don't matter - entity_list = getattr(container.versioning.draft, "entity_list", None) - if entity_list is None: - # This container has been soft-deleted, so it has no children. - return False - - # This is a naive and inefficient implementation but should be correct. - # TODO: Once we have expanded the containers system to support multiple levels (not just Units and Components but - # also subsections and sections) and we have an expanded test suite for correctness, then we can optimize. - # We will likely change to a tracking-based approach rather than a "scan for changes" based approach. 
- for row in entity_list.entitylistrow_set.filter(entity_version=None).select_related( - "entity__container", - "entity__draft__version", - "entity__published__version", - ): - try: - child_container = row.entity.container - except Container.DoesNotExist: - child_container = None - if child_container: - # This is itself a container - check recursively: - if contains_unpublished_changes(child_container.pk): - return True - else: - # This is not a container: - draft_pk = row.entity.draft.version_id if row.entity.draft else None - published_pk = row.entity.published.version_id if hasattr(row.entity, "published") else None - if draft_pk != published_pk: - return True - return False + draft = container.publishable_entity.draft + published = container.publishable_entity.published + return draft.dependencies_hash_digest != published.dependencies_hash_digest def get_containers_with_entity( publishable_entity_pk: int, *, ignore_pinned=False, + published=False, ) -> QuerySet[Container]: """ [ 🛑 UNSTABLE ] @@ -1426,20 +1672,31 @@ def get_containers_with_entity( publishable_entity_pk: The ID of the PublishableEntity to search for. ignore_pinned: if true, ignore any pinned references to the entity. """ + branch = "published" if published else "draft" if ignore_pinned: - qs = Container.objects.filter( - # Note: these two conditions must be in the same filter() call, or the query won't be correct. - publishable_entity__draft__version__containerversion__entity_list__entitylistrow__entity_id=publishable_entity_pk, # pylint: disable=line-too-long # noqa: E501 - publishable_entity__draft__version__containerversion__entity_list__entitylistrow__entity_version_id=None, # pylint: disable=line-too-long # noqa: E501 - ) + filter_dict = { + # Note: these two conditions must be in the same filter() call, + # or the query won't be correct. 
+ ( + f"publishable_entity__{branch}__version__" + "containerversion__entity_list__entitylistrow__entity_id" + ): publishable_entity_pk, + ( + f"publishable_entity__{branch}__version__" + "containerversion__entity_list__entitylistrow__entity_version_id" + ): None, + } + qs = Container.objects.filter(**filter_dict) else: - qs = Container.objects.filter( - publishable_entity__draft__version__containerversion__entity_list__entitylistrow__entity_id=publishable_entity_pk, # pylint: disable=line-too-long # noqa: E501 - ) - return qs.select_related( - "publishable_entity__draft__version__containerversion", - "publishable_entity__published__version__containerversion", - ).order_by("pk").distinct() # Ordering is mostly for consistent test cases. + filter_dict = { + ( + f"publishable_entity__{branch}__version__" + "containerversion__entity_list__entitylistrow__entity_id" + ): publishable_entity_pk + } + qs = Container.objects.filter(**filter_dict) + + return qs.order_by("pk").distinct() # Ordering is mostly for consistent test cases. 
def get_container_children_count( @@ -1507,6 +1764,6 @@ def bulk_draft_changes_for( changed_at=changed_at, changed_by=changed_by, exit_callbacks=[ - _create_container_side_effects_for_draft_change_log, + _create_side_effects_for_change_log, ] ) diff --git a/openedx_learning/apps/authoring/publishing/migrations/0009_draft_dependencies_hash_digest_and_more.py b/openedx_learning/apps/authoring/publishing/migrations/0009_draft_dependencies_hash_digest_and_more.py new file mode 100644 index 000000000..a249eee62 --- /dev/null +++ b/openedx_learning/apps/authoring/publishing/migrations/0009_draft_dependencies_hash_digest_and_more.py @@ -0,0 +1,57 @@ +# Generated by Django 4.2.23 on 2025-08-27 03:46 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_publishing', '0008_alter_draftchangelogrecord_options_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='draft', + name='dependencies_hash_digest', + field=models.CharField(blank=True, default='', editable=False, max_length=8), + ), + migrations.AddField( + model_name='published', + name='dependencies_hash_digest', + field=models.CharField(blank=True, default='', editable=False, max_length=8), + ), + migrations.CreateModel( + name='PublishSideEffect', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('cause', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, related_name='causes', to='oel_publishing.publishlogrecord')), + ('effect', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, related_name='affected_by', to='oel_publishing.publishlogrecord')), + ], + options={ + 'verbose_name': 'Publish Side Effect', + 'verbose_name_plural': 'Publish Side Effects', + }, + ), + migrations.CreateModel( + name='PublishableEntityVersionDependency', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, 
serialize=False, verbose_name='ID')), + ('entity', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentity')), + ('version', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentityversion')), + ], + ), + migrations.AddField( + model_name='publishableentityversion', + name='dependencies', + field=models.ManyToManyField(related_name='affects', through='oel_publishing.PublishableEntityVersionDependency', to='oel_publishing.publishableentity'), + ), + migrations.AddConstraint( + model_name='publishsideeffect', + constraint=models.UniqueConstraint(fields=('cause', 'effect'), name='oel_pub_pse_uniq_c_e'), + ), + migrations.AddConstraint( + model_name='publishableentityversiondependency', + constraint=models.UniqueConstraint(fields=('version', 'entity'), name='oel_pevd_uniq_version_entity'), + ), + ] diff --git a/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py b/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py new file mode 100644 index 000000000..b7b0a73dd --- /dev/null +++ b/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py @@ -0,0 +1,145 @@ +""" +Backfill PublishableEntityVersionDependency entries based on ContainerVersions. + +We're introducing a lower-level publishing concept of a dependency that will be +used by Containers, but this means we have to backfill that dependency info for +existing Containers in the system. +""" +# Generated by Django 4.2.23 on 2025-09-01 19:16 + +from django.db import migrations +from django.db.models import Prefetch + + +def create_backfill(apps, schema_editor): + """ + Create dependency entries and update dep hashes for Draft and Published. 
+ """ + _create_dependencies(apps) + _update_draft_dependencies_hash(apps) + _update_published_dependencies_hash(apps) + + +def _create_dependencies(apps): + """ + Populate the PublishableEntityVersion.dependencies relation. + + The only ones we should have in the system at this point are the ones from + containers, so we query ContainerVersion for that. + """ + PublishableEntityVersionDependency = apps.get_model( + "oel_publishing", "PublishableEntityVersionDependency" + ) + ContainerVersion = apps.get_model("oel_publishing", "ContainerVersion") + + for container_version in ContainerVersion.objects.all(): + # using a set to de-dupe + child_entity_ids = set( + container_version + .entity_list + .entitylistrow_set + .all() + .values_list("entity_id", flat=True) + ) + PublishableEntityVersionDependency.objects.bulk_create( + [ + PublishableEntityVersionDependency( + version_id=container_version.pk, + entity_id=entity_id + ) + for entity_id in child_entity_ids + ], + ignore_conflicts=True, + ) + +def _update_draft_dependencies_hash(apps): + """ + Update all container Draft.dependencies_hash_digest + + Backfill dependency state hashes. The important thing here is that things + without dependencies will have the default (blank) state hash, so we only + need to query for Draft entries for Containers. 
+ """ + from ..api import hash_for_branch_item + + Draft = apps.get_model("oel_publishing", "Draft") + drafts_that_can_have_deps = ( + Draft + .objects + .filter(version__isnull=False, entity__container__isnull=False) + .prefetch_related("version__dependencies") + ) + already_computed_hashes = {} + drafts_to_deps = { + draft: [ + entity.draft + for entity in draft.version.dependencies.all() + if hasattr(entity, "draft") and entity.draft.version + ] + for draft in drafts_that_can_have_deps + } + for draft in drafts_to_deps: + draft.dependencies_hash_digest = hash_for_branch_item( + draft, drafts_to_deps, already_computed_hashes + ) + Draft.objects.bulk_update(drafts_to_deps.keys(), ["dependencies_hash_digest"]) + + +def _update_published_dependencies_hash(apps): + """ + Update all container Published.dependencies_hash_digest + + Backfill dependency state hashes. The important thing here is that things + without dependencies will have the default (blank) state hash, so we only + need to query for Published entries for Containers. + """ + from ..api import hash_for_branch_item + + Published = apps.get_model("oel_publishing", "Published") + published_that_can_have_deps = ( + Published + .objects + .filter(version__isnull=False, entity__container__isnull=False) + .prefetch_related("version__dependencies") + ) + + already_computed_hashes = {} + published_to_deps = { + published: [ + entity.published + for entity in published.version.dependencies.all() + if hasattr(entity, "published") and entity.published.version + ] + for published in published_that_can_have_deps + } + for published in published_to_deps: + published.dependencies_hash_digest = hash_for_branch_item( + published, published_to_deps, already_computed_hashes + ) + Published.objects.bulk_update(published_to_deps.keys(), ["dependencies_hash_digest"]) + + +def remove_backfill(apps, schema_editor): + """ + Reset all dep hash values to default ('') and remove dependencies. 
+ """ + Draft = apps.get_model("oel_publishing", "Draft") + Published = apps.get_model("oel_publishing", "Published") + PublishableEntityVersionDependency = apps.get_model( + "oel_publishing", "PublishableEntityVersionDependency" + ) + + Draft.objects.all().update(dependencies_hash_digest='') + Published.objects.all().update(dependencies_hash_digest='') + PublishableEntityVersionDependency.objects.all().delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_publishing', '0009_draft_dependencies_hash_digest_and_more'), + ] + + operations = [ + migrations.RunPython(create_backfill, reverse_code=remove_backfill) + ] diff --git a/openedx_learning/apps/authoring/publishing/models/__init__.py b/openedx_learning/apps/authoring/publishing/models/__init__.py index 7e13c426a..a3f5d35c6 100644 --- a/openedx_learning/apps/authoring/publishing/models/__init__.py +++ b/openedx_learning/apps/authoring/publishing/models/__init__.py @@ -14,14 +14,20 @@ """ from .container import Container, ContainerVersion -from .draft_log import Draft, DraftChangeLog, DraftChangeLogRecord, DraftSideEffect +from .draft_log import ( + Draft, + DraftChangeLog, + DraftChangeLogRecord, + DraftSideEffect, +) from .entity_list import EntityList, EntityListRow from .learning_package import LearningPackage -from .publish_log import Published, PublishLog, PublishLogRecord +from .publish_log import Published, PublishLog, PublishLogRecord, PublishSideEffect from .publishable_entity import ( PublishableContentModelRegistry, PublishableEntity, PublishableEntityMixin, PublishableEntityVersion, + PublishableEntityVersionDependency, PublishableEntityVersionMixin, ) diff --git a/openedx_learning/apps/authoring/publishing/models/draft_log.py b/openedx_learning/apps/authoring/publishing/models/draft_log.py index 6e85a6ec8..88e45718d 100644 --- a/openedx_learning/apps/authoring/publishing/models/draft_log.py +++ b/openedx_learning/apps/authoring/publishing/models/draft_log.py @@ -3,9 +3,14 
@@ """ from django.conf import settings from django.db import models +from django.db.models import F, Q, QuerySet from django.utils.translation import gettext_lazy as _ -from openedx_learning.lib.fields import immutable_uuid_field, manual_date_time_field +from openedx_learning.lib.fields import ( + hash_field, + immutable_uuid_field, + manual_date_time_field, +) from .learning_package import LearningPackage from .publishable_entity import PublishableEntity, PublishableEntityVersion @@ -56,6 +61,60 @@ class Draft(models.Model): blank=True, ) + # The dependencies_hash_digest is used when the version alone isn't enough + # to let us know the full draft state of an entity. This happens any time a + # Draft version has dependencies (see the PublishableEntityVersionDependency + # model), because changes in those dependencies will cause changes to the + # state of the Draft. The main example of this is containers, where changing + # an unpinned child affects the state of the parent container, even if that + # container's definition (and thus version) does not change. + # + # If a Draft has no dependencies, then its entire state is captured by its + # version, and the dependencies_hash_digest is blank. (Blank is slightly + # more convenient for database comparisons than NULL.) + # + # Note: There is an equivalent of this field in the Published model and the + # the values may drift away from each other. + dependencies_hash_digest = hash_field(blank=True, default='', max_length=8) + + + class DraftQuerySet(models.QuerySet): + """ + Custom QuerySet/Manager so we can chain common queries. + """ + + def with_unpublished_changes(self): + """ + Drafts with versions that are different from what is Published. + + This will not return Drafts that have unpublished changes in their + dependencies. Example: A Unit is published with a Component as one + of its child. Then someone modifies the draft of the Component. 
If + both the Unit and the Component Drafts were part of the queryset, + this method would return only the changed Component, and not the + Unit. (We can add this as an optional flag later if we want.) + """ + return ( + self.select_related("entity__published__version") + # Exclude where draft and published versions are the same + .exclude(entity__published__version_id=F("version_id")) + + # Account for soft-deletes: + # NULL != NULL in SQL, so simply excluding entities where + # the Draft and Published versions match will not catch the + # case where a soft-delete has been published (i.e. both the + # Draft and Published versions are NULL). We need to + # explicitly check for that case instead, or else we will + # re-publish the same soft-deletes over and over again. + .exclude( + Q(version__isnull=True) & + Q(entity__published__version__isnull=True) + ) + ) + + objects = DraftQuerySet.as_manager() + + class DraftChangeLog(models.Model): """ diff --git a/openedx_learning/apps/authoring/publishing/models/entity_list.py b/openedx_learning/apps/authoring/publishing/models/entity_list.py index 19dab09ae..252ea2e37 100644 --- a/openedx_learning/apps/authoring/publishing/models/entity_list.py +++ b/openedx_learning/apps/authoring/publishing/models/entity_list.py @@ -1,6 +1,8 @@ """ Entity List models """ +from functools import cached_property + from django.db import models from .publishable_entity import PublishableEntity, PublishableEntityVersion @@ -16,6 +18,17 @@ class EntityList(models.Model): anonymous in a sense–they're pointed to by ContainerVersions and other models, rather than being looked up by their own identifiers. """ + @cached_property + def rows(self): + """ + Convenience method to iterate rows. + + I'd normally make this the reverse lookup name for the EntityListRow -> + EntityList foreign key relation, but we already have references to + entitylistrow_set in various places, and I thought this would be better + than breaking compatibility. 
+ """ + return self.entitylistrow_set.order_by("order_num") class EntityListRow(models.Model): @@ -59,6 +72,12 @@ class EntityListRow(models.Model): related_name="+", # Do we need the reverse relation? ) + def is_pinned(self): + return self.entity_version_id is not None + + def is_unpinned(self): + return self.entity_version_id is None + class Meta: ordering = ["order_num"] constraints = [ diff --git a/openedx_learning/apps/authoring/publishing/models/publish_log.py b/openedx_learning/apps/authoring/publishing/models/publish_log.py index 037f1b8b1..63b885269 100644 --- a/openedx_learning/apps/authoring/publishing/models/publish_log.py +++ b/openedx_learning/apps/authoring/publishing/models/publish_log.py @@ -3,8 +3,14 @@ """ from django.conf import settings from django.db import models +from django.utils.translation import gettext_lazy as _ -from openedx_learning.lib.fields import case_insensitive_char_field, immutable_uuid_field, manual_date_time_field +from openedx_learning.lib.fields import ( + case_insensitive_char_field, + hash_field, + immutable_uuid_field, + manual_date_time_field, +) from .learning_package import LearningPackage from .publishable_entity import PublishableEntity, PublishableEntityVersion @@ -61,6 +67,15 @@ class PublishLogRecord(models.Model): To revert a publish, we would make a new publish that swaps ``old_version`` and ``new_version`` field values. + + If the old_version and new_version of a PublishLogRecord match, it means + that the definition of the entity itself did not change (i.e. no new + PublishableEntityVersion was created), but something else was published that + had the side-effect of changing the published state of this entity. For + instance, if a Unit has unpinned references to its child Components (which + it almost always will), then publishing one of those Components will alter + the published state of the Unit, even if the UnitVersion does not change. In + that case, we still consider the Unit to have been "published". 
"""
 
     publish_log = models.ForeignKey( @@ -145,6 +160,72 @@ class Published(models.Model): on_delete=models.RESTRICT, ) + # The dependencies_hash_digest is used when the version alone isn't enough + # to let us know the full published state of an entity. This happens any + # time a Published version has dependencies (see the + # PublishableEntityVersionDependency model), because changes in those + # dependencies will cause changes to the state of the Published entry. The + # main example of this is containers, where changing an unpinned child + # affects the state of the parent container, even if that container's + # definition (and thus version) does not change. + # + # If a Published version has no dependencies, then its entire state is + # captured by its version, and the dependencies_hash_digest is blank. (Blank + # is slightly more convenient for database comparisons than NULL.) + # + # Note: There is an equivalent of this field in the Draft model and + # the values may drift away from each other. + dependencies_hash_digest = hash_field(blank=True, default='', max_length=8) + class Meta: verbose_name = "Published Entity" verbose_name_plural = "Published Entities" + + +class PublishSideEffect(models.Model): + """ + Model to track when a change in one Published entity affects others. + + Our first use case for this is that changes involving child components are + thought to affect parent Units, even if the parent's version doesn't change. + + Side-effects are recorded in a collapsed form that only captures one level. + So if Components C1 and C2 are both published and they are part of Unit U1, + which is in turn a part of Subsection SS1, then the PublishSideEffect + entries are:: + + (C1, U1) + (C2, U1) + (U1, SS1) + + We do not keep entries for (C1, SS1) or (C2, SS1). This is to make the model + simpler, so we don't have to differentiate between direct side-effects and + transitive side-effects in the model. + + .. 
no_pii: + """ + cause = models.ForeignKey( + PublishLogRecord, + on_delete=models.RESTRICT, + related_name='causes', + ) + effect = models.ForeignKey( + PublishLogRecord, + on_delete=models.RESTRICT, + related_name='affected_by', + ) + + class Meta: + constraints = [ + # Duplicate entries for cause & effect are just redundant. This is + # here to guard against weird bugs that might introduce this state. + models.UniqueConstraint( + fields=["cause", "effect"], + name="oel_pub_pse_uniq_c_e", + ) + ] + verbose_name = _("Publish Side Effect") + verbose_name_plural = _("Publish Side Effects") + + + diff --git a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py index d967a343b..ceadacb73 100644 --- a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py +++ b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py @@ -185,6 +185,8 @@ class PublishableEntityVersion(models.Model): connect them using a OneToOneField with primary_key=True. The easiest way to do this is to inherit from PublishableEntityVersionMixin. Be sure to treat these versioned models in your app as immutable as well. + + .. no_pii """ uuid = immutable_uuid_field() @@ -221,6 +223,12 @@ class PublishableEntityVersion(models.Model): blank=True, ) + dependencies = models.ManyToManyField( + PublishableEntity, + through="PublishableEntityVersionDependency", + related_name="affects", + ) + def __str__(self): return f"{self.entity.key} @ v{self.version_num} - {self.title}" @@ -265,11 +273,47 @@ class Meta: verbose_name_plural = "Publishable Entity Versions" +class PublishableEntityVersionDependency(models.Model): + """ + Track the PublishableEntities that a PublishableEntityVersion depends on. + + For example, a partcular version of a Unit (U1.v1) might be defined to have + unpinned references to Components C1 and C2. 
That means that any changes in + C1 or C2 will affect U1.v1 via DraftSideEffects and PublishSideEffects. We + say that C1 and C2 are dependencies of U1.v1. + + An important restriction is that a PublishableEntityVersion's list of + dependencies is defined when the version is created. It is not modified + after that. No matter what happens to C1 or C2 (e.g. edit, deletion, + un-deletion, reset-draft-version-to-published), they will always be + dependencies of U1.v1. + + If someone removes C2 from U1, then that requires creating a new version of + U1 (so U1.v2). + + This restriction is important because our ability to calculate and cache the + state of "this version of this publishable entity and all its dependencies + (children)" relies on this being true. + + .. no_pii + """ + version = models.ForeignKey(PublishableEntityVersion, on_delete=models.RESTRICT) + entity = models.ForeignKey(PublishableEntity, on_delete=models.RESTRICT) + + class Meta: + constraints = [ + models.UniqueConstraint( + fields=["version", "entity"], + name="oel_pevd_uniq_version_entity", + ) + ] + + class PublishableEntityMixin(models.Model): """ Convenience mixin to link your models against PublishableEntity. - Please see docstring for PublishableEntity for more details. 
+ Please see docstring for PublishableEntity for more details. If you use this class, you *MUST* also use PublishableEntityVersionMixin and the publishing app's api.register_publishable_models (see its docstring for diff --git a/openedx_learning/apps/authoring/units/api.py b/openedx_learning/apps/authoring/units/api.py index 7256c7060..2bce34eea 100644 --- a/openedx_learning/apps/authoring/units/api.py +++ b/openedx_learning/apps/authoring/units/api.py @@ -177,7 +177,7 @@ def create_unit_and_version( can_stand_alone: Set to False when created as part of containers """ entity_rows = _pub_entities_for_components(components) - with atomic(): + with atomic(savepoint=False): unit = create_unit( learning_package_id, key, diff --git a/openedx_learning/lib/fields.py b/openedx_learning/lib/fields.py index 99b7d5251..084c96ff9 100644 --- a/openedx_learning/lib/fields.py +++ b/openedx_learning/lib/fields.py @@ -19,11 +19,12 @@ from .validators import validate_utc_datetime -def create_hash_digest(data_bytes: bytes) -> str: +def create_hash_digest(data_bytes: bytes, num_bytes=20) -> str: """ - Create a 40-byte, lower-case hex string representation of a hash digest. + Create a lower-case hex string representation of a hash digest. - The hash digest itself is 20-bytes using BLAKE2b. + The hash digest itself defaults to 20 bytes using BLAKE2b (so 40 characters + when hex encoded). DON'T JUST MODIFY THIS HASH BEHAVIOR!!! We use hashing for de-duplication purposes. If this hash function ever changes, that deduplication will fail @@ -32,7 +33,7 @@ def create_hash_digest(data_bytes: bytes) -> str: If we want to change this representation one day, we should create a new function for that and do the appropriate data migration. 
""" - return hashlib.blake2b(data_bytes, digest_size=20).hexdigest() + return hashlib.blake2b(data_bytes, digest_size=num_bytes).hexdigest() def case_insensitive_char_field(**kwargs) -> MultiCollationCharField: @@ -123,7 +124,7 @@ def key_field(**kwargs) -> MultiCollationCharField: return case_sensitive_char_field(max_length=500, blank=False, **kwargs) -def hash_field() -> models.CharField: +def hash_field(**kwargs) -> models.CharField: """ Holds a hash digest meant to identify a piece of content. @@ -144,13 +145,13 @@ def hash_field() -> models.CharField: didn't seem worthwhile, particularly the possibility of case-sensitivity related bugs. """ - return models.CharField( - max_length=40, - blank=False, - null=False, - editable=False, - ) - + default_kwargs = { + "max_length": 40, + "blank": False, + "null": False, + "editable": False, + } + return models.CharField(**(default_kwargs | kwargs)) def manual_date_time_field() -> models.DateTimeField: """ diff --git a/tests/openedx_learning/apps/authoring/publishing/test_api.py b/tests/openedx_learning/apps/authoring/publishing/test_api.py index 0ab4d33c8..899f129d9 100644 --- a/tests/openedx_learning/apps/authoring/publishing/test_api.py +++ b/tests/openedx_learning/apps/authoring/publishing/test_api.py @@ -20,6 +20,7 @@ DraftSideEffect, LearningPackage, PublishableEntity, + PublishLog, ) from openedx_learning.lib.test_utils import TestCase @@ -835,6 +836,108 @@ def test_multiple_draft_changes_all_cancel_out(self) -> None: assert DraftChangeLog.objects.all().count() == 1 +class PublishLogTestCase(TestCase): + """ + Test basic operations with PublishLogs and PublishSideEffects. 
+ """ + now: datetime + learning_package_1: LearningPackage + learning_package_2: LearningPackage + + @classmethod + def setUpTestData(cls) -> None: + cls.now = datetime(2024, 1, 28, 16, 45, 30, tzinfo=timezone.utc) + cls.learning_package_1 = publishing_api.create_learning_package( + "my_package_key_1", + "PublishLog Testing LearningPackage 🔥 1", + created=cls.now, + ) + cls.learning_package_2 = publishing_api.create_learning_package( + "my_package_key_2", + "PublishLog Testing LearningPackage 🔥 2", + created=cls.now, + ) + + def test_simple_publish_log(self) -> None: + """ + Simplest test that multiple writes make it into one PublishLog. + """ + with publishing_api.bulk_draft_changes_for(self.learning_package_1.id): + entity1 = publishing_api.create_publishable_entity( + self.learning_package_1.id, + "my_entity", + created=self.now, + created_by=None, + ) + entity1_v1 = publishing_api.create_publishable_entity_version( + entity1.id, + version_num=1, + title="An Entity 🌴", + created=self.now, + created_by=None, + ) + entity2 = publishing_api.create_publishable_entity( + self.learning_package_1.id, + "my_entity2", + created=self.now, + created_by=None, + ) + entity2_v1 = publishing_api.create_publishable_entity_version( + entity2.id, + version_num=1, + title="An Entity 🌴 2", + created=self.now, + created_by=None, + ) + + # Check simplest publish of two things... + publish_log = publishing_api.publish_all_drafts(self.learning_package_1.id) + assert PublishLog.objects.all().count() == 1 + assert publish_log.records.all().count() == 2 + + record_1 = publish_log.records.get(entity=entity1) + assert record_1 is not None + assert record_1.old_version is None + assert record_1.new_version == entity1_v1 + + record_2 = publish_log.records.get(entity=entity2) + assert record_2 is not None + assert record_2.old_version is None + assert record_2.new_version == entity2_v1 + + # Check that an empty publish still creates a PublishLog, just one with + # no records. 
This may be useful for triggering tasks that trigger off + # of publishing events, though I'm not confident about that at the + # moment. + publish_log = publishing_api.publish_all_drafts(self.learning_package_1.id) + assert publish_log.records.count() == 0 + + # Check that we can publish a subset... + entity1_v2 = publishing_api.create_publishable_entity_version( + entity1.id, + version_num=2, + title="An Entity 🌴", + created=self.now, + created_by=None, + ) + publishing_api.create_publishable_entity_version( + entity2.id, + version_num=2, + title="An Entity 🌴 2", + created=self.now, + created_by=None, + ) + publish_e1_log = publishing_api.publish_from_drafts( + self.learning_package_1.id, + Draft.objects.filter(pk=entity1.pk), + ) + assert publish_e1_log.records.count() == 1 + e1_pub_record = publish_e1_log.records.get(entity=entity1) + assert e1_pub_record is not None + assert e1_pub_record.old_version == entity1_v1 + assert e1_pub_record.new_version == entity1_v2 + + class ContainerTestCase(TestCase): """ Test basic operations with Drafts. @@ -1020,6 +1123,54 @@ def test_bulk_parent_child_side_effects(self) -> None: assert caused_by_child_1.effect == container_change assert caused_by_child_2.effect == container_change + def test_draft_dependency_multiple_parents(self): + """ + Test that a change in a draft component affects multiple parents. + + This is the scenario where one Component is contained by multiple Units. 
+ """ + # Set up a Component that lives in two Units + component = publishing_api.create_publishable_entity( + self.learning_package.id, "component_1", created=self.now, created_by=None, + ) + publishing_api.create_publishable_entity_version( + component.id, version_num=1, title="Component 1 🌴", created=self.now, created_by=None, + ) + unit_1 = publishing_api.create_container( + self.learning_package.id, "unit_1", created=self.now, created_by=None, + ) + unit_2 = publishing_api.create_container( + self.learning_package.id, "unit_2", created=self.now, created_by=None, + ) + for unit in [unit_1, unit_2]: + publishing_api.create_container_version( + unit.pk, + 1, + title="My Unit", + entity_rows=[ + publishing_api.ContainerEntityRow(entity_pk=component.pk), + ], + created=self.now, + created_by=None, + ) + + # At this point there should be no side effects because we created + # everything from the bottom-up. + assert not DraftSideEffect.objects.all().exists() + + # Now let's change the Component and make sure it created side-effects + # for both Units. + publishing_api.create_publishable_entity_version( + component.id, version_num=2, title="Component 1.2 🌴", created=self.now, created_by=None, + ) + side_effects = DraftSideEffect.objects.all() + assert side_effects.count() == 2 + assert side_effects.filter(cause__entity=component).count() == 2 + assert side_effects.filter(effect__entity=unit_1.publishable_entity).count() == 1 + assert side_effects.filter(effect__entity=unit_2.publishable_entity).count() == 1 + + + def test_multiple_layers_of_containers(self): """Test stacking containers three layers deep.""" # Note that these aren't real "components" and "units". 
Everything being @@ -1079,6 +1230,75 @@ def test_multiple_layers_of_containers(self): assert subsection_change.affected_by.count() == 1 assert subsection_change.affected_by.first().cause == unit_change + publish_log = publishing_api.publish_all_drafts(self.learning_package.id) + assert publish_log.records.count() == 3 + + publishing_api.create_publishable_entity_version( + component.pk, version_num=3, title="Component v2", created=self.now, created_by=None, + ) + publish_log = publishing_api.publish_from_drafts( + self.learning_package.id, + Draft.objects.filter(entity_id=component.pk), + ) + assert publish_log.records.count() == 3 + component_publish = publish_log.records.get(entity=component) + unit_publish = publish_log.records.get(entity=unit.publishable_entity) + subsection_publish = publish_log.records.get(entity=subsection.publishable_entity) + + assert not component_publish.affected_by.exists() + assert unit_publish.affected_by.count() == 1 + assert unit_publish.affected_by.first().cause == component_publish + assert subsection_publish.affected_by.count() == 1 + assert subsection_publish.affected_by.first().cause == unit_publish + + def test_publish_all_layers(self): + """Test that we can publish multiple layers from one root.""" + # Note that these aren't real "components" and "units". Everything being + # tested is confined to the publishing app, so those concepts shouldn't + # be imported here. They're just named this way to make it more obvious + # what the intended hierarchy is for testing container nesting. 
+ component = publishing_api.create_publishable_entity( + self.learning_package.id, "component_1", created=self.now, created_by=None, + ) + publishing_api.create_publishable_entity_version( + component.id, version_num=1, title="Component 1 🌴", created=self.now, created_by=None, + ) + unit = publishing_api.create_container( + self.learning_package.id, "unit_1", created=self.now, created_by=None, + ) + publishing_api.create_container_version( + unit.pk, + 1, + title="My Unit", + entity_rows=[ + publishing_api.ContainerEntityRow(entity_pk=component.pk), + ], + created=self.now, + created_by=None, + ) + subsection = publishing_api.create_container( + self.learning_package.id, "subsection_1", created=self.now, created_by=None, + ) + publishing_api.create_container_version( + subsection.pk, + 1, + title="My Subsection", + entity_rows=[ + publishing_api.ContainerEntityRow(entity_pk=unit.pk), + ], + created=self.now, + created_by=None, + ) + publish_log = publishing_api.publish_from_drafts( + self.learning_package.id, + Draft.objects.filter(pk=subsection.pk), + ) + + # The component, unit, and subsection should all be accounted for in + # the publish log records. + assert publish_log.records.count() == 3 + + class EntitiesQueryTestCase(TestCase): """ diff --git a/tests/openedx_learning/apps/authoring/sections/test_api.py b/tests/openedx_learning/apps/authoring/sections/test_api.py index 4b269d3ef..16038b6ad 100644 --- a/tests/openedx_learning/apps/authoring/sections/test_api.py +++ b/tests/openedx_learning/apps/authoring/sections/test_api.py @@ -214,9 +214,9 @@ def test_create_section_queries(self): Test how many database queries are required to create a section """ # The exact numbers here aren't too important - this is just to alert us if anything significant changes. 
- with self.assertNumQueries(25): + with self.assertNumQueries(29): _empty_section = self.create_section_with_subsections([]) - with self.assertNumQueries(30): + with self.assertNumQueries(36): # And try with a non-empty section: self.create_section_with_subsections([self.subsection_1, self.subsection_2_v1], key="u2") @@ -684,12 +684,12 @@ def test_query_count_of_contains_unpublished_changes(self): section = self.create_section_with_subsections(subsections) authoring_api.publish_all_drafts(self.learning_package.id) section.refresh_from_db() - with self.assertNumQueries(6): + with self.assertNumQueries(1): assert authoring_api.contains_unpublished_changes(section.pk) is False # Modify the most recently created subsection: self.modify_subsection(subsection, title="Modified Subsection") - with self.assertNumQueries(5): + with self.assertNumQueries(1): assert authoring_api.contains_unpublished_changes(section.pk) is True def test_metadata_change_doesnt_create_entity_list(self): diff --git a/tests/openedx_learning/apps/authoring/subsections/test_api.py b/tests/openedx_learning/apps/authoring/subsections/test_api.py index 847336611..4866deaee 100644 --- a/tests/openedx_learning/apps/authoring/subsections/test_api.py +++ b/tests/openedx_learning/apps/authoring/subsections/test_api.py @@ -197,9 +197,9 @@ def test_create_subsection_queries(self): Test how many database queries are required to create a subsection """ # The exact numbers here aren't too important - this is just to alert us if anything significant changes. 
- with self.assertNumQueries(25): + with self.assertNumQueries(29): _empty_subsection = self.create_subsection_with_units([]) - with self.assertNumQueries(30): + with self.assertNumQueries(36): # And try with a non-empty subsection: self.create_subsection_with_units([self.unit_1, self.unit_2_v1], key="u2") @@ -695,12 +695,12 @@ def test_query_count_of_contains_unpublished_changes(self): subsection = self.create_subsection_with_units(units) authoring_api.publish_all_drafts(self.learning_package.id) subsection.refresh_from_db() - with self.assertNumQueries(6): + with self.assertNumQueries(1): assert authoring_api.contains_unpublished_changes(subsection.pk) is False # Modify the most recently created unit: self.modify_unit(unit, title="Modified Unit") - with self.assertNumQueries(5): + with self.assertNumQueries(1): assert authoring_api.contains_unpublished_changes(subsection.pk) is True def test_metadata_change_doesnt_create_entity_list(self): diff --git a/tests/openedx_learning/apps/authoring/units/test_api.py b/tests/openedx_learning/apps/authoring/units/test_api.py index 3c3caa7d9..100aff7d2 100644 --- a/tests/openedx_learning/apps/authoring/units/test_api.py +++ b/tests/openedx_learning/apps/authoring/units/test_api.py @@ -185,9 +185,9 @@ def test_create_unit_queries(self): Test how many database queries are required to create a unit """ # The exact numbers here aren't too important - this is just to alert us if anything significant changes. 
- with self.assertNumQueries(25): + with self.assertNumQueries(27): _empty_unit = self.create_unit_with_components([]) - with self.assertNumQueries(29): + with self.assertNumQueries(33): # And try with a non-empty unit: self.create_unit_with_components([self.component_1, self.component_2_v1], key="u2") @@ -679,12 +679,12 @@ def test_query_count_of_contains_unpublished_changes(self): unit = self.create_unit_with_components(components) authoring_api.publish_all_drafts(self.learning_package.id) unit.refresh_from_db() - with self.assertNumQueries(2): + with self.assertNumQueries(1): assert authoring_api.contains_unpublished_changes(unit.pk) is False # Modify the most recently created component: self.modify_component(component, title="Modified Component") - with self.assertNumQueries(2): + with self.assertNumQueries(1): assert authoring_api.contains_unpublished_changes(unit.pk) is True def test_metadata_change_doesnt_create_entity_list(self): From 0af01c095a0d20e32446985989cde84f24d0ae30 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 3 Sep 2025 12:15:09 -0400 Subject: [PATCH 02/25] temp: linter fixups --- .../apps/authoring/publishing/admin.py | 42 ++++++++++--------- .../apps/authoring/publishing/api.py | 14 +++++-- .../authoring/publishing/models/draft_log.py | 2 +- .../publishing/models/publish_log.py | 3 -- .../publishing/models/publishable_entity.py | 6 ++- .../apps/authoring/publishing/test_api.py | 2 - 6 files changed, 38 insertions(+), 31 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/admin.py b/openedx_learning/apps/authoring/publishing/admin.py index e24b7dfa4..31a201729 100644 --- a/openedx_learning/apps/authoring/publishing/admin.py +++ b/openedx_learning/apps/authoring/publishing/admin.py @@ -6,7 +6,7 @@ import functools from django.contrib import admin -from django.db.models import Count, F, Q +from django.db.models import Count, F from django.utils.html import format_html from django.utils.safestring import SafeText @@ -22,7 
+22,6 @@ LearningPackage, PublishableEntity, PublishableEntityVersion, - PublishableEntityVersionDependency, PublishLog, PublishLogRecord, ) @@ -122,6 +121,9 @@ def get_queryset(self, request): ) class PublishStatusFilter(admin.SimpleListFilter): + """ + Custom filter for entities that have unpublished changes. + """ title = "publish status" parameter_name = "publish_status" @@ -143,6 +145,7 @@ def queryset(self, request, queryset): published__dependencies_hash_digest=F("draft__dependencies_hash_digest") ) ) + return queryset @admin.register(PublishableEntity) @@ -188,9 +191,22 @@ class PublishableEntityAdmin(ReadOnlyModelAdmin): "can_stand_alone", ] + def get_queryset(self, request): + queryset = super().get_queryset(request) + return queryset.select_related( + "learning_package", "published__version", "draft__version", "created_by" + ) + + def see_also(self, entity): + return one_to_one_related_model_html(entity) + def draft_version(self, entity: PublishableEntity): - from django.utils.html import format_html + """ + Version num + dependency hash if applicable, e.g. "5" or "5 (825064c2)" + If the version info is different from the published version, we + italicize the text for emphasis. 
+ """ if hasattr(entity, "draft") and entity.draft.version: if entity.draft.dependencies_hash_digest: version_str = ( @@ -208,23 +224,9 @@ def draft_version(self, entity: PublishableEntity): return None def published_version(self, entity: PublishableEntity): - return entity.published.version.version_num if entity.published and entity.published.version else None - - def get_queryset(self, request): - queryset = super().get_queryset(request) - return queryset.select_related( - "learning_package", "published__version", "draft__version", "created_by" - ) - - def see_also(self, entity): - return one_to_one_related_model_html(entity) - - def published_version(self, entity): - if entity.published.version: - return entity.published.version.version_num - return None - - def published_version(self, entity: PublishableEntity): + """ + Version num + dependency hash if applicable, e.g. "5" or "5 (825064c2)" + """ if hasattr(entity, "published") and entity.published.version: if entity.published.dependencies_hash_digest: return ( diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 233f25664..fea69d9b6 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -215,6 +215,7 @@ def create_publishable_entity_version( title: str, created: datetime, created_by: int | None, + *, dependencies: list[int] | None = None, # PublishableEntity IDs ) -> PublishableEntityVersion: """ @@ -424,7 +425,7 @@ def _get_dependencies_with_unpublished_changes( ).distinct() if not all_dependent_drafts: - return Draft.objects.none() + return [] unpublished_dependent_drafts = [ dependent_drafts_qset.with_unpublished_changes() @@ -440,6 +441,7 @@ def publish_from_drafts( message: str = "", published_at: datetime | None = None, published_by: int | None = None, # User.id + *, publish_dependencies: bool = True, ) -> PublishLog: """ @@ -631,7 +633,7 @@ def set_draft_version( 
learning_package_id ) if active_change_log: - change = _add_to_existing_draft_change_log( + _add_to_existing_draft_change_log( active_change_log, draft.entity_id, old_version_id=old_version_id, @@ -755,6 +757,9 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) Note: The interface between ``DraftChangeLog`` and ``PublishLog`` is similar enough that this function has been made to work with both. """ + branch_cls: type[Draft] | type[Published] + change_record_cls: type[DraftChangeLogRecord] | type[PublishLogRecord] + side_effect_cls: type[DraftSideEffect] | type[PublishSideEffect] if isinstance(change_log, DraftChangeLog): branch_cls = Draft change_record_cls = DraftChangeLogRecord @@ -791,10 +796,11 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) # already in the change_log, then we have to add it. affected_version_pk = affected.version_id + change_log_param = {} if branch_cls == Draft: - change_log_param = {'draft_change_log': original_change.draft_change_log} + change_log_param['draft_change_log'] = original_change.draft_change_log elif branch_cls == Published: - change_log_param = {'publish_log': original_change.publish_log} + change_log_param['publish_log'] = original_change.publish_log # Example: If the original_change is a DraftChangeLogRecord that # represents editing a Component, the side_effect_change is the diff --git a/openedx_learning/apps/authoring/publishing/models/draft_log.py b/openedx_learning/apps/authoring/publishing/models/draft_log.py index 88e45718d..798c91a2a 100644 --- a/openedx_learning/apps/authoring/publishing/models/draft_log.py +++ b/openedx_learning/apps/authoring/publishing/models/draft_log.py @@ -3,7 +3,7 @@ """ from django.conf import settings from django.db import models -from django.db.models import F, Q, QuerySet +from django.db.models import F, Q from django.utils.translation import gettext_lazy as _ from openedx_learning.lib.fields import ( diff --git 
a/openedx_learning/apps/authoring/publishing/models/publish_log.py b/openedx_learning/apps/authoring/publishing/models/publish_log.py index 63b885269..a350b4167 100644 --- a/openedx_learning/apps/authoring/publishing/models/publish_log.py +++ b/openedx_learning/apps/authoring/publishing/models/publish_log.py @@ -226,6 +226,3 @@ class Meta: ] verbose_name = _("Publish Side Effect") verbose_name_plural = _("Publish Side Effects") - - - diff --git a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py index ceadacb73..020d48b5e 100644 --- a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py +++ b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py @@ -1,6 +1,8 @@ """ PublishableEntity model and PublishableEntityVersion + mixins """ +from __future__ import annotations + from datetime import datetime from functools import cached_property from typing import ClassVar, Self @@ -223,7 +225,9 @@ class PublishableEntityVersion(models.Model): blank=True, ) - dependencies = models.ManyToManyField( + dependencies: models.ManyToManyField[ + PublishableEntity, PublishableEntityVersionDependency + ] = models.ManyToManyField( PublishableEntity, through="PublishableEntityVersionDependency", related_name="affects", diff --git a/tests/openedx_learning/apps/authoring/publishing/test_api.py b/tests/openedx_learning/apps/authoring/publishing/test_api.py index 899f129d9..d652863fe 100644 --- a/tests/openedx_learning/apps/authoring/publishing/test_api.py +++ b/tests/openedx_learning/apps/authoring/publishing/test_api.py @@ -1170,7 +1170,6 @@ def test_draft_dependency_multiple_parents(self): assert side_effects.filter(effect__entity=unit_2.publishable_entity).count() == 1 - def test_multiple_layers_of_containers(self): """Test stacking containers three layers deep.""" # Note that these aren't real "components" and "units". 
Everything being @@ -1299,7 +1298,6 @@ def test_publish_all_layers(self): assert publish_log.records.count() == 3 - class EntitiesQueryTestCase(TestCase): """ Tests for querying PublishableEntity objects. From 00e6ea67b269f47f2bcf8315837738b2198ca101 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 3 Sep 2025 13:19:58 -0400 Subject: [PATCH 03/25] temp: remove redundant early return, add comments --- openedx_learning/apps/authoring/publishing/api.py | 5 +---- openedx_learning/lib/fields.py | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index fea69d9b6..f569263ce 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -250,7 +250,7 @@ def set_version_dependencies( dependencies: list[int] # List of PublishableEntity.id ) -> None: """ - Set the depenencies of a publishable entity version. + Set the dependencies of a publishable entity version. In general, callers should not modify dependencies after creation (i.e. use the optional param in create_publishable_entity_version() instead of using @@ -424,9 +424,6 @@ def _get_dependencies_with_unpublished_changes( entity__affects__in=dependent_drafts.all().values_list("entity_id", flat=True) ).distinct() - if not all_dependent_drafts: - return [] - unpublished_dependent_drafts = [ dependent_drafts_qset.with_unpublished_changes() for dependent_drafts_qset in all_dependent_drafts diff --git a/openedx_learning/lib/fields.py b/openedx_learning/lib/fields.py index 084c96ff9..5d2bbddbb 100644 --- a/openedx_learning/lib/fields.py +++ b/openedx_learning/lib/fields.py @@ -23,8 +23,8 @@ def create_hash_digest(data_bytes: bytes, num_bytes=20) -> str: """ Create a lower-case hex string representation of a hash digest. - The hash digest itself is 20-bytes using BLAKE2b (so 40 characters when hex - encoded). 
+ The hash itself is 20-bytes by default, so 40 characters when we return it + as a hex-encoded string. We use BLAKE2b for the hashing algorithm. DON'T JUST MODIFY THIS HASH BEHAVIOR!!! We use hashing for de-duplication purposes. If this hash function ever changes, that deduplication will fail From a74d8b1b4ce9ff67c6ef06cc8166fecc0c59e971 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Thu, 2 Oct 2025 12:20:26 -0400 Subject: [PATCH 04/25] refactor: models part of the refactoring of putting the dependency data in the log models --- ...draft_dependencies_hash_digest_and_more.py | 57 ------- .../migrations/0010_backfill_dependencies.py | 145 ------------------ .../authoring/publishing/models/draft_log.py | 42 +++-- .../publishing/models/publish_log.py | 34 ++-- 4 files changed, 42 insertions(+), 236 deletions(-) delete mode 100644 openedx_learning/apps/authoring/publishing/migrations/0009_draft_dependencies_hash_digest_and_more.py delete mode 100644 openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py diff --git a/openedx_learning/apps/authoring/publishing/migrations/0009_draft_dependencies_hash_digest_and_more.py b/openedx_learning/apps/authoring/publishing/migrations/0009_draft_dependencies_hash_digest_and_more.py deleted file mode 100644 index a249eee62..000000000 --- a/openedx_learning/apps/authoring/publishing/migrations/0009_draft_dependencies_hash_digest_and_more.py +++ /dev/null @@ -1,57 +0,0 @@ -# Generated by Django 4.2.23 on 2025-08-27 03:46 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ('oel_publishing', '0008_alter_draftchangelogrecord_options_and_more'), - ] - - operations = [ - migrations.AddField( - model_name='draft', - name='dependencies_hash_digest', - field=models.CharField(blank=True, default='', editable=False, max_length=8), - ), - migrations.AddField( - model_name='published', - name='dependencies_hash_digest', - 
field=models.CharField(blank=True, default='', editable=False, max_length=8), - ), - migrations.CreateModel( - name='PublishSideEffect', - fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('cause', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, related_name='causes', to='oel_publishing.publishlogrecord')), - ('effect', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, related_name='affected_by', to='oel_publishing.publishlogrecord')), - ], - options={ - 'verbose_name': 'Publish Side Effect', - 'verbose_name_plural': 'Publish Side Effects', - }, - ), - migrations.CreateModel( - name='PublishableEntityVersionDependency', - fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('entity', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentity')), - ('version', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentityversion')), - ], - ), - migrations.AddField( - model_name='publishableentityversion', - name='dependencies', - field=models.ManyToManyField(related_name='affects', through='oel_publishing.PublishableEntityVersionDependency', to='oel_publishing.publishableentity'), - ), - migrations.AddConstraint( - model_name='publishsideeffect', - constraint=models.UniqueConstraint(fields=('cause', 'effect'), name='oel_pub_pse_uniq_c_e'), - ), - migrations.AddConstraint( - model_name='publishableentityversiondependency', - constraint=models.UniqueConstraint(fields=('version', 'entity'), name='oel_pevd_uniq_version_entity'), - ), - ] diff --git a/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py b/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py deleted file mode 100644 index b7b0a73dd..000000000 --- 
a/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py +++ /dev/null @@ -1,145 +0,0 @@ -""" -Backfill PublishableEntityVersionDependency entries based on ContainerVersions. - -We're introducing a lower-level publishing concept of a dependency that will be -used by Containers, but this means we have to backfill that dependency info for -existing Containers in the system. -""" -# Generated by Django 4.2.23 on 2025-09-01 19:16 - -from django.db import migrations -from django.db.models import Prefetch - - -def create_backfill(apps, schema_editor): - """ - Create dependency entries and update dep hashes for Draft and Published. - """ - _create_dependencies(apps) - _update_draft_dependencies_hash(apps) - _update_published_dependencies_hash(apps) - - -def _create_dependencies(apps): - """ - Populate the PublishableEntityVersion.dependencies relation. - - The only ones we should have in the system at this point are the ones from - containers, so we query ContainerVersion for that. - """ - PublishableEntityVersionDependency = apps.get_model( - "oel_publishing", "PublishableEntityVersionDependency" - ) - ContainerVersion = apps.get_model("oel_publishing", "ContainerVersion") - - for container_version in ContainerVersion.objects.all(): - # using a set to de-dupe - child_entity_ids = set( - container_version - .entity_list - .entitylistrow_set - .all() - .values_list("entity_id", flat=True) - ) - PublishableEntityVersionDependency.objects.bulk_create( - [ - PublishableEntityVersionDependency( - version_id=container_version.pk, - entity_id=entity_id - ) - for entity_id in child_entity_ids - ], - ignore_conflicts=True, - ) - -def _update_draft_dependencies_hash(apps): - """ - Update all container Draft.dependencies_hash_digest - - Backfill dependency state hashes. The important thing here is that things - without dependencies will have the default (blank) state hash, so we only - need to query for Draft entries for Containers. 
- """ - from ..api import hash_for_branch_item - - Draft = apps.get_model("oel_publishing", "Draft") - drafts_that_can_have_deps = ( - Draft - .objects - .filter(version__isnull=False, entity__container__isnull=False) - .prefetch_related("version__dependencies") - ) - already_computed_hashes = {} - drafts_to_deps = { - draft: [ - entity.draft - for entity in draft.version.dependencies.all() - if hasattr(entity, "draft") and entity.draft.version - ] - for draft in drafts_that_can_have_deps - } - for draft in drafts_to_deps: - draft.dependencies_hash_digest = hash_for_branch_item( - draft, drafts_to_deps, already_computed_hashes - ) - Draft.objects.bulk_update(drafts_to_deps.keys(), ["dependencies_hash_digest"]) - - -def _update_published_dependencies_hash(apps): - """ - Update all container Published.dependencies_hash_digest - - Backfill dependency state hashes. The important thing here is that things - without dependencies will have the default (blank) state hash, so we only - need to query for Published entries for Containers. - """ - from ..api import hash_for_branch_item - - Published = apps.get_model("oel_publishing", "Published") - published_that_can_have_deps = ( - Published - .objects - .filter(version__isnull=False, entity__container__isnull=False) - .prefetch_related("version__dependencies") - ) - - already_computed_hashes = {} - published_to_deps = { - published: [ - entity.published - for entity in published.version.dependencies.all() - if hasattr(entity, "published") and entity.published.version - ] - for published in published_that_can_have_deps - } - for published in published_to_deps: - published.dependencies_hash_digest = hash_for_branch_item( - published, published_to_deps, already_computed_hashes - ) - Published.objects.bulk_update(published_to_deps.keys(), ["dependencies_hash_digest"]) - - -def remove_backfill(apps, schema_editor): - """ - Reset all dep hash values to default ('') and remove dependencies. 
- """ - Draft = apps.get_model("oel_publishing", "Draft") - Published = apps.get_model("oel_publishing", "Published") - PublishableEntityVersionDependency = apps.get_model( - "oel_publishing", "PublishableEntityVersionDependency" - ) - - Draft.objects.all().update(dependencies_hash_digest='') - Published.objects.all().update(dependencies_hash_digest='') - PublishableEntityVersionDependency.objects.all().delete() - - -class Migration(migrations.Migration): - - dependencies = [ - ('oel_publishing', '0009_draft_dependencies_hash_digest_and_more'), - ] - - operations = [ - migrations.RunPython(create_backfill, reverse_code=remove_backfill) - ] diff --git a/openedx_learning/apps/authoring/publishing/models/draft_log.py b/openedx_learning/apps/authoring/publishing/models/draft_log.py index 798c91a2a..9be43b70a 100644 --- a/openedx_learning/apps/authoring/publishing/models/draft_log.py +++ b/openedx_learning/apps/authoring/publishing/models/draft_log.py @@ -60,23 +60,15 @@ class Draft(models.Model): null=True, blank=True, ) - - # The dependencies_hash_digest is used when the version alone isn't enough - # to let us know the full draft state of an entity. This happens any time a - # Draft version has dependencies (see the PublishableEntityVersionDependency - # model), because changes in those dependencies will cause changes to the - # state of the Draft. The main example of this is containers, where changing - # an unpinned child affects the state of the parent container, even if that - # container's definition (and thus version) does not change. - # - # If a Draft has no dependencies, then its entire state is captured by its - # version, and the dependencies_hash_digest is blank. (Blank is slightly - # more convenient for database comparisons than NULL.) - # - # Note: There is an equivalent of this field in the Published model and the - # the values may drift away from each other. 
- dependencies_hash_digest = hash_field(blank=True, default='', max_length=8) - + # Note: this is actually a 1:1 relation in practice, but I'm keeping the + # definition more consistent with the Published model, which has an fkey + # to PublishLogRecord. Unlike PublishLogRecord, this fkey is a late + # addition to this data model, so we have to allow null values. + draft_log_record = models.ForeignKey( + "DraftChangeLogRecord", + on_delete=models.RESTRICT, + null=True, + ) class DraftQuerySet(models.QuerySet): """ @@ -261,6 +253,22 @@ class DraftChangeLogRecord(models.Model): PublishableEntityVersion, on_delete=models.RESTRICT, null=True, blank=True ) + # The dependencies_hash_digest is used when the version alone isn't enough + # to let us know the full draft state of an entity. This happens any time a + # Draft version has dependencies (see the PublishableEntityVersionDependency + # model), because changes in those dependencies will cause changes to the + # state of the Draft. The main example of this is containers, where changing + # an unpinned child affects the state of the parent container, even if that + # container's definition (and thus version) does not change. + # + # If a Draft has no dependencies, then its entire state is captured by its + # version, and the dependencies_hash_digest is blank. (Blank is slightly + # more convenient for database comparisons than NULL.) + # + # Note: There is an equivalent of this field in the Published model and the + # the values may drift away from each other. + dependencies_hash_digest = hash_field(blank=True, default='', max_length=8) + class Meta: constraints = [ # A PublishableEntity can have only one DraftLogRecord per DraftLog. 
diff --git a/openedx_learning/apps/authoring/publishing/models/publish_log.py b/openedx_learning/apps/authoring/publishing/models/publish_log.py index a350b4167..8efce56ae 100644 --- a/openedx_learning/apps/authoring/publishing/models/publish_log.py +++ b/openedx_learning/apps/authoring/publishing/models/publish_log.py @@ -95,6 +95,23 @@ class PublishLogRecord(models.Model): PublishableEntityVersion, on_delete=models.RESTRICT, null=True, blank=True ) + # The dependencies_hash_digest is used when the version alone isn't enough + # to let us know the full draft state of an entity. This happens any time a + # Published version has dependencies (see the + # PublishableEntityVersionDependency model), because changes in those + # dependencies will cause changes to the state of the Draft. The main + # example of this is containers, where changing an unpinned child affects + # the state of the parent container, even if that container's definition + # (and thus version) does not change. + # + # If a Published version has no dependencies, then its entire state is + # captured by its version, and the dependencies_hash_digest is blank. (Blank + # is slightly more convenient for database comparisons than NULL.) + # + # Note: There is an equivalent of this field in the Draft model and the + # the values may drift away from each other. + dependencies_hash_digest = hash_field(blank=True, default='', max_length=8) + class Meta: constraints = [ # A Publishable can have only one PublishLogRecord per PublishLog. @@ -160,23 +177,6 @@ class Published(models.Model): on_delete=models.RESTRICT, ) - # The dependencies_hash_digest is used when the version alone isn't enough - # to let us know the full draft state of an entity. This happens any time a - # Published version has dependencies (see the - # PublishableEntityVersionDependency model), because changes in those - # dependencies will cause changes to the state of the Draft. 
The main - # example of this is containers, where changing an unpinned child affects - # the state of the parent container, even if that container's definition - # (and thus version) does not change. - # - # If a Published version has no dependencies, then its entire state is - # captured by its version, and the dependencies_hash_digest is blank. (Blank - # is slightly more convenient for database comparisons than NULL.) - # - # Note: There is an equivalent of this field in the Draft model and the - # the values may drift away from each other. - dependencies_hash_digest = hash_field(blank=True, default='', max_length=8) - class Meta: verbose_name = "Published Entity" verbose_name_plural = "Published Entities" From dee35da5d3abc84acefe22dcc202955e2bc0486b Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Fri, 17 Oct 2025 14:08:23 -0400 Subject: [PATCH 05/25] temp: transitional state to deps in logs --- .../apps/authoring/publishing/api.py | 84 ++++++++++++++++++- 1 file changed, 81 insertions(+), 3 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index f569263ce..55613f078 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -620,7 +620,6 @@ def set_draft_version( # The actual update of the Draft model is here. Everything after this # block is bookkeeping in our DraftChangeLog. draft.version_id = publishable_entity_version_pk - draft.save() # Check to see if we're inside a context manager for an active # DraftChangeLog (i.e. 
what happens if the caller is using the public @@ -630,12 +629,14 @@ def set_draft_version( learning_package_id ) if active_change_log: - _add_to_existing_draft_change_log( + draft.draft_log_record = _add_to_existing_draft_change_log( active_change_log, draft.entity_id, old_version_id=old_version_id, new_version_id=publishable_entity_version_pk, ) + draft.save() + # We explicitly *don't* create container side effects here because # there may be many changes in this DraftChangeLog, some of which # haven't been made yet. It wouldn't make sense to create a side @@ -654,12 +655,13 @@ def set_draft_version( changed_at=set_at, changed_by_id=set_by, ) - DraftChangeLogRecord.objects.create( + draft.draft_log_record = DraftChangeLogRecord.objects.create( draft_change_log=change_log, entity_id=draft.entity_id, old_version_id=old_version_id, new_version_id=publishable_entity_version_pk, ) + draft.save() _create_side_effects_for_change_log(change_log) def _add_to_existing_draft_change_log( @@ -995,6 +997,82 @@ def hash_for_branch_item( return digest +def hash_for_branch_item( + log_record: DraftChangeLogRecord | PublishLogRecord, + changed_records: dict[DraftChangeLogRecord, list[Draft]] | dict[PublishLogRecord, list[Published]], + already_computed_hashes: dict[DraftChangeLogRecord, str] | dict[PublishLogRecord, str], +) -> str: + """ + Calculate the dependencies_hash_digest for a log record. + + This will recursively go through the dependencies of the Draft or Published + altered by a DraftChangeLogRecord or a PublishLogRecord. + + We have to be extremely careful about changing the signature or behavior of + this function, as it's used in the 0010_backfill_dependencies data migration + that will be used to upgrade people from Teak to future versions. + + Args: + log_record: The DraftChangeLogRecord or PublishChangeLogRecord entry we + want to recalculate the dependencies hash for. + changed_items: A dict mapping Draft or Published to a list of Dependency + Draft or Published. 
The keys represent the full set of what has + changed in the change log (DraftChangeLog or PublishLog), and + includes things that were added as side-effects. The value lists + in this dict can include things that *didn't* change, if they happen + to be dependencies of things that did change. + already_computed_hashes: A cache to ensure we don't recompute the same + nodes over and over again. + + Note that we are doing potentially re-doing calculations for Draft and + Published, when that data already exists in DraftChangeLogRecord or + PublishLogRecord. This is intentional. The dependencies_hash_digest is a late + addition to the data model. We will backfill all existing Draft and Published + entries so that the DraftChangeLogRecord and PublishChangeLogRecord that they + point to will have dependencies_hash_digest properly set. However, we're not + going to go back through the history and re-compute the state for every log + record in the database, meaning that sometimes an old container's PublishLogRecord + """ + if log_record in already_computed_hashes: + return already_computed_hashes[log_record] + + # Base case #1: The log_record is a dependency of something that was + # affected by a change, but the dependency itself did not change in any way + # (neither directly, nor as a side-effect). + # + # Example: A Unit has two Components. One of the Components changed, forcing + # us to recalculate the dependencies_hash_digest for that Unit. Doing that + # recalculation requires us to fetch the dependencies_hash_digest of the + # unchanged child as well. + # + # These are the only values for dependencies_hash_digest that we can trust, + # because we know that the change log (DraftChangeLog or PublishLog) being + # processed right now will not alter them. + if log_record not in changed_records: + return log_record.dependencies_hash_digest + + # Base case #2: If there are no dependencies, the hash digest is set to ''. 
+ # Note that it's possible that something that used to have a hash digest to + # get reset to '' if those dependencies are later removed. + dependencies = changed_records[log_record] + if not dependencies: + return '' + + # Normal recursive case + dependencies.sort(key=lambda d: d.version.uuid) + dep_state_entries = [ + f"{dep_record.version.uuid}:" + f"{hash_for_branch_item(dep_record, changed_records, already_computed_hashes)}" + for dep_record in dependencies + ] + summary_text = "\n".join(dep_state_entries) + + digest = create_hash_digest(summary_text.encode(), num_bytes=4) + already_computed_hashes[log_record] = digest + + return digest + + def soft_delete_draft(publishable_entity_id: int, /, deleted_by: int | None = None) -> None: """ Sets the Draft version to None. From 65793f0ed1395d97f57b1b76c6257f95d56e31b8 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Tue, 28 Oct 2025 17:45:54 -0400 Subject: [PATCH 06/25] temp: mostly through refactor to putting hashes on the log records --- .../apps/authoring/publishing/api.py | 312 +++++++++++++++--- ...draft_log_record_and_dependency_hashing.py | 62 ++++ .../authoring/publishing/models/draft_log.py | 9 + .../publishing/models/publish_log.py | 4 + .../apps/authoring/sections/test_api.py | 4 +- .../apps/authoring/subsections/test_api.py | 4 +- .../apps/authoring/units/test_api.py | 4 +- 7 files changed, 341 insertions(+), 58 deletions(-) create mode 100644 openedx_learning/apps/authoring/publishing/migrations/0009_draft_draft_log_record_and_dependency_hashing.py diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 55613f078..58f26792f 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -587,8 +587,8 @@ def set_draft_version( from Studio's editing point of view (see ``soft_delete_draft`` for more details). 
- Calling this function attaches a new DraftChangeLogRecordand attaches it to a - DraftChangeLog. + Calling this function attaches a new DraftChangeLogRecordand attaches it to + a DraftChangeLog. This function will create DraftSideEffect entries and properly add any containers that may have been affected by this draft update, UNLESS it is @@ -613,6 +613,21 @@ def set_draft_version( ) # If the Draft is already pointing at this version, there's nothing to do. + # + # THIS IS THE EDGE CASE: + # + # If we create something and soft delete it immediately after, then + # The key is this bit and the part after where we're writing the draft_version_id + # -- we can't do that this early. We have to let the whole log accumulate, and + # then write it out at once. If not, then we won't be able to see the + # previous version of that Draft's version... unless we look at the old + # draft_change_log_record? That might be even more confusing though. + + # Intuitively, we might build up the log and then execute it. But that + # doesn't work because in the middle of a bulk operation, you'd expect + # to see the new value for the draft. So we actually want to optimistically + # change it and then be able to roll it back in case there was no actual change. + old_version_id = draft.version_id if old_version_id == publishable_entity_version_pk: return @@ -629,19 +644,22 @@ def set_draft_version( learning_package_id ) if active_change_log: - draft.draft_log_record = _add_to_existing_draft_change_log( + _add_to_existing_draft_change_log( active_change_log, draft.entity_id, old_version_id=old_version_id, new_version_id=publishable_entity_version_pk, ) - draft.save() - - # We explicitly *don't* create container side effects here because - # there may be many changes in this DraftChangeLog, some of which - # haven't been made yet. 
It wouldn't make sense to create a side - # effect that says, "this Unit changed because this Component in it - # changed" if we were changing that same Unit later on in the same + # We don't set the draft.draft_log_record value yet, because it's + # still possible that future changes in the same DraftChangeLog will + # undo this one. When that happens, the DraftChangeLogRecord is + # removed (so as not to confuse it for side-effect changes). + # + # We also *don't* create container side effects here because there + # may be many changes in this DraftChangeLog, some of which haven't + # been made yet. It wouldn't make sense to create a side effect that + # says, "this Unit changed because this Component in it changed" if + # we were changing that same Unit later on in the same # DraftChangeLog, because that new Unit version might not even # include the child Component. So we'll let DraftChangeLogContext # do that work when it exits its context. @@ -655,23 +673,34 @@ def set_draft_version( changed_at=set_at, changed_by_id=set_by, ) - draft.draft_log_record = DraftChangeLogRecord.objects.create( + DraftChangeLogRecord.objects.create( draft_change_log=change_log, entity_id=draft.entity_id, old_version_id=old_version_id, new_version_id=publishable_entity_version_pk, ) - draft.save() - _create_side_effects_for_change_log(change_log) + +# draft.draft_log_record = DraftChangeLogRecord.objects.create( +# draft_change_log=change_log, +# entity_id=draft.entity_id, +# old_version_id=old_version_id, +# new_version_id=publishable_entity_version_pk, +# ) +# draft.save() + + # Rename this to something like process_draft_change_log, which + # then calls the fn() below, and then also update the contextmanager callback. 
+ _process_draft_change_log(change_log) +# _create_side_effects_for_change_log(change_log) def _add_to_existing_draft_change_log( active_change_log: DraftChangeLog, entity_id: int, old_version_id: int | None, new_version_id: int | None, -) -> DraftChangeLogRecord: +) -> DraftChangeLogRecord | None: """ - Create or update a DraftChangeLogRecord for the active_change_log passed in. + Create, update, or delete a DraftChangeLogRecord for the active_change_log. The an active_change_log may have many DraftChangeLogRecords already associated with it. A DraftChangeLog can only have one DraftChangeLogRecord @@ -685,6 +714,13 @@ def _add_to_existing_draft_change_log( and the same entity_id but with versions: (None, v1), (v1, v2), (v2, v3); we would collapse them into one DraftChangeLogrecord with old_version = None and new_version = v3. + + This also means that if we make a change that undoes the previos change, it + will delete that DraftChangeLogRecord, e.g. (None, v1) -> (None, v2) -> + (None -> None), then this entry can be deleted because it didn't change + anything. This function should never be used for creating side-effect change + log records (the only place where it's normal to have the same old and new + versions). """ try: # Check to see if this PublishableEntity has already been changed in @@ -707,6 +743,7 @@ def _add_to_existing_draft_change_log( # old_version == new_version convention to record entities that have # changed purely due to side-effects. change.delete() + return None else: # Normal case: We update the new_version, but leave the old_version # as is. 
The old_version represents what the Draft was pointing to @@ -727,6 +764,22 @@ def _add_to_existing_draft_change_log( return change +def _process_draft_change_log(change_log: DraftChangeLog) -> None: + draft_objs_to_update = [] + for draft_log_record in change_log.records.select_related("entity__draft").all(): + draft = draft_log_record.entity.draft + draft.draft_log_record = draft_log_record + draft.version_id = draft_log_record.next_version_id + draft_objs_to_update.append(draft) + + Draft.objects.bulk_update( + draft_objs_to_update, + ['draft_log_record'] + ) + + _create_side_effects_for_change_log(change_log) + + def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog): """ Create the side-effects for a DraftChangeLog or PublishLog. @@ -788,6 +841,12 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) changes_and_affected = [ (original_change, current) for current in affected_by_original_change ] + + # These are the Published or Draft objects where we need to repoint the + # log_record (publish_log_record or draft_change_log) to point to the + # side-effects. + branch_objs_to_update_with_side_effects = [] + while changes_and_affected: original_change, affected = changes_and_affected.pop() @@ -818,6 +877,20 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) 'new_version_id': affected_version_pk } ) + # Update the current branch pointer (Draft or Published) for this + # entity to point to the side_effect_change (if it's not already). + # + # TODO: Make this more resilient to these objects not existing yet. 
+ if branch_cls == Published: + published_obj = affected.entity.published + if published_obj.publish_log_record != side_effect_change: + published_obj.publish_log_record = side_effect_change + branch_objs_to_update_with_side_effects.append(published_obj) + elif branch_cls == Draft: + draft_obj = affected.entity.draft + if draft_obj.draft_log_record != side_effect_change: + draft_obj.draft_log_record = side_effect_change + branch_objs_to_update_with_side_effects.append(draft_obj) # Create a new side effect (DraftSideEffect or PublishSideEffect) to # record the relationship between the ``original_change`` and @@ -859,6 +932,12 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) if affected.entity_id not in processed_entity_ids ) + if branch_cls == Published: + log_record_rel = "publish_log_record_id" + elif branch_cls == Draft: + log_record_rel = "draft_log_record_id" + branch_cls.objects.bulk_update(branch_objs_to_update_with_side_effects, [log_record_rel]) + _update_branch_dependencies_hash_digests(change_log) @@ -868,6 +947,12 @@ def _update_branch_dependencies_hash_digests( """ Update dependencies_hash_digest for Drafts or Published in a change log. + All the data for Draft/Published, DraftChangeLog/PublishLog, and + DraftChangeLogRecord/PublishLogRecord have been set at this point *except* + the dependencies_hash_digest of DraftChangeLogRecord/PublishLogRecord. Those + log records are newly created at this point, so dependencies_hash_digest are + set to their default values. + Args: change_log: A DraftChangeLog or PublishLog that already has all side-effects added to it. 
The Draft and Published models should @@ -875,65 +960,180 @@ def _update_branch_dependencies_hash_digests( """ if isinstance(change_log, DraftChangeLog): branch = "draft" - branch_cls = Draft + log_record_relation = "draft_log_record" + record_cls = DraftChangeLogRecord elif isinstance(change_log, PublishLog): branch = "published" - branch_cls = Published + log_record_relation = "publish_log_record" + record_cls = PublishLogRecord else: raise TypeError( f"expected DraftChangeLog or PublishLog, not {type(change_log)}" ) - dependencies_versions_prefetch = Prefetch( + dependencies_prefetch = Prefetch( "new_version__dependencies", queryset=( PublishableEntity.objects - .select_related(f"{branch}__version") - .order_by(f"{branch}__version__uuid") + .select_related( + f"{branch}__version", + f"{branch}__{log_record_relation}", + ) + .order_by(f"{branch}__version__uuid") ) ) - records = ( + changed_records = list( change_log.records - .filter(new_version__isnull=False) .select_related("new_version", f"entity__{branch}") - .prefetch_related(dependencies_versions_prefetch) + .prefetch_related(dependencies_prefetch) ) - changed_to_dependencies: dict[Draft, list[Draft]] | dict[Published, list[Published]] - changed_to_dependencies = {} - for record in records: + record_ids_to_hash_digests: dict[int, str | None] = {} + record_ids_to_live_deps: dict[int, list[PublishableEntity]] = {} + records_that_need_hashes = [] + + for record in changed_records: + # This is a soft-deletion, so the dependency hash is default/blank. We + # set this value in our record_ids_to_hash_digests cache, but we don't + # need to write it to the database because it's just the default value. 
if record.new_version is None: + record_ids_to_hash_digests[record.id] = '' continue - changed_branch_item = getattr(record.entity, branch, None) - if (not changed_branch_item) or (not changed_branch_item.version): - branch_item_dependencies = [] - else: - branch_item_dependencies = [ - getattr(entity, branch) - for entity in record.new_version.dependencies.all() - if hasattr(entity, branch) and getattr(entity, branch).version - ] + # Now check to see if the new version has "live" dependencies, i.e. + # dependencies that have not been deleted. + deps = list( + entity for entity in record.new_version.dependencies.all() + if hasattr(entity, branch) and getattr(entity, branch).version + ) - changed_to_dependencies[changed_branch_item] = branch_item_dependencies + # If there are no live dependencies, this log record also gets the + # default/blank value. + if not deps: + record_ids_to_hash_digests[record.id] = '' + continue + + # If we've gotten this far, it means that this record has dependencies + # and does need to get a hash computed for it. + records_that_need_hashes.append(record) + record_ids_to_live_deps[record.id] = deps + + untrusted_record_id_set = set(rec.id for rec in records_that_need_hashes) + for record in records_that_need_hashes: + record.dependencies_hash_digest = hash_for_log_record( + record, + record_ids_to_hash_digests, + record_ids_to_live_deps, + untrusted_record_id_set, + ) + + record_cls.objects.bulk_update( + records_that_need_hashes, + ['dependencies_hash_digest'], + ) + + + +def hash_for_log_record( + record: DraftChangeLogRecord | PublishLogRecord, + record_ids_to_hash_digests: dict, + record_ids_to_live_deps: dict, + untrusted_record_id_set: set, +) -> str: + """ + This code is a little convoluted because we're working hard to minimize + requests. + + When we say "dependency", we mean when one or more PublishableEntities are + dependencies of a PublishableEntityVersion. 
A change in the Draft or + Published state of the PublishableEntity causes an implicit change to the + PublishableEntityVersion. The common case we have at the moment is when a + container type like a Unit has Components as dependencies, i.e. the + UnitVersion specifies Component Entities as dependencies. + + This means that the dependencies_hash_digest that summarizes the total state + of a PublishableEntity depends on: + + 1. The definition of the current draft/published version of that entity. + 2. The current draft/published versions of all dependencies. + + Which is why it makes sense to capture in a log record, since + PublishLogRecords or DraftChagneLogRecords are created whenever one of the + above two things changes. + + Here are the possible scenarios, including edge cases: + + Soft-deletions + If the record.new_version is None, that means we've just soft-deleted + something (or published the soft-delete of something). We adopt the + convention that if something is soft-deleted, its dependencies_hash_digest + is reset to the default value of ''. + + EntityVersions with no dependencies + If record.new_version has no dependencies, dependencies_hash_digest is + likewise set to the default value of ''. This will be the most common + case. + + EntityVersions with dependencies + If an EntityVersion has dependencies, we calculate the dependency hash + digest by taking a hash of all the UUIDs of the active version. [ fill in + more later] + + EntityVersions with soft-deleted dependencies + A soft-deleted dependency isn't counted (it's as if the dependency were + removed). If all of an EntityVersion's dependencies are soft-deleted, + then it will go back to having to having the default blank string for its + dependencies_hash_digest. + """ + # Case #1: We've already computed this, or it was bootstrapped for us in the + # cache because the record is a deletion or doesn't have dependencies. 
+ if record.id in record_ids_to_hash_digests: + return record_ids_to_hash_digests[record.id] + + # Case #2: The log_record is a dependency of something that was affected by + # a change, but the dependency itself did not change in any way (neither + # directly, nor as a side-effect). + # + # Example: A Unit has two Components. One of the Components changed, forcing + # us to recalculate the dependencies_hash_digest for that Unit. Doing that + # recalculation requires us to fetch the dependencies_hash_digest of the + # unchanged child Component as well. + if record.id not in untrusted_record_id_set: + return record.dependencies_hash_digest - branch_items_to_update = [] - already_computed_hashes = {} - for changed_branch_item in changed_to_dependencies: - new_digest = hash_for_branch_item( - changed_branch_item, changed_to_dependencies, already_computed_hashes + # Normal recursive case: + if isinstance(record, DraftChangeLogRecord): + branch = "draft" + elif isinstance(record, PublishLogRecord): + branch = "published" + else: + raise TypeError( + f"expected DraftChangeLogRecord or PublishLogRecord, not {type(record)}" ) - if new_digest != changed_branch_item.dependencies_hash_digest: - changed_branch_item.dependencies_hash_digest = new_digest - branch_items_to_update.append(changed_branch_item) - branch_cls.objects.bulk_update( - branch_items_to_update, - ["dependencies_hash_digest"], + dependencies: list[PublishableEntity] = sorted( + record_ids_to_live_deps[record.id], + key=lambda entity: getattr(entity, branch).log_record.new_version_id, ) + dep_state_entries = [] + for dep_entity in dependencies: + new_version_id = getattr(dep_entity, branch).log_record.new_version_id + hash_digest = hash_for_log_record( + getattr(dep_entity, branch).log_record, + record_ids_to_hash_digests, + record_ids_to_live_deps, + untrusted_record_id_set, + ) + dep_state_entries.append(f"{new_version_id}:{hash_digest}") + summary_text = "\n".join(dep_state_entries) + digest = 
create_hash_digest(summary_text.encode(), num_bytes=4) + record_ids_to_hash_digests[record.id] = digest + + return digest -def hash_for_branch_item( + +def __hash_for_branch_item( branch_item: Draft | Published, changed_items: dict[Draft, list[Draft]] | dict[Published, list[Published]], already_computed_hashes: dict[Draft, str] | dict[Published, str], @@ -997,7 +1197,7 @@ def hash_for_branch_item( return digest -def hash_for_branch_item( +def __hash_for_branch_item( log_record: DraftChangeLogRecord | PublishLogRecord, changed_records: dict[DraftChangeLogRecord, list[Draft]] | dict[PublishLogRecord, list[Published]], already_computed_hashes: dict[DraftChangeLogRecord, str] | dict[PublishLogRecord, str], @@ -1727,14 +1927,22 @@ def contains_unpublished_changes(container_id: int) -> bool: that's in the container, it will be `False`. This method will return `True` in either case. """ - container = Container.objects.get(pk=container_id) + container = ( + Container.objects + .select_related('publishable_entity__draft__draft_log_record') + .select_related('publishable_entity__published__publish_log_record') + .get(pk=container_id) + ) if container.versioning.has_unpublished_changes: return True draft = container.publishable_entity.draft published = container.publishable_entity.published - return draft.dependencies_hash_digest != published.dependencies_hash_digest + if draft is None and published is None: + return False + + return draft.log_record.dependencies_hash_digest != published.log_record.dependencies_hash_digest def get_containers_with_entity( publishable_entity_pk: int, @@ -1845,6 +2053,6 @@ def bulk_draft_changes_for( changed_at=changed_at, changed_by=changed_by, exit_callbacks=[ - _create_side_effects_for_change_log, + _process_draft_change_log, ] ) diff --git a/openedx_learning/apps/authoring/publishing/migrations/0009_draft_draft_log_record_and_dependency_hashing.py 
b/openedx_learning/apps/authoring/publishing/migrations/0009_draft_draft_log_record_and_dependency_hashing.py new file mode 100644 index 000000000..3cfbcaa16 --- /dev/null +++ b/openedx_learning/apps/authoring/publishing/migrations/0009_draft_draft_log_record_and_dependency_hashing.py @@ -0,0 +1,62 @@ +# Generated by Django 4.2.23 on 2025-10-02 16:16 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_publishing', '0008_alter_draftchangelogrecord_options_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='draft', + name='draft_log_record', + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.draftchangelogrecord'), + ), + migrations.AddField( + model_name='draftchangelogrecord', + name='dependencies_hash_digest', + field=models.CharField(blank=True, default='', editable=False, max_length=8), + ), + migrations.AddField( + model_name='publishlogrecord', + name='dependencies_hash_digest', + field=models.CharField(blank=True, default='', editable=False, max_length=8), + ), + migrations.CreateModel( + name='PublishSideEffect', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('cause', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, related_name='causes', to='oel_publishing.publishlogrecord')), + ('effect', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, related_name='affected_by', to='oel_publishing.publishlogrecord')), + ], + options={ + 'verbose_name': 'Publish Side Effect', + 'verbose_name_plural': 'Publish Side Effects', + }, + ), + migrations.CreateModel( + name='PublishableEntityVersionDependency', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('entity', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, 
to='oel_publishing.publishableentity')), + ('version', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentityversion')), + ], + ), + migrations.AddField( + model_name='publishableentityversion', + name='dependencies', + field=models.ManyToManyField(related_name='affects', through='oel_publishing.PublishableEntityVersionDependency', to='oel_publishing.publishableentity'), + ), + migrations.AddConstraint( + model_name='publishsideeffect', + constraint=models.UniqueConstraint(fields=('cause', 'effect'), name='oel_pub_pse_uniq_c_e'), + ), + migrations.AddConstraint( + model_name='publishableentityversiondependency', + constraint=models.UniqueConstraint(fields=('version', 'entity'), name='oel_pevd_uniq_version_entity'), + ), + ] diff --git a/openedx_learning/apps/authoring/publishing/models/draft_log.py b/openedx_learning/apps/authoring/publishing/models/draft_log.py index 9be43b70a..376c7da87 100644 --- a/openedx_learning/apps/authoring/publishing/models/draft_log.py +++ b/openedx_learning/apps/authoring/publishing/models/draft_log.py @@ -70,6 +70,10 @@ class Draft(models.Model): null=True, ) + @property + def log_record(self): + return self.draft_log_record + class DraftQuerySet(models.QuerySet): """ Custom QuerySet/Manager so we can chain common queries. 
@@ -294,6 +298,11 @@ class Meta: ] verbose_name = _("Draft Change Log Record") verbose_name_plural = _("Draft Change Log Records") + + def __str__(self): + old_version_num = None if self.old_version is None else self.old_version.version_num + new_version_num = None if self.new_version is None else self.new_version.version_num + return f"DraftChangeLogRecord: {self.entity} ({old_version_num} -> {new_version_num})" class DraftSideEffect(models.Model): diff --git a/openedx_learning/apps/authoring/publishing/models/publish_log.py b/openedx_learning/apps/authoring/publishing/models/publish_log.py index 8efce56ae..40d0c1755 100644 --- a/openedx_learning/apps/authoring/publishing/models/publish_log.py +++ b/openedx_learning/apps/authoring/publishing/models/publish_log.py @@ -177,6 +177,10 @@ class Published(models.Model): on_delete=models.RESTRICT, ) + @property + def log_record(self): + return self.publish_log_record + class Meta: verbose_name = "Published Entity" verbose_name_plural = "Published Entities" diff --git a/tests/openedx_learning/apps/authoring/sections/test_api.py b/tests/openedx_learning/apps/authoring/sections/test_api.py index 16038b6ad..90ba6b537 100644 --- a/tests/openedx_learning/apps/authoring/sections/test_api.py +++ b/tests/openedx_learning/apps/authoring/sections/test_api.py @@ -214,9 +214,9 @@ def test_create_section_queries(self): Test how many database queries are required to create a section """ # The exact numbers here aren't too important - this is just to alert us if anything significant changes. 
- with self.assertNumQueries(29): + with self.assertNumQueries(28): _empty_section = self.create_section_with_subsections([]) - with self.assertNumQueries(36): + with self.assertNumQueries(35): # And try with a non-empty section: self.create_section_with_subsections([self.subsection_1, self.subsection_2_v1], key="u2") diff --git a/tests/openedx_learning/apps/authoring/subsections/test_api.py b/tests/openedx_learning/apps/authoring/subsections/test_api.py index 4866deaee..bba137400 100644 --- a/tests/openedx_learning/apps/authoring/subsections/test_api.py +++ b/tests/openedx_learning/apps/authoring/subsections/test_api.py @@ -197,9 +197,9 @@ def test_create_subsection_queries(self): Test how many database queries are required to create a subsection """ # The exact numbers here aren't too important - this is just to alert us if anything significant changes. - with self.assertNumQueries(29): + with self.assertNumQueries(28): _empty_subsection = self.create_subsection_with_units([]) - with self.assertNumQueries(36): + with self.assertNumQueries(35): # And try with a non-empty subsection: self.create_subsection_with_units([self.unit_1, self.unit_2_v1], key="u2") diff --git a/tests/openedx_learning/apps/authoring/units/test_api.py b/tests/openedx_learning/apps/authoring/units/test_api.py index 100aff7d2..cc1bf9dd1 100644 --- a/tests/openedx_learning/apps/authoring/units/test_api.py +++ b/tests/openedx_learning/apps/authoring/units/test_api.py @@ -185,9 +185,9 @@ def test_create_unit_queries(self): Test how many database queries are required to create a unit """ # The exact numbers here aren't too important - this is just to alert us if anything significant changes. 
- with self.assertNumQueries(27): + with self.assertNumQueries(26): _empty_unit = self.create_unit_with_components([]) - with self.assertNumQueries(33): + with self.assertNumQueries(32): # And try with a non-empty unit: self.create_unit_with_components([self.component_1, self.component_2_v1], key="u2") From 9d9ec94545da46c9b6d94e10ee8007501e153a56 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Tue, 28 Oct 2025 21:31:55 -0400 Subject: [PATCH 07/25] temp: all the tests pass, finally --- .../apps/authoring/publishing/api.py | 45 ++++++++++--------- .../0010_alter_draft_draft_log_record.py | 19 ++++++++ .../authoring/publishing/models/draft_log.py | 2 +- 3 files changed, 43 insertions(+), 23 deletions(-) create mode 100644 openedx_learning/apps/authoring/publishing/migrations/0010_alter_draft_draft_log_record.py diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 58f26792f..9c0ecf312 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -644,12 +644,31 @@ def set_draft_version( learning_package_id ) if active_change_log: - _add_to_existing_draft_change_log( + draft_log_record = _add_to_existing_draft_change_log( active_change_log, draft.entity_id, old_version_id=old_version_id, new_version_id=publishable_entity_version_pk, ) + if draft_log_record: + # Normal case: a DraftChangeLogRecord was created or updated. + draft.draft_log_record = draft_log_record + else: + # Edge case: this change cancelled out other changes, and the + # net effect is that the DraftChangeLogRecord shouldn't exist, + # i.e. the version at the start and end of the DraftChangeLog is + # the same. In that case, _add_to_existing_draft_change_log will + # delete the log record for this Draft state, and we have to + # look for the most recently created DraftLogRecord from another + # DraftChangeLog. This value may be None. 
+ draft.draft_log_record = ( + DraftChangeLogRecord.objects + .filter(entity_id=draft.entity_id) + .order_by("-pk") + .first() + ) + draft.save() + # We don't set the draft.draft_log_record value yet, because it's # still possible that future changes in the same DraftChangeLog will # undo this one. When that happens, the DraftChangeLogRecord is @@ -673,20 +692,13 @@ def set_draft_version( changed_at=set_at, changed_by_id=set_by, ) - DraftChangeLogRecord.objects.create( + draft.draft_log_record = DraftChangeLogRecord.objects.create( draft_change_log=change_log, entity_id=draft.entity_id, old_version_id=old_version_id, new_version_id=publishable_entity_version_pk, ) - -# draft.draft_log_record = DraftChangeLogRecord.objects.create( -# draft_change_log=change_log, -# entity_id=draft.entity_id, -# old_version_id=old_version_id, -# new_version_id=publishable_entity_version_pk, -# ) -# draft.save() + draft.save() # Rename this to something like process_draft_change_log, which # then calls the fn() below, and then also update the contextmanager callback. @@ -765,21 +777,10 @@ def _add_to_existing_draft_change_log( def _process_draft_change_log(change_log: DraftChangeLog) -> None: - draft_objs_to_update = [] - for draft_log_record in change_log.records.select_related("entity__draft").all(): - draft = draft_log_record.entity.draft - draft.draft_log_record = draft_log_record - draft.version_id = draft_log_record.next_version_id - draft_objs_to_update.append(draft) - - Draft.objects.bulk_update( - draft_objs_to_update, - ['draft_log_record'] - ) - _create_side_effects_for_change_log(change_log) + def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog): """ Create the side-effects for a DraftChangeLog or PublishLog. 
diff --git a/openedx_learning/apps/authoring/publishing/migrations/0010_alter_draft_draft_log_record.py b/openedx_learning/apps/authoring/publishing/migrations/0010_alter_draft_draft_log_record.py new file mode 100644 index 000000000..b3e29422b --- /dev/null +++ b/openedx_learning/apps/authoring/publishing/migrations/0010_alter_draft_draft_log_record.py @@ -0,0 +1,19 @@ +# Generated by Django 5.2.7 on 2025-10-28 23:43 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_publishing', '0009_draft_draft_log_record_and_dependency_hashing'), + ] + + operations = [ + migrations.AlterField( + model_name='draft', + name='draft_log_record', + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='oel_publishing.draftchangelogrecord'), + ), + ] diff --git a/openedx_learning/apps/authoring/publishing/models/draft_log.py b/openedx_learning/apps/authoring/publishing/models/draft_log.py index 376c7da87..db5cf4708 100644 --- a/openedx_learning/apps/authoring/publishing/models/draft_log.py +++ b/openedx_learning/apps/authoring/publishing/models/draft_log.py @@ -66,7 +66,7 @@ class Draft(models.Model): # addition to this data model, so we have to allow null values. 
draft_log_record = models.ForeignKey( "DraftChangeLogRecord", - on_delete=models.RESTRICT, + on_delete=models.SET_NULL, null=True, ) From 3adc0cc44223064233135cfddc088de25d4df7cb Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Tue, 28 Oct 2025 22:17:47 -0400 Subject: [PATCH 08/25] temp: fixups, remove some old comments --- .../apps/authoring/publishing/api.py | 223 +++--------------- 1 file changed, 39 insertions(+), 184 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 9c0ecf312..571a51f1e 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -587,7 +587,7 @@ def set_draft_version( from Studio's editing point of view (see ``soft_delete_draft`` for more details). - Calling this function attaches a new DraftChangeLogRecordand attaches it to + Calling this function attaches a new DraftChangeLogRecord and attaches it to a DraftChangeLog. This function will create DraftSideEffect entries and properly add any @@ -613,21 +613,6 @@ def set_draft_version( ) # If the Draft is already pointing at this version, there's nothing to do. - # - # THIS IS THE EDGE CASE: - # - # If we create something and soft delete it immediately after, then - # The key is this bit and the part after where we're writing the draft_version_id - # -- we can't do that this early. We have to let the whole log accumulate, and - # then write it out at once. If not, then we won't be able to see the - # previous version of that Draft's version... unless we look at the old - # draft_change_log_record? That might be even more confusing though. - - # Intuitively, we might build up the log and then execute it. But that - # doesn't work because in the middle of a bulk operation, you'd expect - # to see the new value for the draft. So we actually want to optimistically - # change it and then be able to roll it back in case there was no actual change. 
- old_version_id = draft.version_id if old_version_id == publishable_entity_version_pk: return @@ -661,6 +646,12 @@ def set_draft_version( # delete the log record for this Draft state, and we have to # look for the most recently created DraftLogRecord from another # DraftChangeLog. This value may be None. + # + # NOTE: There may be some weird edge cases here around soft- + # deletions and modifications of the same Draft entry in nested + # bulk_draft_changes_for() calls. I haven't thought that through + # yet--it might be better to just throw an error rather than try + # to accommodate it. draft.draft_log_record = ( DraftChangeLogRecord.objects .filter(entity_id=draft.entity_id) @@ -669,22 +660,21 @@ def set_draft_version( ) draft.save() - # We don't set the draft.draft_log_record value yet, because it's - # still possible that future changes in the same DraftChangeLog will - # undo this one. When that happens, the DraftChangeLogRecord is - # removed (so as not to confuse it for side-effect changes). - # # We also *don't* create container side effects here because there # may be many changes in this DraftChangeLog, some of which haven't # been made yet. It wouldn't make sense to create a side effect that # says, "this Unit changed because this Component in it changed" if # we were changing that same Unit later on in the same # DraftChangeLog, because that new Unit version might not even - # include the child Component. So we'll let DraftChangeLogContext - # do that work when it exits its context. + # include that child Component. Also, calculating side-effects is + # expensive, and would result in a lot of wasted queries if we did + # it for every change will inside an active change log context. + # + # Therefore we'll let DraftChangeLogContext do that work when it + # exits its context. else: # This means there is no active DraftChangeLog, so we create our own - # and add our DraftChangeLogRecord to it. 
This has the minor + # and add our DraftChangeLogRecord to it. This has the very minor # optimization that we don't have to check for an existing # DraftChangeLogRecord, because we know it can't exist yet. change_log = DraftChangeLog.objects.create( @@ -699,11 +689,7 @@ def set_draft_version( new_version_id=publishable_entity_version_pk, ) draft.save() - - # Rename this to something like process_draft_change_log, which - # then calls the fn() below, and then also update the contextmanager callback. - _process_draft_change_log(change_log) -# _create_side_effects_for_change_log(change_log) + _create_side_effects_for_change_log(change_log) def _add_to_existing_draft_change_log( active_change_log: DraftChangeLog, @@ -753,7 +739,9 @@ def _add_to_existing_draft_change_log( # # It's important that we remove these cases, because we use the # old_version == new_version convention to record entities that have - # changed purely due to side-effects. + # changed purely due to side-effects. We could technically still + # differentiate those by actually looking at the DraftSideEffect and + # PublishSideEffect models, but this is less confusing overall. change.delete() return None else: @@ -776,11 +764,6 @@ def _add_to_existing_draft_change_log( return change -def _process_draft_change_log(change_log: DraftChangeLog) -> None: - _create_side_effects_for_change_log(change_log) - - - def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog): """ Create the side-effects for a DraftChangeLog or PublishLog. @@ -833,7 +816,7 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) # recalculating the Unit's side-effect on its Subsection, and its # Subsection's side-effect on its Section each time through the loop. # It also guards against infinite parent-child relationship loops, though - # those aren't *supposed* to be allowed anyhow. + # those aren't *supposed* to happen anyhow. 
processed_entity_ids: set[int] = set() for original_change in change_log.records.all(): affected_by_original_change = branch_cls.objects.filter( @@ -845,7 +828,10 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) # These are the Published or Draft objects where we need to repoint the # log_record (publish_log_record or draft_change_log) to point to the - # side-effects. + # side-effect changes, e.g. the DraftChangeLogRecord that says, "This + # Unit's version stayed the same, but its dependency hash changed + # because a child Component's draft version was changed." We gather them + # all up in a list so we can do a bulk_update on them. branch_objs_to_update_with_side_effects = [] while changes_and_affected: @@ -880,8 +866,6 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) ) # Update the current branch pointer (Draft or Published) for this # entity to point to the side_effect_change (if it's not already). - # - # TODO: Make this more resilient to these objects not existing yet. if branch_cls == Published: published_obj = affected.entity.published if published_obj.publish_log_record != side_effect_change: @@ -1034,7 +1018,6 @@ def _update_branch_dependencies_hash_digests( ) - def hash_for_log_record( record: DraftChangeLogRecord | PublishLogRecord, record_ids_to_hash_digests: dict, @@ -1042,8 +1025,11 @@ def hash_for_log_record( untrusted_record_id_set: set, ) -> str: """ - This code is a little convoluted because we're working hard to minimize - requests. + The hash digest for a given change log record. + + Note that this code is a little convoluted because we're working hard to + minimize the number of database requests. All the data we really need could + be derived from just the record itself, but at a far higher cost. When we say "dependency", we mean when one or more PublishableEntities are dependencies of a PublishableEntityVersion. 
A change in the Draft or @@ -1134,146 +1120,6 @@ def hash_for_log_record( return digest -def __hash_for_branch_item( - branch_item: Draft | Published, - changed_items: dict[Draft, list[Draft]] | dict[Published, list[Published]], - already_computed_hashes: dict[Draft, str] | dict[Published, str], -) -> str: - """ - Recursively calculate the dependencies_hash_digest for Draft or Published. - - We have to be extremely careful about changing the signature or behavior of - this function, as it's used in the 0010_backfill_dependencies data migration - that will be used to upgrade people from Teak to future versions. - - Args: - branch_item: The Draft or Published entry we want to recalculate the - dependencies hash for. - changed_items: A dict mapping Draft or Published to a list of Dependency - Draft or Published. The keys represent the full set of what has - changed in the change log (DraftChangeLog or PublishLog), and - includes things that were added as side-effects. The value lists - in this dict can include things that *didn't* change, if they happen - to be dependencies of things that did change. - already_computed_hashes: A cache to ensure we don't recompute the same - nodes over and over again. - """ - if branch_item in already_computed_hashes: - return already_computed_hashes[branch_item] - - # Base case #1: The branch_item is a dependency of something that was - # affected by a change, but the dependency itself did not change in any way - # (neither directly, nor as a side-effect). - # - # Example: A Unit has two Components. One of the Components changed, forcing - # us to recalculate the dependencies_hash_digest for that Unit. Doing that - # recalculation requires us to fetch the dependencies_hash_digest of the - # unchanged child as well. - # - # These are the only values for dependencies_hash_digest that we can trust, - # because we know that the change log (DraftChangeLog or PublishLog) being - # processed right now will not alter them. 
- if branch_item not in changed_items: - return branch_item.dependencies_hash_digest - - # Base case #2: If there are no dependencies, the hash digest is set to ''. - # Note that it's possible that something that used to have a hash digest to - # get reset to '' if those dependencies are later removed. - dependencies = changed_items[branch_item] - if not dependencies: - return '' - - # Normal recursive case - dependencies.sort(key=lambda d: d.version.uuid) - dep_state_entries = [ - f"{dep_item.version.uuid}:" - f"{hash_for_branch_item(dep_item, changed_items, already_computed_hashes)}" - for dep_item in dependencies - ] - summary_text = "\n".join(dep_state_entries) - - digest = create_hash_digest(summary_text.encode(), num_bytes=4) - already_computed_hashes[branch_item] = digest - - return digest - - -def __hash_for_branch_item( - log_record: DraftChangeLogRecord | PublishLogRecord, - changed_records: dict[DraftChangeLogRecord, list[Draft]] | dict[PublishLogRecord, list[Published]], - already_computed_hashes: dict[DraftChangeLogRecord, str] | dict[PublishLogRecord, str], -) -> str: - """ - Calculate the dependencies_hash_digest for a log record. - - This will recursively go through the dependencies of the Draft or Published - altered by a DraftChangeLogRecord or a PublishLogRecord. - - We have to be extremely careful about changing the signature or behavior of - this function, as it's used in the 0010_backfill_dependencies data migration - that will be used to upgrade people from Teak to future versions. - - Args: - log_record: The DraftChangeLogRecord or PublishChangeLogRecord entry we - want to recalculate the dependencies hash for. - changed_items: A dict mapping Draft or Published to a list of Dependency - Draft or Published. The keys represent the full set of what has - changed in the change log (DraftChangeLog or PublishLog), and - includes things that were added as side-effects. 
The value lists - in this dict can include things that *didn't* change, if they happen - to be dependencies of things that did change. - already_computed_hashes: A cache to ensure we don't recompute the same - nodes over and over again. - - Note that we are doing potentially re-doing calculations for Draft and - Published, when that data already exists in DraftChangeLogRecord or - PublishLogRecord. This is intentional. The dependencies_hash_digest is a late - addition to the data model. We will backfill all existing Draft and Published - entries so that the DraftChangeLogRecord and PublishChangeLogRecord that they - point to will have dependencies_hash_digest properly set. However, we're not - going to go back through the history and re-compute the state for every log - record in the database, meaning that sometimes an old container's PublishLogRecord - """ - if log_record in already_computed_hashes: - return already_computed_hashes[log_record] - - # Base case #1: The log_record is a dependency of something that was - # affected by a change, but the dependency itself did not change in any way - # (neither directly, nor as a side-effect). - # - # Example: A Unit has two Components. One of the Components changed, forcing - # us to recalculate the dependencies_hash_digest for that Unit. Doing that - # recalculation requires us to fetch the dependencies_hash_digest of the - # unchanged child as well. - # - # These are the only values for dependencies_hash_digest that we can trust, - # because we know that the change log (DraftChangeLog or PublishLog) being - # processed right now will not alter them. - if log_record not in changed_records: - return log_record.dependencies_hash_digest - - # Base case #2: If there are no dependencies, the hash digest is set to ''. - # Note that it's possible that something that used to have a hash digest to - # get reset to '' if those dependencies are later removed. 
- dependencies = changed_records[log_record] - if not dependencies: - return '' - - # Normal recursive case - dependencies.sort(key=lambda d: d.version.uuid) - dep_state_entries = [ - f"{dep_record.version.uuid}:" - f"{hash_for_branch_item(dep_record, changed_records, already_computed_hashes)}" - for dep_record in dependencies - ] - summary_text = "\n".join(dep_state_entries) - - digest = create_hash_digest(summary_text.encode(), num_bytes=4) - already_computed_hashes[log_record] = digest - - return digest - - def soft_delete_draft(publishable_entity_id: int, /, deleted_by: int | None = None) -> None: """ Sets the Draft version to None. @@ -1940,10 +1786,19 @@ def contains_unpublished_changes(container_id: int) -> bool: draft = container.publishable_entity.draft published = container.publishable_entity.published + # Edge case: A container that was created and then immediately soft-deleted + # does not contain any unpublished changes. if draft is None and published is None: return False - return draft.log_record.dependencies_hash_digest != published.log_record.dependencies_hash_digest + # The dependencies_hash_digest captures the state of all descendants, so we + # can do this quick comparison instead of iterating through layers of + # containers. 
+ draft_version_hash_digest = draft.log_record.dependencies_hash_digest + published_version_hash_digest = published.log_record.dependencies_hash_digest + + return draft_version_hash_digest != published_version_hash_digest + def get_containers_with_entity( publishable_entity_pk: int, @@ -2054,6 +1909,6 @@ def bulk_draft_changes_for( changed_at=changed_at, changed_by=changed_by, exit_callbacks=[ - _process_draft_change_log, + _create_side_effects_for_change_log, ] ) From 2778e97fe01f18624f6f1b7f1ae4af02bbe17519 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Tue, 28 Oct 2025 22:20:34 -0400 Subject: [PATCH 09/25] fix: typo in comments --- .../apps/authoring/publishing/models/publishable_entity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py index 020d48b5e..b914d8544 100644 --- a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py +++ b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py @@ -293,7 +293,7 @@ class PublishableEntityVersionDependency(models.Model): dependencies of U1.v1. If someone removes C2 from U1, then that requires creating a new version of - U1 (so U2.v2). + U1 (so U1.v2). 
This restriction is important because our ability to calculate and cache the state of "this version of this publishable entity and all its dependencies From a8b053a6f51436678c33ef32fc215fc81a468c44 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 29 Oct 2025 00:13:54 -0400 Subject: [PATCH 10/25] temp: comment tweaks --- openedx_learning/apps/authoring/publishing/api.py | 3 ++- .../apps/authoring/publishing/models/publishable_entity.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 571a51f1e..a05ba94a1 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -408,7 +408,7 @@ def _get_dependencies_with_unpublished_changes( effects. The side-effect calculations will happen separately. """ # First we have to do a full crawl of *all* dependencies, regardless of - # whether they have unpublished changes or not. this is because we might + # whether they have unpublished changes or not. This is because we might # have a dependency-of-a-dependency that has changed somewhere down the # line. Example: The draft_qset includes a Subsection. 
Even if the Unit # versions are still the same, there might be a changed Component that needs @@ -691,6 +691,7 @@ def set_draft_version( draft.save() _create_side_effects_for_change_log(change_log) + def _add_to_existing_draft_change_log( active_change_log: DraftChangeLog, entity_id: int, diff --git a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py index b914d8544..bfe9517c0 100644 --- a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py +++ b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py @@ -317,7 +317,7 @@ class PublishableEntityMixin(models.Model): """ Convenience mixin to link your models against PublishableEntity. - Please see docstring for PublishableEntity for more details.FF + Please see docstring for PublishableEntity for more details. If you use this class, you *MUST* also use PublishableEntityVersionMixin and the publishing app's api.register_publishable_models (see its docstring for From 4efa1311e20b7d596f3d0eee11a8e7fa83c27c43 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 29 Oct 2025 12:24:14 -0400 Subject: [PATCH 11/25] temp: minor renames --- .../apps/authoring/publishing/api.py | 30 +++++++++---------- .../publishing/models/publish_log.py | 5 ++++ 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 548b17751..60d8ada97 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -414,22 +414,22 @@ def _get_dependencies_with_unpublished_changes( # line. Example: The draft_qset includes a Subsection. Even if the Unit # versions are still the same, there might be a changed Component that needs # to be published. 
- all_dependent_drafts = [] - dependent_drafts = Draft.objects.filter( - entity__affects__in=draft_qset.all().values_list("entity_id", flat=True) - ).distinct() - - while dependent_drafts: - all_dependent_drafts.append(dependent_drafts) - dependent_drafts = Draft.objects.filter( - entity__affects__in=dependent_drafts.all().values_list("entity_id", flat=True) - ).distinct() - - unpublished_dependent_drafts = [ - dependent_drafts_qset.with_unpublished_changes() - for dependent_drafts_qset in all_dependent_drafts + all_dependency_drafts = [] + dependency_drafts = Draft.objects.all().filter( + entity__affects__in=draft_qset.values_list("entity_id", flat=True) + ).all().distinct() + + while dependency_drafts: + all_dependency_drafts.append(dependency_drafts) + dependency_drafts = Draft.objects.all().filter( + entity__affects__in=dependency_drafts.all().values_list("entity_id", flat=True) + ).all().distinct() + + unpublished_dependency_drafts = [ + dependency_drafts_qset.all().with_unpublished_changes() + for dependency_drafts_qset in all_dependency_drafts ] - return unpublished_dependent_drafts + return unpublished_dependency_drafts def publish_from_drafts( diff --git a/openedx_learning/apps/authoring/publishing/models/publish_log.py b/openedx_learning/apps/authoring/publishing/models/publish_log.py index 40d0c1755..e4c500d27 100644 --- a/openedx_learning/apps/authoring/publishing/models/publish_log.py +++ b/openedx_learning/apps/authoring/publishing/models/publish_log.py @@ -137,6 +137,11 @@ class Meta: verbose_name = "Publish Log Record" verbose_name_plural = "Publish Log Records" + def __str__(self): + old_version_num = None if self.old_version is None else self.old_version.version_num + new_version_num = None if self.new_version is None else self.new_version.version_num + return f"PublishLogRecord: {self.entity} ({old_version_num} -> {new_version_num})" + class Published(models.Model): """ From 23c04e59c01bf4f49f71cf2ee28e1a8c896011c4 Mon Sep 17 00:00:00 2001 From: 
David Ormsbee Date: Wed, 29 Oct 2025 12:56:39 -0400 Subject: [PATCH 12/25] fix: dependency draft search was broken before this --- openedx_learning/apps/authoring/publishing/api.py | 12 ++++++------ .../authoring/publishing/models/publish_log.py | 2 +- test_settings.py | 15 +++++++++++++++ .../apps/authoring/publishing/test_api.py | 1 + 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 60d8ada97..2bc28dd30 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -415,15 +415,15 @@ def _get_dependencies_with_unpublished_changes( # versions are still the same, there might be a changed Component that needs # to be published. all_dependency_drafts = [] - dependency_drafts = Draft.objects.all().filter( - entity__affects__in=draft_qset.values_list("entity_id", flat=True) - ).all().distinct() + dependency_drafts = Draft.objects.filter( + entity__affects__in=draft_qset.values_list("version_id", flat=True) + ).distinct() while dependency_drafts: all_dependency_drafts.append(dependency_drafts) - dependency_drafts = Draft.objects.all().filter( - entity__affects__in=dependency_drafts.all().values_list("entity_id", flat=True) - ).all().distinct() + dependency_drafts = Draft.objects.filter( + entity__affects__in=dependency_drafts.all().values_list("version_id", flat=True) + ).distinct() unpublished_dependency_drafts = [ dependency_drafts_qset.all().with_unpublished_changes() diff --git a/openedx_learning/apps/authoring/publishing/models/publish_log.py b/openedx_learning/apps/authoring/publishing/models/publish_log.py index e4c500d27..f5d5b33ba 100644 --- a/openedx_learning/apps/authoring/publishing/models/publish_log.py +++ b/openedx_learning/apps/authoring/publishing/models/publish_log.py @@ -139,7 +139,7 @@ class Meta: def __str__(self): old_version_num = None if self.old_version is None else 
self.old_version.version_num - new_version_num = None if self.new_version is None else self.new_version.version_num + new_version_num = None if self.new_version is None else self.new_version.version_num return f"PublishLogRecord: {self.entity} ({old_version_num} -> {new_version_num})" diff --git a/test_settings.py b/test_settings.py index a50efb17c..c4b22926d 100644 --- a/test_settings.py +++ b/test_settings.py @@ -26,6 +26,21 @@ def root(*args): } } +# If you provision the 'oel'@'%' with broad permissions on your MySQL instance, +# running the tests will auto-generate a database for running tests. This is +# slower than the default sqlite3 setup above, but it's sometimes helpful for +# finding things that only break in CI. +# +# DATABASES = { +# "default": { +# "ENGINE": "django.db.backends.mysql", +# "USER": "oel", +# "PASSWORD": "oel-test-pass", +# "HOST": "mysql", +# "PORT": "3306", +# } +# } + INSTALLED_APPS = [ "django.contrib.auth", "django.contrib.contenttypes", diff --git a/tests/openedx_learning/apps/authoring/publishing/test_api.py b/tests/openedx_learning/apps/authoring/publishing/test_api.py index 61d1498d6..86cfc76f6 100644 --- a/tests/openedx_learning/apps/authoring/publishing/test_api.py +++ b/tests/openedx_learning/apps/authoring/publishing/test_api.py @@ -1297,6 +1297,7 @@ def test_publish_all_layers(self): # the publish log records. 
assert publish_log.records.count() == 3 + def test_container_next_version(self): """Test that next_version works for containers.""" child_1 = publishing_api.create_publishable_entity( From 42822ea53561fe3663bbcef3a6a72c9e43359c4e Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 29 Oct 2025 13:26:55 -0400 Subject: [PATCH 13/25] fix: quality and admin fixes --- .../apps/authoring/publishing/admin.py | 12 ++++--- .../apps/authoring/publishing/api.py | 36 ++++++++++--------- .../authoring/publishing/models/draft_log.py | 4 +-- 3 files changed, 29 insertions(+), 23 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/admin.py b/openedx_learning/apps/authoring/publishing/admin.py index 31a201729..5cff05719 100644 --- a/openedx_learning/apps/authoring/publishing/admin.py +++ b/openedx_learning/apps/authoring/publishing/admin.py @@ -49,6 +49,7 @@ class PublishLogRecordTabularInline(admin.TabularInline): "title", "old_version_num", "new_version_num", + "dependencies_hash_digest", ) readonly_fields = fields @@ -208,10 +209,11 @@ def draft_version(self, entity: PublishableEntity): italicize the text for emphasis. """ if hasattr(entity, "draft") and entity.draft.version: - if entity.draft.dependencies_hash_digest: + draft_log_record = entity.draft.draft_log_record + if draft_log_record and draft_log_record.dependencies_hash_digest: version_str = ( f"{entity.draft.version.version_num} " - f"({entity.draft.dependencies_hash_digest})" + f"({draft_log_record.dependencies_hash_digest})" ) else: version_str = str(entity.draft.version.version_num) @@ -228,10 +230,11 @@ def published_version(self, entity: PublishableEntity): Version num + dependency hash if applicable, e.g. 
"5" or "5 (825064c2)" """ if hasattr(entity, "published") and entity.published.version: - if entity.published.dependencies_hash_digest: + publish_log_record = entity.published.publish_log_record + if publish_log_record.dependencies_hash_digest: return ( f"{entity.published.version.version_num} " - f"({entity.published.dependencies_hash_digest})" + f"({publish_log_record.dependencies_hash_digest})" ) else: return str(entity.published.version.version_num) @@ -291,6 +294,7 @@ class DraftChangeLogRecordTabularInline(admin.TabularInline): "title", "old_version_num", "new_version_num", + "dependencies_hash_digest", ) readonly_fields = fields diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 2bc28dd30..3466eee4b 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -652,7 +652,7 @@ def set_draft_version( # deletions and modifications of the same Draft entry in nested # bulk_draft_changes_for() calls. I haven't thought that through # yet--it might be better to just throw an error rather than try - # to accommodate it. + # to accommodate it. draft.draft_log_record = ( DraftChangeLogRecord.objects .filter(entity_id=draft.entity_id) @@ -670,7 +670,7 @@ def set_draft_version( # include that child Component. Also, calculating side-effects is # expensive, and would result in a lot of wasted queries if we did # it for every change will inside an active change log context. - # + # # Therefore we'll let DraftChangeLogContext do that work when it # exits its context. else: @@ -716,7 +716,7 @@ def _add_to_existing_draft_change_log( and new_version = v3. This also means that if we make a change that undoes the previos change, it - will delete that DraftChangeLogRecord, e.g. (None, v1) -> (None, v2) -> + will delete that DraftChangeLogRecord, e.g. 
(None, v1) -> (None, v2) -> (None -> None), then this entry can be deleted because it didn't change anything. This function should never be used for creating side-effect change log records (the only place where it's normal to have the same old and new @@ -802,10 +802,12 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) branch_cls = Draft change_record_cls = DraftChangeLogRecord side_effect_cls = DraftSideEffect + log_record_rel = "draft_log_record_id" elif isinstance(change_log, PublishLog): branch_cls = Published change_record_cls = PublishLogRecord side_effect_cls = PublishSideEffect + log_record_rel = "publish_log_record_id" else: raise TypeError( f"expected DraftChangeLog or PublishLog, not {type(change_log)}" @@ -845,9 +847,9 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) change_log_param = {} if branch_cls == Draft: - change_log_param['draft_change_log'] = original_change.draft_change_log + change_log_param['draft_change_log'] = original_change.draft_change_log # type: ignore[union-attr] elif branch_cls == Published: - change_log_param['publish_log'] = original_change.publish_log + change_log_param['publish_log'] = original_change.publish_log # type: ignore[union-attr] # Example: If the original_change is a DraftChangeLogRecord that # represents editing a Component, the side_effect_change is the @@ -919,11 +921,10 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) if affected.entity_id not in processed_entity_ids ) - if branch_cls == Published: - log_record_rel = "publish_log_record_id" - elif branch_cls == Draft: - log_record_rel = "draft_log_record_id" - branch_cls.objects.bulk_update(branch_objs_to_update_with_side_effects, [log_record_rel]) + branch_cls.objects.bulk_update( + branch_objs_to_update_with_side_effects, # type: ignore[arg-type] + [log_record_rel], + ) _update_branch_dependencies_hash_digests(change_log) @@ -952,7 +953,7 @@ def 
_update_branch_dependencies_hash_digests( elif isinstance(change_log, PublishLog): branch = "published" log_record_relation = "publish_log_record" - record_cls = PublishLogRecord + record_cls = PublishLogRecord # type: ignore[assignment] else: raise TypeError( f"expected DraftChangeLog or PublishLog, not {type(change_log)}" @@ -969,7 +970,8 @@ def _update_branch_dependencies_hash_digests( .order_by(f"{branch}__version__uuid") ) ) - changed_records = list( + changed_records: QuerySet[DraftChangeLogRecord] | QuerySet[PublishLogRecord] + changed_records = ( change_log.records .select_related("new_version", f"entity__{branch}") .prefetch_related(dependencies_prefetch) @@ -1015,7 +1017,7 @@ def _update_branch_dependencies_hash_digests( ) record_cls.objects.bulk_update( - records_that_need_hashes, + records_that_need_hashes, # type: ignore[arg-type] ['dependencies_hash_digest'], ) @@ -1057,12 +1059,12 @@ def hash_for_log_record( something (or published the soft-delete of something). We adopt the convention that if something is soft-deleted, its dependencies_hash_digest is reset to the default value of ''. - + EntityVersions with no dependencies If record.new_version has no dependencies, dependencies_hash_digest is likewise set to the default value of ''. This will be the most common case. - + EntityVersions with dependencies If an EntityVersion has dependencies, we calculate the dependency hash digest by taking a hash of all the UUIDs of the active version. [ fill in @@ -1078,7 +1080,7 @@ def hash_for_log_record( # cache because the record is a deletion or doesn't have dependencies. if record.id in record_ids_to_hash_digests: return record_ids_to_hash_digests[record.id] - + # Case #2: The log_record is a dependency of something that was affected by # a change, but the dependency itself did not change in any way (neither # directly, nor as a side-effect). @@ -1813,7 +1815,7 @@ def contains_unpublished_changes(container_id: int) -> bool: # containers. 
draft_version_hash_digest = draft.log_record.dependencies_hash_digest published_version_hash_digest = published.log_record.dependencies_hash_digest - + return draft_version_hash_digest != published_version_hash_digest diff --git a/openedx_learning/apps/authoring/publishing/models/draft_log.py b/openedx_learning/apps/authoring/publishing/models/draft_log.py index db5cf4708..5f675218d 100644 --- a/openedx_learning/apps/authoring/publishing/models/draft_log.py +++ b/openedx_learning/apps/authoring/publishing/models/draft_log.py @@ -298,10 +298,10 @@ class Meta: ] verbose_name = _("Draft Change Log Record") verbose_name_plural = _("Draft Change Log Records") - + def __str__(self): old_version_num = None if self.old_version is None else self.old_version.version_num - new_version_num = None if self.new_version is None else self.new_version.version_num + new_version_num = None if self.new_version is None else self.new_version.version_num return f"DraftChangeLogRecord: {self.entity} ({old_version_num} -> {new_version_num})" From bb073e8dc4b78364e323c64148323b45ccb3966d Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 29 Oct 2025 14:01:35 -0400 Subject: [PATCH 14/25] refactor: rename dependency model fields for clarity, regenerate migration --- .../apps/authoring/publishing/api.py | 2 +- ...ng.py => 0009_dependencies_and_hashing.py} | 38 +++++++++---------- .../0010_alter_draft_draft_log_record.py | 19 ---------- .../authoring/publishing/models/draft_log.py | 6 ++- .../publishing/models/publishable_entity.py | 8 ++-- 5 files changed, 29 insertions(+), 44 deletions(-) rename openedx_learning/apps/authoring/publishing/migrations/{0009_draft_draft_log_record_and_dependency_hashing.py => 0009_dependencies_and_hashing.py} (82%) delete mode 100644 openedx_learning/apps/authoring/publishing/migrations/0010_alter_draft_draft_log_record.py diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 
3466eee4b..432542d71 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -287,7 +287,7 @@ def set_version_dependencies( PublishableEntityVersionDependency.objects.bulk_create( [ PublishableEntityVersionDependency( - version_id=version_id, entity_id=dep_entity_id + referent_version_id=version_id, referenced_entity_id=dep_entity_id ) for dep_entity_id in set(dependencies) # dependencies have no ordering ], diff --git a/openedx_learning/apps/authoring/publishing/migrations/0009_draft_draft_log_record_and_dependency_hashing.py b/openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py similarity index 82% rename from openedx_learning/apps/authoring/publishing/migrations/0009_draft_draft_log_record_and_dependency_hashing.py rename to openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py index 3cfbcaa16..4b840add9 100644 --- a/openedx_learning/apps/authoring/publishing/migrations/0009_draft_draft_log_record_and_dependency_hashing.py +++ b/openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py @@ -1,7 +1,7 @@ -# Generated by Django 4.2.23 on 2025-10-02 16:16 +# Generated by Django 5.2.7 on 2025-10-29 17:59 -from django.db import migrations, models import django.db.models.deletion +from django.db import migrations, models class Migration(migrations.Migration): @@ -14,7 +14,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name='draft', name='draft_log_record', - field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.draftchangelogrecord'), + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='oel_publishing.draftchangelogrecord'), ), migrations.AddField( model_name='draftchangelogrecord', @@ -26,6 +26,19 @@ class Migration(migrations.Migration): name='dependencies_hash_digest', field=models.CharField(blank=True, 
default='', editable=False, max_length=8), ), + migrations.CreateModel( + name='PublishableEntityVersionDependency', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('referenced_entity', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentity')), + ('referent_version', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentityversion')), + ], + ), + migrations.AddField( + model_name='publishableentityversion', + name='dependencies', + field=models.ManyToManyField(related_name='affects', through='oel_publishing.PublishableEntityVersionDependency', to='oel_publishing.publishableentity'), + ), migrations.CreateModel( name='PublishSideEffect', fields=[ @@ -38,25 +51,12 @@ class Migration(migrations.Migration): 'verbose_name_plural': 'Publish Side Effects', }, ), - migrations.CreateModel( - name='PublishableEntityVersionDependency', - fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('entity', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentity')), - ('version', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentityversion')), - ], - ), - migrations.AddField( - model_name='publishableentityversion', - name='dependencies', - field=models.ManyToManyField(related_name='affects', through='oel_publishing.PublishableEntityVersionDependency', to='oel_publishing.publishableentity'), + migrations.AddConstraint( + model_name='publishableentityversiondependency', + constraint=models.UniqueConstraint(fields=('referent_version', 'referenced_entity'), name='oel_pevd_uniq_rv_re'), ), migrations.AddConstraint( model_name='publishsideeffect', constraint=models.UniqueConstraint(fields=('cause', 'effect'), name='oel_pub_pse_uniq_c_e'), ), - migrations.AddConstraint( - 
model_name='publishableentityversiondependency', - constraint=models.UniqueConstraint(fields=('version', 'entity'), name='oel_pevd_uniq_version_entity'), - ), ] diff --git a/openedx_learning/apps/authoring/publishing/migrations/0010_alter_draft_draft_log_record.py b/openedx_learning/apps/authoring/publishing/migrations/0010_alter_draft_draft_log_record.py deleted file mode 100644 index b3e29422b..000000000 --- a/openedx_learning/apps/authoring/publishing/migrations/0010_alter_draft_draft_log_record.py +++ /dev/null @@ -1,19 +0,0 @@ -# Generated by Django 5.2.7 on 2025-10-28 23:43 - -import django.db.models.deletion -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('oel_publishing', '0009_draft_draft_log_record_and_dependency_hashing'), - ] - - operations = [ - migrations.AlterField( - model_name='draft', - name='draft_log_record', - field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='oel_publishing.draftchangelogrecord'), - ), - ] diff --git a/openedx_learning/apps/authoring/publishing/models/draft_log.py b/openedx_learning/apps/authoring/publishing/models/draft_log.py index 5f675218d..fe93ac662 100644 --- a/openedx_learning/apps/authoring/publishing/models/draft_log.py +++ b/openedx_learning/apps/authoring/publishing/models/draft_log.py @@ -63,7 +63,11 @@ class Draft(models.Model): # Note: this is actually a 1:1 relation in practice, but I'm keeping the # definition more consistent with the Published model, which has an fkey # to PublishLogRecord. Unlike PublishLogRecord, this fkey is a late - # addition to this data model, so we have to allow null values. + # addition to this data model, so we have to allow null values for the + # initial migration. 
But making this nullable also has another advantage, + # in that it allows us to set the draft_log_record to the most recent change + # while inside a bulk_draft_changes_for operation, and then delete that log + # record if it is undone in the same bulk operation. draft_log_record = models.ForeignKey( "DraftChangeLogRecord", on_delete=models.SET_NULL, diff --git a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py index bfe9517c0..1dc2021ff 100644 --- a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py +++ b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py @@ -301,14 +301,14 @@ class PublishableEntityVersionDependency(models.Model): .. no_pii """ - version = models.ForeignKey(PublishableEntityVersion, on_delete=models.RESTRICT) - entity = models.ForeignKey(PublishableEntity, on_delete=models.RESTRICT) + referent_version = models.ForeignKey(PublishableEntityVersion, on_delete=models.RESTRICT) + referenced_entity = models.ForeignKey(PublishableEntity, on_delete=models.RESTRICT) class Meta: constraints = [ models.UniqueConstraint( - fields=["version", "entity"], - name="oel_pevd_uniq_version_entity", + fields=["referent_version", "referenced_entity"], + name="oel_pevd_uniq_rv_re", ) ] From a09cdab2e6934345aa97868cf040d7e45bdec549 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 29 Oct 2025 14:22:18 -0400 Subject: [PATCH 15/25] temp: tried to fix up the docstring around set_version_dependencies to be clearer --- .../apps/authoring/publishing/api.py | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 432542d71..9c9df1efe 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -258,16 +258,30 @@ def 
set_version_dependencies( this function). **This function is not atomic.** If you're doing backfills, you must wrap calls to this function with a transaction.atomic() call. - The idea behind dependencies is that a PublishableEntityVersion may be - declared to reference unpinned PublishableEntities. Changes to those + The idea behind dependencies is that a PublishableEntity's Versions may + be declared to reference unpinned PublishableEntities. Changes to those referenced PublishableEntities still affect the draft or published state of - the PublisahbleEntity, even if the version of the PublishableEntity is not + the referent PublishableEntity, even if the referent entity's version is not incremented. - For example, a Unit does not get a new version when unpinned child - Components are updated, but we do consider a Unit version to be changed when - that happens. So the child Components would be dependencies of the Unit - version in this case. + For example, we only create a new UnitVersion when there are changes to the + metadata of the Unit itself. So this would happen when the name of the Unit + is changed, or if a child Component is added, removed, or reordered. No new + UnitVersion is created when a child Component of that Unit is modified or + published. Yet we still consider a Unit to be affected when one of its child + Components is modified or published. Therefore, we say that the child + Components are dependencies of the UnitVersion. + + Dependencies vs. Container Rows/Children + + Dependencies overlap with the concept of container child rows, but the two + are not exactly the same. For instance: + + * Dependencies have no sense of ordering. + * If a row is declared to be pinned to a specific version of a child, then + it is NOT a dependency. For example, if U1.v1 is declared to have a pinned + reference to ComponentVersion C1.v1, then future changes to C1 do not + affect U1.v1 because U1.v1 will just ignore those new ComponentVersions. 
Guidelines: From 5bf98624ebb293b84a1728d0879c6a6081e6c460 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 29 Oct 2025 16:04:49 -0400 Subject: [PATCH 16/25] refactor: more field name tweaking --- openedx_learning/apps/authoring/publishing/api.py | 3 ++- .../publishing/migrations/0009_dependencies_and_hashing.py | 4 ++-- .../apps/authoring/publishing/models/publishable_entity.py | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 9c9df1efe..47a09bf31 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -301,7 +301,8 @@ def set_version_dependencies( PublishableEntityVersionDependency.objects.bulk_create( [ PublishableEntityVersionDependency( - referent_version_id=version_id, referenced_entity_id=dep_entity_id + referring_version_id=version_id, + referenced_entity_id=dep_entity_id, ) for dep_entity_id in set(dependencies) # dependencies have no ordering ], diff --git a/openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py b/openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py index 4b840add9..86ca1037d 100644 --- a/openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py +++ b/openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py @@ -31,7 +31,7 @@ class Migration(migrations.Migration): fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('referenced_entity', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentity')), - ('referent_version', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentityversion')), + ('referring_version', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, 
to='oel_publishing.publishableentityversion')), ], ), migrations.AddField( @@ -53,7 +53,7 @@ class Migration(migrations.Migration): ), migrations.AddConstraint( model_name='publishableentityversiondependency', - constraint=models.UniqueConstraint(fields=('referent_version', 'referenced_entity'), name='oel_pevd_uniq_rv_re'), + constraint=models.UniqueConstraint(fields=('referring_version', 'referenced_entity'), name='oel_pevd_uniq_rv_re'), ), migrations.AddConstraint( model_name='publishsideeffect', diff --git a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py index 1dc2021ff..f4bb1b8e8 100644 --- a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py +++ b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py @@ -301,13 +301,13 @@ class PublishableEntityVersionDependency(models.Model): .. no_pii """ - referent_version = models.ForeignKey(PublishableEntityVersion, on_delete=models.RESTRICT) + referring_version = models.ForeignKey(PublishableEntityVersion, on_delete=models.RESTRICT) referenced_entity = models.ForeignKey(PublishableEntity, on_delete=models.RESTRICT) class Meta: constraints = [ models.UniqueConstraint( - fields=["referent_version", "referenced_entity"], + fields=["referring_version", "referenced_entity"], name="oel_pevd_uniq_rv_re", ) ] From f23fd0237d542681c3622bcb285f59f0e27d5bfd Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 29 Oct 2025 20:51:20 -0400 Subject: [PATCH 17/25] feat: add data migration for dependencies --- .../apps/authoring/publishing/api.py | 43 ++++- .../migrations/0010_backfill_dependencies.py | 151 ++++++++++++++++++ 2 files changed, 188 insertions(+), 6 deletions(-) create mode 100644 openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 
47a09bf31..ace8a6c89 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -941,11 +941,12 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) [log_record_rel], ) - _update_branch_dependencies_hash_digests(change_log) + update_dependencies_hash_digests_for_log(change_log) -def _update_branch_dependencies_hash_digests( +def update_dependencies_hash_digests_for_log( change_log: DraftChangeLog | PublishLog, + backfill: bool=False, ) -> None: """ Update dependencies_hash_digest for Drafts or Published in a change log. @@ -960,6 +961,10 @@ def _update_branch_dependencies_hash_digests( change_log: A DraftChangeLog or PublishLog that already has all side-effects added to it. The Draft and Published models should already be updated to point to the post-change versions. + backfill: If this is true, we will not trust the hash values stored on + log records outside of our log, i.e. things that we would normally + expect to be pre-calculated. This will be important for the initial + data migration. """ if isinstance(change_log, DraftChangeLog): branch = "draft" @@ -1022,7 +1027,11 @@ def _update_branch_dependencies_hash_digests( records_that_need_hashes.append(record) record_ids_to_live_deps[record.id] = deps - untrusted_record_id_set = set(rec.id for rec in records_that_need_hashes) + if backfill: + untrusted_record_id_set = None + else: + untrusted_record_id_set = set(rec.id for rec in records_that_need_hashes) + for record in records_that_need_hashes: record.dependencies_hash_digest = hash_for_log_record( record, @@ -1041,7 +1050,7 @@ def hash_for_log_record( record: DraftChangeLogRecord | PublishLogRecord, record_ids_to_hash_digests: dict, record_ids_to_live_deps: dict, - untrusted_record_id_set: set, + untrusted_record_id_set: set | None, ) -> str: """ The hash digest for a given change log record. 
@@ -1104,10 +1113,14 @@ def hash_for_log_record( # us to recalculate the dependencies_hash_digest for that Unit. Doing that # recalculation requires us to fetch the dependencies_hash_digest of the # unchanged child Component as well. - if record.id not in untrusted_record_id_set: + # + # If we aren't given an explicit untrusted_record_id_set, it means we can't + # trust anything. This would happen when we're bootstrapping things with an + # initial data migration. + if (untrusted_record_id_set is not None) and (record.id not in untrusted_record_id_set): return record.dependencies_hash_digest - # Normal recursive case: + # Normal recursive case starts here: if isinstance(record, DraftChangeLogRecord): branch = "draft" elif isinstance(record, PublishLogRecord): @@ -1117,6 +1130,24 @@ def hash_for_log_record( f"expected DraftChangeLogRecord or PublishLogRecord, not {type(record)}" ) + # This is extra work that only happens in case of a backfill, where we might + # need to compute dependency hashes for things outside of our log (because + # we don't trust them). + if record.id not in record_ids_to_live_deps: + deps = list( + entity for entity in record.new_version.dependencies.all() + if hasattr(entity, branch) and getattr(entity, branch).version + ) + # If there are no live dependencies, this log record also gets the + # default/blank value. + if not deps: + record_ids_to_hash_digests[record.id] = '' + return '' + + record_ids_to_live_deps[record.id] = deps + # End special handling for backfill. 
+ + # Begin normal dependencies: list[PublishableEntity] = sorted( record_ids_to_live_deps[record.id], key=lambda entity: getattr(entity, branch).log_record.new_version_id, diff --git a/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py b/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py new file mode 100644 index 000000000..70c73d29a --- /dev/null +++ b/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py @@ -0,0 +1,151 @@ +""" +Backfill PublishableEntityVersionDependency entries based on ContainerVersions. + +We're introducing a lower-level publishing concept of a dependency that will be +used by Containers, but this means we have to backfill that dependency info for +existing Containers in the system. +""" +from django.db import migrations +from django.db.models import F + + +def create_backfill(apps, schema_editor): + """ + Create dependency entries and update dep hashes for Draft and Published. + """ + _create_dependencies(apps) + _update_drafts(apps) + _update_draft_dependencies_hashes(apps) + _update_published_dependencies_hashes(apps) + + +def _create_dependencies(apps): + """ + Populate the PublishableEntityVersion.dependencies relation. + + The only ones we should have in the system at this point are the ones from + containers, so we query ContainerVersion for that. + """ + PublishableEntityVersionDependency = apps.get_model( + "oel_publishing", "PublishableEntityVersionDependency" + ) + ContainerVersion = apps.get_model("oel_publishing", "ContainerVersion") + + for container_version in ContainerVersion.objects.all(): + # child_entity_ids is a set to de-dupe. This doesn't handle pinned + # child references yet, but you can't actually make those in a real + # library yet, so we shouldn't have that data lying around to migrate. 
+ child_entity_ids = set( + container_version + .entity_list + .entitylistrow_set + .all() + .values_list("entity_id", flat=True) + ) + PublishableEntityVersionDependency.objects.bulk_create( + [ + PublishableEntityVersionDependency( + referring_version_id=container_version.pk, + referenced_entity_id=entity_id + ) + for entity_id in child_entity_ids + ], + ignore_conflicts=True, + ) + + +def _update_drafts(apps): + """ + Update Draft entries to point to their most recent DraftLogRecord. + + This is slow and expensive. + """ + Draft = apps.get_model("oel_publishing", "Draft") + DraftChangeLogRecord = apps.get_model("oel_publishing", "DraftChangeLogRecord") + for draft in Draft.objects.all(): + draft_log_record = ( + # Find the most recent DraftChangeLogRecord related to this Draft, + DraftChangeLogRecord.objects + .filter(entity_id=draft.pk) + .order_by('-pk') + .first() + ) + draft.draft_log_record = draft_log_record + draft.save() + + +def _update_draft_dependencies_hashes(apps): + """ + Update the dependency_hash_digest for all DraftChangeLogRecords. + + Backfill dependency state hashes. The important thing here is that things + without dependencies will have the default (blank) state hash, so we only + need to query for Draft entries for Containers. + + We are only backfilling the current DraftChangeLogRecords pointed to by the + Draft entries now. We are not backfilling all historical + DraftChangeLogRecords. Full historical reconstruction is probably possible, + but it's not really worth the cost and complexity. + """ + from ..api import update_dependencies_hash_digests_for_log + from ..models import DraftChangeLog + + Draft = apps.get_model("oel_publishing", "Draft") + # All DraftChangeLogs that have records that are pointed to by the current + # Draft and have a possibility of having dependencies. 
+ change_logs = DraftChangeLog.objects.filter( + pk=F('records__entity__draft__draft_log_record__draft_change_log'), + records__entity__draft__version__isnull=False, + records__entity__container__isnull=False, + ).distinct() + for change_log in change_logs: + update_dependencies_hash_digests_for_log(change_log, backfill=True) + +def _update_published_dependencies_hashes(apps): + """ + Update all container Published.dependencies_hash_digest + + Backfill dependency state hashes. The important thing here is that things + without dependencies will have the default (blank) state hash, so we only + need to query for Published entries for Containers. + """ + from ..api import update_dependencies_hash_digests_for_log + from ..models import PublishLog + + Published = apps.get_model("oel_publishing", "Published") + # All PublishLogs that have records that are pointed to by the current + # Published and have a possibility of having dependencies. + change_logs = PublishLog.objects.filter( + pk=F('records__entity__published__publish_log_record__publish_log'), + records__entity__published__version__isnull=False, + records__entity__container__isnull=False, + ).distinct() + for change_log in change_logs: + update_dependencies_hash_digests_for_log(change_log, backfill=True) + +def remove_backfill(apps, schema_editor): + """ + Reset all dep hash values to default ('') and remove dependencies. 
+ """ + Draft = apps.get_model("oel_publishing", "Draft") + DraftChangeLogRecord = apps.get_model("oel_publishing", "DraftChangeLogRecord") + PublishLogRecord = apps.get_model("oel_publishing", "PublishLogRecord") + PublishableEntityVersionDependency = apps.get_model( + "oel_publishing", "PublishableEntityVersionDependency" + ) + + PublishLogRecord.objects.all().update(dependencies_hash_digest='') + DraftChangeLogRecord.objects.all().update(dependencies_hash_digest='') + PublishableEntityVersionDependency.objects.all().delete() + Draft.objects.all().update(draft_log_record=None) + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_publishing', '0009_dependencies_and_hashing'), + ] + + operations = [ + migrations.RunPython(create_backfill, reverse_code=remove_backfill) + ] From 4ce7dbbde2722a6e8deba33bfaee3c966c2c4182 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 29 Oct 2025 21:12:21 -0400 Subject: [PATCH 18/25] temp: cleanup --- .../publishing/migrations/0010_backfill_dependencies.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py b/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py index 70c73d29a..c274a401b 100644 --- a/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py +++ b/openedx_learning/apps/authoring/publishing/migrations/0010_backfill_dependencies.py @@ -90,7 +90,6 @@ def _update_draft_dependencies_hashes(apps): from ..api import update_dependencies_hash_digests_for_log from ..models import DraftChangeLog - Draft = apps.get_model("oel_publishing", "Draft") # All DraftChangeLogs that have records that are pointed to by the current # Draft and have a possibility of having dependencies. 
change_logs = DraftChangeLog.objects.filter( @@ -112,7 +111,6 @@ def _update_published_dependencies_hashes(apps): from ..api import update_dependencies_hash_digests_for_log from ..models import PublishLog - Published = apps.get_model("oel_publishing", "Published") # All PublishLogs that have records that are pointed to by the current # Published and have a possibility of having dependencies. change_logs = PublishLog.objects.filter( From 1a858f6e72b05ec776d923e1d7a3bb434edd2195 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 29 Oct 2025 21:44:33 -0400 Subject: [PATCH 19/25] temp: lint fixups --- .../apps/authoring/publishing/admin.py | 2 +- .../apps/authoring/publishing/api.py | 31 ++++++++++--------- .../authoring/publishing/models/draft_log.py | 18 +++++------ openedx_learning/lib/fields.py | 1 + .../apps/authoring/publishing/test_api.py | 2 -- tox.ini | 4 +-- 6 files changed, 29 insertions(+), 29 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/admin.py b/openedx_learning/apps/authoring/publishing/admin.py index 5cff05719..367111305 100644 --- a/openedx_learning/apps/authoring/publishing/admin.py +++ b/openedx_learning/apps/authoring/publishing/admin.py @@ -121,6 +121,7 @@ def get_queryset(self, request): .prefetch_related('dependencies') ) + class PublishStatusFilter(admin.SimpleListFilter): """ Custom filter for entities that have unpublished changes. @@ -242,7 +243,6 @@ def published_version(self, entity: PublishableEntity): return None - @admin.register(Published) class PublishedAdmin(ReadOnlyModelAdmin): """ diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index ace8a6c89..0e0f68a97 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -670,9 +670,9 @@ def set_draft_version( # to accommodate it. 
draft.draft_log_record = ( DraftChangeLogRecord.objects - .filter(entity_id=draft.entity_id) - .order_by("-pk") - .first() + .filter(entity_id=draft.entity_id) + .order_by("-pk") + .first() ) draft.save() @@ -946,7 +946,7 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) def update_dependencies_hash_digests_for_log( change_log: DraftChangeLog | PublishLog, - backfill: bool=False, + backfill: bool = False, ) -> None: """ Update dependencies_hash_digest for Drafts or Published in a change log. @@ -981,14 +981,12 @@ def update_dependencies_hash_digests_for_log( dependencies_prefetch = Prefetch( "new_version__dependencies", - queryset=( - PublishableEntity.objects - .select_related( - f"{branch}__version", - f"{branch}__{log_record_relation}", - ) - .order_by(f"{branch}__version__uuid") - ) + queryset=PublishableEntity.objects + .select_related( + f"{branch}__version", + f"{branch}__{log_record_relation}", + ) + .order_by(f"{branch}__version__uuid") ) changed_records: QuerySet[DraftChangeLogRecord] | QuerySet[PublishLogRecord] changed_records = ( @@ -1134,6 +1132,9 @@ def hash_for_log_record( # need to compute dependency hashes for things outside of our log (because # we don't trust them). 
if record.id not in record_ids_to_live_deps: + if record.new_version is None: + record_ids_to_hash_digests[record.id] = '' + return '' deps = list( entity for entity in record.new_version.dependencies.all() if hasattr(entity, branch) and getattr(entity, branch).version @@ -1841,9 +1842,9 @@ def contains_unpublished_changes(container_id: int) -> bool: """ container = ( Container.objects - .select_related('publishable_entity__draft__draft_log_record') - .select_related('publishable_entity__published__publish_log_record') - .get(pk=container_id) + .select_related('publishable_entity__draft__draft_log_record') + .select_related('publishable_entity__published__publish_log_record') + .get(pk=container_id) ) if container.versioning.has_unpublished_changes: return True diff --git a/openedx_learning/apps/authoring/publishing/models/draft_log.py b/openedx_learning/apps/authoring/publishing/models/draft_log.py index fe93ac662..59283abf2 100644 --- a/openedx_learning/apps/authoring/publishing/models/draft_log.py +++ b/openedx_learning/apps/authoring/publishing/models/draft_log.py @@ -96,16 +96,17 @@ def with_unpublished_changes(self): """ return ( self.select_related("entity__published__version") - # Exclude where draft and published versions are the same + + # Exclude where draft and published versions are the same .exclude(entity__published__version_id=F("version_id")) - # Account for soft-deletes: - # NULL != NULL in SQL, so simply excluding entities where - # the Draft and Published versions match will not catch the - # case where a soft-delete has been published (i.e. both the - # Draft and Published versions are NULL). We need to - # explicitly check for that case instead, or else we will - # re-publish the same soft-deletes over and over again. + # Account for soft-deletes: + # NULL != NULL in SQL, so simply excluding entities where + # the Draft and Published versions match will not catch the + # case where a soft-delete has been published (i.e. 
both the + # Draft and Published versions are NULL). We need to + # explicitly check for that case instead, or else we will + # re-publish the same soft-deletes over and over again. .exclude( Q(version__isnull=True) & Q(entity__published__version__isnull=True) @@ -115,7 +116,6 @@ def with_unpublished_changes(self): objects = DraftQuerySet.as_manager() - class DraftChangeLog(models.Model): """ There is one row in this table for every time Drafts are created/modified. diff --git a/openedx_learning/lib/fields.py b/openedx_learning/lib/fields.py index 5d2bbddbb..c41678993 100644 --- a/openedx_learning/lib/fields.py +++ b/openedx_learning/lib/fields.py @@ -153,6 +153,7 @@ def hash_field(**kwargs) -> models.CharField: } return models.CharField(**(default_kwargs | kwargs)) + def manual_date_time_field() -> models.DateTimeField: """ DateTimeField that does not auto-generate values. diff --git a/tests/openedx_learning/apps/authoring/publishing/test_api.py b/tests/openedx_learning/apps/authoring/publishing/test_api.py index 86cfc76f6..f44667178 100644 --- a/tests/openedx_learning/apps/authoring/publishing/test_api.py +++ b/tests/openedx_learning/apps/authoring/publishing/test_api.py @@ -1169,7 +1169,6 @@ def test_draft_dependency_multiple_parents(self): assert side_effects.filter(effect__entity=unit_1.publishable_entity).count() == 1 assert side_effects.filter(effect__entity=unit_2.publishable_entity).count() == 1 - def test_multiple_layers_of_containers(self): """Test stacking containers three layers deep.""" # Note that these aren't real "components" and "units". Everything being @@ -1297,7 +1296,6 @@ def test_publish_all_layers(self): # the publish log records. 
assert publish_log.records.count() == 3 - def test_container_next_version(self): """Test that next_version works for containers.""" child_1 = publishing_api.create_publishable_entity( diff --git a/tox.ini b/tox.ini index 005ddb6c1..ebdc18874 100644 --- a/tox.ini +++ b/tox.ini @@ -75,8 +75,8 @@ deps = setuptools -r{toxinidir}/requirements/quality.txt commands = - pylint openedx_learning openedx_tagging tests test_utils manage.py setup.py - mypy + # pylint openedx_learning openedx_tagging tests test_utils manage.py setup.py + mypy --version pycodestyle openedx_learning openedx_tagging tests manage.py setup.py pydocstyle openedx_learning openedx_tagging tests manage.py setup.py isort --check-only --diff tests test_utils openedx_learning openedx_tagging manage.py setup.py test_settings.py From b2d1a7d2997408acfd2f0f3d19aecf9f71fd33dd Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 29 Oct 2025 23:21:49 -0400 Subject: [PATCH 20/25] temp: lint fixes and version bump --- openedx_learning/__init__.py | 2 +- .../apps/authoring/publishing/api.py | 21 +++++++++++++++---- .../authoring/publishing/models/__init__.py | 7 +------ .../authoring/publishing/models/draft_log.py | 6 +----- tox.ini | 2 +- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py index e84267f13..60b0a4af1 100644 --- a/openedx_learning/__init__.py +++ b/openedx_learning/__init__.py @@ -2,4 +2,4 @@ Open edX Learning ("Learning Core"). 
""" -__version__ = "0.29.1" +__version__ = "0.30.0" diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 0e0f68a97..bcc0e7924 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -15,6 +15,7 @@ from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.db.models import F, Prefetch, Q, QuerySet from django.db.transaction import atomic + from openedx_learning.lib.fields import create_hash_digest from .contextmanagers import DraftChangeLogContext @@ -1038,10 +1039,22 @@ def update_dependencies_hash_digests_for_log( untrusted_record_id_set, ) - record_cls.objects.bulk_update( - records_that_need_hashes, # type: ignore[arg-type] - ['dependencies_hash_digest'], - ) + _bulk_update_hashes(record_cls, records_that_need_hashes) + + +def _bulk_update_hashes(model_cls, records): + """ + bulk_update using the model class (PublishLogRecord or DraftChangeLogRecord) + + The only reason this function exists is because mypy 1.18.2 throws an + exception in validate_bulk_update() during "make quality" checks otherwise + (though curiously enough, not when that same version of mypy is called + directly). Given that I'm writing this on the night before the Ulmo release + cut, I'm not really interested in tracking down the underlying issue. + + The lack of type annotations on this function is very intentional. 
+ """ + model_cls.objects.bulk_update(records, ['dependencies_hash_digest']) def hash_for_log_record( diff --git a/openedx_learning/apps/authoring/publishing/models/__init__.py b/openedx_learning/apps/authoring/publishing/models/__init__.py index a3f5d35c6..32a73b214 100644 --- a/openedx_learning/apps/authoring/publishing/models/__init__.py +++ b/openedx_learning/apps/authoring/publishing/models/__init__.py @@ -14,12 +14,7 @@ """ from .container import Container, ContainerVersion -from .draft_log import ( - Draft, - DraftChangeLog, - DraftChangeLogRecord, - DraftSideEffect, -) +from .draft_log import Draft, DraftChangeLog, DraftChangeLogRecord, DraftSideEffect from .entity_list import EntityList, EntityListRow from .learning_package import LearningPackage from .publish_log import Published, PublishLog, PublishLogRecord, PublishSideEffect diff --git a/openedx_learning/apps/authoring/publishing/models/draft_log.py b/openedx_learning/apps/authoring/publishing/models/draft_log.py index 59283abf2..ed3eb8731 100644 --- a/openedx_learning/apps/authoring/publishing/models/draft_log.py +++ b/openedx_learning/apps/authoring/publishing/models/draft_log.py @@ -6,11 +6,7 @@ from django.db.models import F, Q from django.utils.translation import gettext_lazy as _ -from openedx_learning.lib.fields import ( - hash_field, - immutable_uuid_field, - manual_date_time_field, -) +from openedx_learning.lib.fields import hash_field, immutable_uuid_field, manual_date_time_field from .learning_package import LearningPackage from .publishable_entity import PublishableEntity, PublishableEntityVersion diff --git a/tox.ini b/tox.ini index ebdc18874..0344014dd 100644 --- a/tox.ini +++ b/tox.ini @@ -76,7 +76,7 @@ deps = -r{toxinidir}/requirements/quality.txt commands = # pylint openedx_learning openedx_tagging tests test_utils manage.py setup.py - mypy --version + mypy --show-traceback pycodestyle openedx_learning openedx_tagging tests manage.py setup.py pydocstyle openedx_learning openedx_tagging 
tests manage.py setup.py isort --check-only --diff tests test_utils openedx_learning openedx_tagging manage.py setup.py test_settings.py From 898be1edb1987c453e7c3d6d6a5d861679b990ad Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Thu, 30 Oct 2025 10:26:58 -0400 Subject: [PATCH 21/25] fix: version constraint was overly conservative --- .../publishing/migrations/0009_dependencies_and_hashing.py | 2 +- .../apps/authoring/publishing/models/publishable_entity.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py b/openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py index 86ca1037d..bbed19bd5 100644 --- a/openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py +++ b/openedx_learning/apps/authoring/publishing/migrations/0009_dependencies_and_hashing.py @@ -31,7 +31,7 @@ class Migration(migrations.Migration): fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('referenced_entity', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentity')), - ('referring_version', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentityversion')), + ('referring_version', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishableentityversion')), ], ), migrations.AddField( diff --git a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py index f4bb1b8e8..176735a44 100644 --- a/openedx_learning/apps/authoring/publishing/models/publishable_entity.py +++ b/openedx_learning/apps/authoring/publishing/models/publishable_entity.py @@ -301,7 +301,7 @@ class PublishableEntityVersionDependency(models.Model): .. 
no_pii """ - referring_version = models.ForeignKey(PublishableEntityVersion, on_delete=models.RESTRICT) + referring_version = models.ForeignKey(PublishableEntityVersion, on_delete=models.CASCADE) referenced_entity = models.ForeignKey(PublishableEntity, on_delete=models.RESTRICT) class Meta: From 3e5c507b3fe094b124f3d54621caf4cd736d3f6c Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Thu, 30 Oct 2025 14:19:20 -0400 Subject: [PATCH 22/25] temp: re-enable pylint (cough) --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 0344014dd..8c5fe2fcb 100644 --- a/tox.ini +++ b/tox.ini @@ -75,7 +75,7 @@ deps = setuptools -r{toxinidir}/requirements/quality.txt commands = - # pylint openedx_learning openedx_tagging tests test_utils manage.py setup.py + pylint openedx_learning openedx_tagging tests test_utils manage.py setup.py mypy --show-traceback pycodestyle openedx_learning openedx_tagging tests manage.py setup.py pydocstyle openedx_learning openedx_tagging tests manage.py setup.py From be1d376b56f5c16b7d71e30102c4fe7c96c5a80f Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Mon, 3 Nov 2025 16:47:16 -0500 Subject: [PATCH 23/25] temp: small renaming and comment fixes from Kyle's reviews --- .../apps/authoring/publishing/api.py | 37 ++++++++----------- .../publishing/models/publish_log.py | 3 +- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index bcc0e7924..2c0bf4a15 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -461,8 +461,8 @@ def publish_from_drafts( """ Publish the rows in the ``draft_model_qsets`` args passed in. - By default, this will also publish all dependencies (e.g. children) of the - Drafts that are passed in. + By default, this will also publish all dependencies (e.g. 
unpinned children) + of the Drafts that are passed in. """ if published_at is None: published_at = datetime.now(tz=timezone.utc) @@ -847,25 +847,20 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) ] # These are the Published or Draft objects where we need to repoint the - # log_record (publish_log_record or draft_change_log) to point to the - # side-effect changes, e.g. the DraftChangeLogRecord that says, "This - # Unit's version stayed the same, but its dependency hash changed + # log_record (publish_log_record or draft_change_log_record) to point to + # the side-effect changes, e.g. the DraftChangeLogRecord that says, + # "This Unit's version stayed the same, but its dependency hash changed # because a child Component's draft version was changed." We gather them # all up in a list so we can do a bulk_update on them. branch_objs_to_update_with_side_effects = [] while changes_and_affected: - original_change, affected = changes_and_affected.pop() - - # If the Draft or Published that is affected by this change is not - # already in the change_log, then we have to add it. - affected_version_pk = affected.version_id - + change, affected = changes_and_affected.pop() change_log_param = {} if branch_cls == Draft: - change_log_param['draft_change_log'] = original_change.draft_change_log # type: ignore[union-attr] + change_log_param['draft_change_log'] = change.draft_change_log # type: ignore[union-attr] elif branch_cls == Published: - change_log_param['publish_log'] = original_change.publish_log # type: ignore[union-attr] + change_log_param['publish_log'] = change.publish_log # type: ignore[union-attr] # Example: If the original_change is a DraftChangeLogRecord that # represents editing a Component, the side_effect_change is the @@ -880,8 +875,8 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) # old/new version entries. 
But if we're creating this change # record as a pure side-effect, then we use the (old_version # == new_version) convention to indicate that. - 'old_version_id': affected_version_pk, - 'new_version_id': affected_version_pk + 'old_version_id': affected.version_id, + 'new_version_id': affected.version_id, } ) # Update the current branch pointer (Draft or Published) for this @@ -898,10 +893,10 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) branch_objs_to_update_with_side_effects.append(draft_obj) # Create a new side effect (DraftSideEffect or PublishSideEffect) to - # record the relationship between the ``original_change`` and - # the corresponding ``side_effect_change``. We'll do this regardless - # of whether we created the ``side_effect_change`` or just pulled - # back an existing one. This addresses two things: + # record the relationship between the ``change`` and the + # corresponding ``side_effect_change``. We'll do this regardless of + # whether we created the ``side_effect_change`` or just pulled back + # an existing one. This addresses two things: # # 1. A change in multiple dependencies can generate multiple # side effects that point to the same change log record, i.e. @@ -917,7 +912,7 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) # Unit will be incremented, but we'll also create the # DraftSideEffect. side_effect_cls.objects.get_or_create( - cause=original_change, + cause=change, effect=side_effect_change, ) @@ -929,7 +924,7 @@ def _create_side_effects_for_change_log(change_log: DraftChangeLog | PublishLog) # Make sure we never re-add the change we just processed when we # queue up the next layer. 
- processed_entity_ids.add(original_change.entity_id) + processed_entity_ids.add(change.entity_id) changes_and_affected.extend( (side_effect_change, affected) diff --git a/openedx_learning/apps/authoring/publishing/models/publish_log.py b/openedx_learning/apps/authoring/publishing/models/publish_log.py index f5d5b33ba..d3bc1241f 100644 --- a/openedx_learning/apps/authoring/publishing/models/publish_log.py +++ b/openedx_learning/apps/authoring/publishing/models/publish_log.py @@ -74,8 +74,7 @@ class PublishLogRecord(models.Model): had the side-effect of changing the published state of this entity. For instance, if a Unit has unpinned references to its child Components (which it almost always will), then publishing one of those Components will alter - the published state of the Unit, even if the UnitVersion does not change. In - that case, we still consider the Unit to have been "published". + the published state of the Unit, even if the UnitVersion does not change. """ publish_log = models.ForeignKey( From 6673a6980ef43aa0baf26f0082b3cfe6c1d262cf Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Tue, 4 Nov 2025 16:14:22 -0500 Subject: [PATCH 24/25] temp: update openedx_learning/apps/authoring/publishing/api.py Co-authored-by: Kyle McCormick --- openedx_learning/apps/authoring/publishing/api.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 2c0bf4a15..5a6ab3b12 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -1096,9 +1096,12 @@ def hash_for_log_record( case. EntityVersions with dependencies - If an EntityVersion has dependencies, we calculate the dependency hash - digest by taking a hash of all the UUIDs of the active version. 
[ fill in - more later] + EntityVersions with dependencies + If an EntityVersion has dependencies, then its draft/published state + hash is based on the concatenation of, for each non-deleted dependency: + (i) the dependency's draft/published EntityVersion primary key, and + (ii) the dependency's own draft/published state hash, recursively re- + calculated if necessary. EntityVersions with soft-deleted dependencies A soft-deleted dependency isn't counted (it's as if the dependency were From 764c98e359daae8893f215491402d61d8f6f234f Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Thu, 6 Nov 2025 18:51:42 -0500 Subject: [PATCH 25/25] temp: comment fixups based on review comments --- .../apps/authoring/publishing/api.py | 59 +++++++++++-------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 5a6ab3b12..72eccd428 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -1063,39 +1063,42 @@ def hash_for_log_record( Note that this code is a little convoluted because we're working hard to minimize the number of database requests. All the data we really need could - be derived from just the record itself, but at a far higher cost. - - When we say "dependency", we mean when one or more PublishableEntities are - dependencies of a PublishableEntityVersion. A change in the Draft or - Published state of the PublishableEntity causes an implicit change to the - PublishableEntityVersion. The common case we have at the moment is when a - container type like a Unit has Components as dependencies, i.e. the - UnitVersion specifies Component Entities as dependencies. 
- - This means that the dependencies_hash_digest that summarizes the total state - of a PublishableEntity depends on: + be derived from querying various relations off the record that's passed in + as the first parameter, but at a far higher cost. + + The hash calculated here will be used for the dependencies_hash_digest + attribute of DraftChangeLogRecord and PublishLogRecord. The hash is intended + to calculate the currently "live" (current draft or published) state of all + dependencies (and transitive dependencies) of the PublishableEntityVersion + pointed to by DraftChangeLogRecord.new_version/PublishLogRecord.new_version. + + The common case we have at the moment is when a container type like a Unit + has unpinned child Components as dependencies. In the data model, those + dependency relationships are represented by the "dependencies" M:M relation + on PublishableEntityVersion. Since the Unit version's references to its + child Components are unpinned, the draft Unit is always pointing to the + latest draft versions of those Components and the published Unit is always + pointing to the latest published versions of those Components. + + This means that the total draft or published state of any PublishableEntity + depends on the combination of: 1. The definition of the current draft/published version of that entity. - 2. The current draft/published versions of all dependencies. + Example: Version 1 of a Unit would define that it had children [C1, C2]. + Version 2 of the same Unit might have children [C1, C2, C3]. + 2. The current draft/published versions of all dependencies. Example: What + are the current draft and published versions of C1, C2, and C3. - Which is why it makes sense to capture in a log record, since - PublishLogRecords or DraftChagneLogRecords are created whenever one of the + This is why it makes sense to capture in a log record, since + PublishLogRecords or DraftChangeLogRecords are created whenever one of the above two things changes. 
Here are the possible scenarios, including edge cases: - Soft-deletions - If the record.new_version is None, that means we've just soft-deleted - something (or published the soft-delete of something). We adopt the - convention that if something is soft-deleted, its dependencies_hash_digest - is reset to the default value of ''. - EntityVersions with no dependencies If record.new_version has no dependencies, dependencies_hash_digest is - likewise set to the default value of ''. This will be the most common - case. + set to the default value of ''. This will be the most common case. - EntityVersions with dependencies EntityVersions with dependencies If an EntityVersion has dependencies, then its draft/published state hash is based on the concatenation of, for each non-deleted dependency: @@ -1103,6 +1106,16 @@ def hash_for_log_record( (ii) the dependency's own draft/published state hash, recursively re- calculated if necessary. + Soft-deletions + If the record.new_version is None, that means we've just soft-deleted + something (or published the soft-delete of something). We adopt the + convention that if something is soft-deleted, its dependencies_hash_digest + is reset to the default value of ''. This is not strictly necessary for + the recursive hash calculation, but deleted entities will not have their + hash updated even as their non-deleted dependencies are updated underneath + them, so we set to '' to avoid falsely implying that the deleted entity's + dep hash is up to date. + EntityVersions with soft-deleted dependencies A soft-deleted dependency isn't counted (it's as if the dependency were removed). If all of an EntityVersion's dependencies are soft-deleted,