diff --git a/contentcuration/contentcuration/migrations/0168_alter_assessmentitem_type.py b/contentcuration/contentcuration/migrations/0168_alter_assessmentitem_type.py
new file mode 100644
index 0000000000..6a3c844d70
--- /dev/null
+++ b/contentcuration/contentcuration/migrations/0168_alter_assessmentitem_type.py
@@ -0,0 +1,30 @@
+# Generated by Django 3.2.24 on 2026-07-03 04:48
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("contentcuration", "0167_add_organization"),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name="assessmentitem",
+ name="type",
+ field=models.CharField(
+ choices=[
+ ("input_question", "Input Question"),
+ ("multiple_selection", "Multiple Selection"),
+ ("single_selection", "Single Selection"),
+ ("free_response", "Free Response"),
+ ("perseus_question", "Perseus Question"),
+ ("QTI", "QTI"),
+ ("true_false", "True/False"),
+ ],
+ default="multiple_selection",
+ max_length=50,
+ ),
+ ),
+ ]
diff --git a/contentcuration/contentcuration/tests/viewsets/test_assessmentitem.py b/contentcuration/contentcuration/tests/viewsets/test_assessmentitem.py
index 56f85c5799..925239fc76 100644
--- a/contentcuration/contentcuration/tests/viewsets/test_assessmentitem.py
+++ b/contentcuration/contentcuration/tests/viewsets/test_assessmentitem.py
@@ -8,6 +8,8 @@
from contentcuration import models
from contentcuration.tests import testdata
from contentcuration.tests.base import StudioAPITestCase
+from contentcuration.tests.utils.qti.test_validation import _item_xml
+from contentcuration.tests.utils.qti.test_validation import VALID_CHOICE_ITEM
from contentcuration.tests.viewsets.base import generate_create_event
from contentcuration.tests.viewsets.base import generate_delete_event
from contentcuration.tests.viewsets.base import generate_update_event
@@ -15,6 +17,25 @@
from contentcuration.viewsets.sync.constants import ASSESSMENTITEM
+QTI_ITEM_WITH_FILES = _item_xml(
+ "item_1",
+ "Sample Item",
+ ''
+ "choice_0"
+ "",
+ 'Select the correct answer. '
+ '
'
+ 'Option A'
+ ''
+ 'Option B'
+ "",
+)
+
+
class SyncTestCase(SyncTestMixin, StudioAPITestCase):
@property
def assessmentitem_metadata(self):
@@ -440,6 +461,160 @@ def test_update_assessmentitem_to_true_false(self):
"true_false",
)
+ def test_update_assessmentitem_to_qti(self):
+ assessmentitem = models.AssessmentItem.objects.create(
+ **self.assessmentitem_db_metadata
+ )
+ self.client.force_authenticate(user=self.user)
+ response = self.sync_changes(
+ [
+ generate_update_event(
+ [assessmentitem.contentnode_id, assessmentitem.assessment_id],
+ ASSESSMENTITEM,
+ {"type": "QTI", "raw_data": VALID_CHOICE_ITEM},
+ channel_id=self.channel.id,
+ )
+ ],
+ )
+ self.assertEqual(response.status_code, 200, response.content)
+ updated = models.AssessmentItem.objects.get(id=assessmentitem.id)
+ self.assertEqual(updated.type, "QTI")
+ self.assertEqual(updated.raw_data, VALID_CHOICE_ITEM)
+
+ def test_create_assessmentitem_preserves_raw_data_whitespace(self):
+ self.client.force_authenticate(user=self.user)
+ assessmentitem = self.assessmentitem_metadata
+ # Trailing whitespace only: a leading pad before the XML declaration
+ # would be invalid XML syntax and get rejected by QTI schema validation.
+ padded_raw_data = VALID_CHOICE_ITEM + "\n\n "
+ assessmentitem["type"] = "QTI"
+ assessmentitem["raw_data"] = padded_raw_data
+ response = self.sync_changes(
+ [
+ generate_create_event(
+ [assessmentitem["contentnode"], assessmentitem["assessment_id"]],
+ ASSESSMENTITEM,
+ assessmentitem,
+ channel_id=self.channel.id,
+ )
+ ],
+ )
+ self.assertEqual(response.status_code, 200, response.content)
+ created = models.AssessmentItem.objects.get(
+ assessment_id=assessmentitem["assessment_id"]
+ )
+ self.assertEqual(created.raw_data, padded_raw_data)
+
+ def test_create_qti_assessmentitem_bypasses_hints_answers_coercion(self):
+ self.client.force_authenticate(user=self.user)
+ assessmentitem = self.assessmentitem_metadata
+ assessmentitem["type"] = "QTI"
+ assessmentitem["raw_data"] = VALID_CHOICE_ITEM
+ assessmentitem["hints"] = "not a json array"
+ assessmentitem["answers"] = "also not a json array"
+ response = self.sync_changes(
+ [
+ generate_create_event(
+ [assessmentitem["contentnode"], assessmentitem["assessment_id"]],
+ ASSESSMENTITEM,
+ assessmentitem,
+ channel_id=self.channel.id,
+ )
+ ],
+ )
+ self.assertEqual(response.status_code, 200, response.content)
+ created = models.AssessmentItem.objects.get(
+ assessment_id=assessmentitem["assessment_id"]
+ )
+ self.assertEqual(created.hints, "not a json array")
+ self.assertEqual(created.answers, "also not a json array")
+
+ def _create_qti_referenced_files(self, checksums):
+ for checksum, ext in zip(checksums, ["png", "png", "pdf", "pdf"]):
+ image_file = testdata.fileobj_exercise_image()
+ image_file.checksum = checksum
+ image_file.file_format_id = ext
+ image_file.uploaded_by = self.user
+ image_file.save()
+
+ def test_create_qti_assessmentitem_extracts_files(self):
+ self.client.force_authenticate(user=self.user)
+ checksums = ["a" * 32, "c" * 32, "d" * 32, "b" * 32]
+ self._create_qti_referenced_files(checksums)
+
+ assessmentitem = self.assessmentitem_metadata
+ assessmentitem["type"] = "QTI"
+ assessmentitem["raw_data"] = QTI_ITEM_WITH_FILES
+ response = self.sync_changes(
+ [
+ generate_create_event(
+ [assessmentitem["contentnode"], assessmentitem["assessment_id"]],
+ ASSESSMENTITEM,
+ assessmentitem,
+ channel_id=self.channel.id,
+ )
+ ],
+ )
+ self.assertEqual(response.status_code, 200, response.content)
+ ai = models.AssessmentItem.objects.get(
+ assessment_id=assessmentitem["assessment_id"]
+ )
+ linked_checksums = set(ai.files.values_list("checksum", flat=True))
+ self.assertEqual(linked_checksums, set(checksums))
+
+ def test_qti_assessmentitem_full_round_trip(self):
+ self.client.force_authenticate(user=self.user)
+ checksums = ["a" * 32, "c" * 32, "d" * 32, "b" * 32]
+ self._create_qti_referenced_files(checksums)
+
+ assessmentitem = self.assessmentitem_metadata
+ assessmentitem["type"] = "QTI"
+ assessmentitem["raw_data"] = QTI_ITEM_WITH_FILES
+
+ create_response = self.sync_changes(
+ [
+ generate_create_event(
+ [assessmentitem["contentnode"], assessmentitem["assessment_id"]],
+ ASSESSMENTITEM,
+ assessmentitem,
+ channel_id=self.channel.id,
+ )
+ ],
+ )
+ self.assertEqual(create_response.status_code, 200, create_response.content)
+ ai = models.AssessmentItem.objects.get(
+ assessment_id=assessmentitem["assessment_id"]
+ )
+ self.assertEqual(ai.raw_data, QTI_ITEM_WITH_FILES)
+ self.assertEqual(ai.type, "QTI")
+ self.assertEqual(
+ set(ai.files.values_list("checksum", flat=True)), set(checksums)
+ )
+
+ # Drop the object/data reference (checksum "d"*32) and change the title.
+ updated_raw_data = QTI_ITEM_WITH_FILES.replace(
+ '',
+ "",
+ ).replace('title="Sample Item"', 'title="Updated Item"')
+ update_response = self.sync_changes(
+ [
+ generate_update_event(
+ [assessmentitem["contentnode"], assessmentitem["assessment_id"]],
+ ASSESSMENTITEM,
+ {"raw_data": updated_raw_data},
+ channel_id=self.channel.id,
+ )
+ ],
+ )
+ self.assertEqual(update_response.status_code, 200, update_response.content)
+ ai.refresh_from_db()
+ self.assertEqual(ai.raw_data, updated_raw_data)
+ self.assertEqual(
+ set(ai.files.values_list("checksum", flat=True)),
+ set(checksums) - {"d" * 32},
+ )
+
def test_attempt_update_missing_assessmentitem(self):
self.client.force_authenticate(user=self.user)
@@ -744,6 +919,54 @@ def test_invalid_hints_assessmentitem(self):
assessment_id=assessmentitem["assessment_id"]
)
+ def test_invalid_qti_assessmentitem(self):
+ self.client.force_authenticate(user=self.user)
+ assessmentitem = self.assessmentitem_metadata
+ assessmentitem["type"] = "QTI"
+ assessmentitem["raw_data"] = VALID_CHOICE_ITEM.replace(
+ 'orientation="vertical"', 'orientation="sideways"'
+ )
+ response = self.sync_changes(
+ [
+ generate_create_event(
+ [assessmentitem["contentnode"], assessmentitem["assessment_id"]],
+ ASSESSMENTITEM,
+ assessmentitem,
+ channel_id=self.channel.id,
+ ),
+ ],
+ )
+ self.assertEqual(response.json()["errors"][0]["table"], "assessmentitem")
+ self.assertTrue(response.json()["errors"][0]["errors"]["raw_data"])
+ self.assertEqual(len(response.json()["errors"]), 1)
+ with self.assertRaises(
+ models.AssessmentItem.DoesNotExist, msg="AssessmentItem was created"
+ ):
+ models.AssessmentItem.objects.get(
+ assessment_id=assessmentitem["assessment_id"]
+ )
+
+ def test_invalid_qti_xml_syntax_assessmentitem(self):
+ self.client.force_authenticate(user=self.user)
+ assessmentitem = self.assessmentitem_metadata
+ assessmentitem["type"] = "QTI"
+ assessmentitem["raw_data"] = ""
+ response = self.sync_changes(
+ [
+ generate_create_event(
+ [assessmentitem["contentnode"], assessmentitem["assessment_id"]],
+ ASSESSMENTITEM,
+ assessmentitem,
+ channel_id=self.channel.id,
+ ),
+ ],
+ )
+ self.assertTrue(response.json()["errors"][0]["errors"]["raw_data"])
+ with self.assertRaises(models.AssessmentItem.DoesNotExist):
+ models.AssessmentItem.objects.get(
+ assessment_id=assessmentitem["assessment_id"]
+ )
+
def test_valid_answers_assessmentitem(self):
self.client.force_authenticate(user=self.user)
assessmentitem = self.assessmentitem_metadata
diff --git a/contentcuration/contentcuration/utils/assessment/qti/validation.py b/contentcuration/contentcuration/utils/assessment/qti/validation.py
index 7cea89f276..b2f2243dbe 100644
--- a/contentcuration/contentcuration/utils/assessment/qti/validation.py
+++ b/contentcuration/contentcuration/utils/assessment/qti/validation.py
@@ -39,7 +39,7 @@ class QTIValidationResult:
errors: List[QTIValidationError] = field(default_factory=list)
-def _secure_parser() -> etree.XMLParser:
+def secure_parser() -> etree.XMLParser:
# resolve_entities=False + load_dtd=False blocks XXE/entity-expansion attacks;
# no_network=True blocks remote schemaLocation/entity fetches. QTI reaches this
# validator from ricecooker uploads and direct sync-API writes, not just the
@@ -54,12 +54,16 @@ def _secure_parser() -> etree.XMLParser:
)
+def parse_qti_xml(xml: bytes) -> etree._Element:
+ return etree.parse(BytesIO(xml), parser=secure_parser())
+
+
def validate_qti_item(xml: Union[str, bytes]) -> QTIValidationResult:
if isinstance(xml, str):
xml = xml.encode("utf-8")
try:
- doc = etree.parse(BytesIO(xml), parser=_secure_parser())
+ doc = parse_qti_xml(xml)
except etree.XMLSyntaxError as exc:
return QTIValidationResult(
is_valid=False,
diff --git a/contentcuration/contentcuration/viewsets/assessmentitem.py b/contentcuration/contentcuration/viewsets/assessmentitem.py
index 1877bfe6d7..4fa8614227 100644
--- a/contentcuration/contentcuration/viewsets/assessmentitem.py
+++ b/contentcuration/contentcuration/viewsets/assessmentitem.py
@@ -4,6 +4,7 @@
from django.db import transaction
from le_utils.constants import exercises
from le_utils.constants import format_presets
+from lxml import etree
from rest_framework import serializers
from rest_framework.permissions import IsAuthenticated
from rest_framework.serializers import ValidationError
@@ -12,6 +13,11 @@
from contentcuration.models import ContentNode
from contentcuration.models import File
from contentcuration.models import generate_object_storage_name
+from contentcuration.utils.assessment.qti.fields import (
+ entry_pattern as srcset_entry_pattern,
+)
+from contentcuration.utils.assessment.qti.validation import parse_qti_xml
+from contentcuration.utils.assessment.qti.validation import validate_qti_item
from contentcuration.viewsets.base import BulkCreateMixin
from contentcuration.viewsets.base import BulkListSerializer
from contentcuration.viewsets.base import BulkModelSerializer
@@ -58,6 +64,34 @@ def get_filenames_from_assessment(assessment_item):
)
+qti_reference_attributes = ("src", "href", "data")
+qti_checksum_filename_regex = re.compile(r"^[a-f0-9]{32}\.[0-9a-z]+$")
+
+
+def get_filenames_from_qti_item(assessment_item):
+ # QTI attribute values are bare . references, matching
+ # the TipTap editor's existing permanentSrc="." convention
+ # for round-tripped
tags -- not the markdown-only
+ # ${CONTENT_STORAGE}/. placeholder legacy types use.
+ checksums = set()
+ try:
+ doc = parse_qti_xml(assessment_item.raw_data.encode("utf-8"))
+ except etree.XMLSyntaxError:
+ return checksums
+ for element in doc.xpath("//*[@src or @href or @data or @srcset]"):
+ for attribute in qti_reference_attributes:
+ value = element.attrib.get(attribute)
+ if value and qti_checksum_filename_regex.match(value):
+ checksums.add(value)
+ srcset = element.attrib.get("srcset")
+ if srcset:
+ for entry in re.findall(srcset_entry_pattern, srcset):
+ token = entry[0].strip()
+ if token and qti_checksum_filename_regex.match(token):
+ checksums.add(token)
+ return checksums
+
+
class AssessmentListSerializer(BulkListSerializer):
def create(self, all_validated_data):
with transaction.atomic():
@@ -81,6 +115,7 @@ class AssessmentItemSerializer(BulkModelSerializer):
# to set it.
hints = serializers.CharField(required=False)
answers = serializers.CharField(required=False)
+ raw_data = serializers.CharField(required=False, trim_whitespace=False)
assessment_id = UUIDRegexField()
contentnode = UserFilteredPrimaryKeyRelatedField(
queryset=ContentNode.objects.all(), required=False
@@ -105,7 +140,33 @@ class Meta:
# Use the contentnode and assessment_id as the lookup field for updates
update_lookup_field = ("contentnode", "assessment_id")
+ @property
+ def _item_type(self):
+ return self.initial_data.get(
+ "type", self.instance.type if self.instance else None
+ )
+
+ def validate(self, data):
+ # Raising here (during is_valid(), before .save()) keeps this error
+ # attributed to just this item in the sync error channel. Raising from
+ # create()/update()/set_files() instead would be caught by the broad
+ # except Exception in create_from_changes/update_from_changes and
+ # reported as "Internal server error" for the whole batch.
+ data = super(AssessmentItemSerializer, self).validate(data)
+ if self._item_type == exercises.QTI:
+ raw_data = data.get(
+ "raw_data", self.instance.raw_data if self.instance else ""
+ )
+ result = validate_qti_item(raw_data)
+ if not result.is_valid:
+ raise ValidationError(
+ {"raw_data": [error.message for error in result.errors]}
+ )
+ return data
+
def validate_answers(self, value):
+ if self._item_type == exercises.QTI:
+ return value
answers = json.loads(value)
for answer in answers:
if not type(answer) is dict:
@@ -115,6 +176,8 @@ def validate_answers(self, value):
return value
def validate_hints(self, value):
+ if self._item_type == exercises.QTI:
+ return value
hints = json.loads(value)
for hint in hints:
if not type(hint) is dict:
@@ -128,25 +191,40 @@ def set_files(self, all_objects, all_validated_data=None): # noqa C901
files_to_update = {}
current_files_by_aitem = {}
- # Create a set of assessment item ids that have had markdown fields modified.
if all_validated_data:
# If this is an update operation, check the validated data for which items
- # have had these fields modified.
- md_fields_modified = {
+ # have had these fields modified. raw_data only carries file references for
+ # QTI items, so a raw_data-only change only counts as "modified" for those.
+ item_types_by_lookup = {
+ self.id_value_lookup(ai): ai.type for ai in all_objects
+ }
+ editable_text_fields_modified = {
self.id_value_lookup(ai)
for ai in all_validated_data
- if "question" in ai or "hints" in ai or "answers" in ai
+ if "question" in ai
+ or "hints" in ai
+ or "answers" in ai
+ or (
+ "raw_data" in ai
+ and item_types_by_lookup.get(self.id_value_lookup(ai))
+ == exercises.QTI
+ )
}
else:
# If this is a create operation, just check if these fields are not null.
- md_fields_modified = {
+ editable_text_fields_modified = {
self.id_value_lookup(ai)
for ai in all_objects
- if ai.question or ai.hints or ai.answers
+ if ai.question
+ or ai.hints
+ or ai.answers
+ or (ai.type == exercises.QTI and ai.raw_data)
}
all_objects = [
- ai for ai in all_objects if self.id_value_lookup(ai) in md_fields_modified
+ ai
+ for ai in all_objects
+ if self.id_value_lookup(ai) in editable_text_fields_modified
]
for file in File.objects.filter(assessment_item__in=all_objects):
@@ -156,7 +234,11 @@ def set_files(self, all_objects, all_validated_data=None): # noqa C901
for aitem in all_objects:
current_files = current_files_by_aitem.get(aitem.id, [])
- filenames = get_filenames_from_assessment(aitem)
+ filenames = (
+ get_filenames_from_qti_item(aitem)
+ if aitem.type == exercises.QTI
+ else get_filenames_from_assessment(aitem)
+ )
set_checksums = {filename.split(".")[0] for filename in filenames}
current_checksums = {f.checksum for f in current_files}
diff --git a/requirements.in b/requirements.in
index 17b03aa0a4..398c01aae7 100644
--- a/requirements.in
+++ b/requirements.in
@@ -5,7 +5,7 @@ djangorestframework==3.15.1
psycopg2-binary==2.9.11
django-js-reverse==0.10.2
django-registration==3.4
-le-utils==0.2.17
+le-utils==0.2.18
gunicorn==26.0.0
django-postmark==0.1.6
jsonfield==3.1.0
diff --git a/requirements.txt b/requirements.txt
index 1f6e4367f9..b1df711e1a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -177,7 +177,7 @@ langcodes==3.5.1
# via -r requirements.in
latex2mathml==3.78.1
# via -r requirements.in
-le-utils==0.2.17
+le-utils==0.2.18
# via -r requirements.in
lxml==6.1.1
# via -r requirements.in