From d6cb33ddd884729c156755713be231375d3339f6 Mon Sep 17 00:00:00 2001 From: Anton Melser Date: Mon, 27 Oct 2025 10:57:28 +0800 Subject: [PATCH 1/3] feat: Add pg18+ support to MultiCollationCharField Signed-off-by: Anton Melser --- .../0009_create_case_insensitive_collation.py | 35 +++++++++++++++++++ openedx_learning/lib/collations.py | 10 +++--- openedx_learning/lib/fields.py | 17 ++++++++- 3 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 openedx_learning/apps/authoring/publishing/migrations/0009_create_case_insensitive_collation.py diff --git a/openedx_learning/apps/authoring/publishing/migrations/0009_create_case_insensitive_collation.py b/openedx_learning/apps/authoring/publishing/migrations/0009_create_case_insensitive_collation.py new file mode 100644 index 00000000..c6646c77 --- /dev/null +++ b/openedx_learning/apps/authoring/publishing/migrations/0009_create_case_insensitive_collation.py @@ -0,0 +1,35 @@ +# Generated manually for PostgreSQL collation support +from django.contrib.postgres.operations import CreateCollation +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("oel_publishing", "0008_alter_draftchangelogrecord_options_and_more"), + ] + + operations = [ + # Create a custom case-insensitive collation for PostgreSQL. + # This collation is used by case_insensitive_char_field() to provide + # case-insensitive comparisons and unique constraints on PostgreSQL, + # matching the behavior of MySQL's utf8mb4_unicode_ci collation. + # + # Note: CreateCollation is a PostgreSQL-specific operation from + # django.contrib.postgres.operations. The operation checks + # schema_editor.connection.vendor and only executes when + # vendor=='postgresql', so it is a no-op on MySQL and SQLite. + # NOTE(review): importing that module may require psycopg everywhere - confirm.
+ # + # Requirements: + # - PostgreSQL 12+ (for non-deterministic collations) + # - PostgreSQL compiled with ICU support (standard in most distributions) + # + # This works regardless of the database's locale_provider setting + # (whether it's 'libc', 'icu', or 'c'). + CreateCollation( + "case_insensitive", + provider="icu", + locale="en-US-u-ks-level2", # ks-level2: ignore case differences + deterministic=False, + ), + ] diff --git a/openedx_learning/lib/collations.py b/openedx_learning/lib/collations.py index 5d63b8a6..0b79f1a3 100644 --- a/openedx_learning/lib/collations.py +++ b/openedx_learning/lib/collations.py @@ -2,8 +2,9 @@ This module has collation-related code to allow us to attach collation settings to specific fields on a per-database-vendor basis. This used by the ``fields`` module in order to specify field types have have normalized behavior between -SQLite and MySQL (see fields.py for more details). +SQLite, MySQL, and PostgreSQL (see fields.py for more details). """ + from django.db import models @@ -23,8 +24,9 @@ def __init__(self, *args, db_collations=None, db_collation=None, **kwargs): # p collations, like:: { - 'msyql': 'utf8mb4_bin', - 'sqlite': 'BINARY' + 'mysql': 'utf8mb4_bin', + 'sqlite': 'BINARY', + 'postgresql': 'C' } It is an error to pass in a CharField-style ``db_collation``. I @@ -42,7 +44,7 @@ def db_parameters(self, connection): We examine this field's ``db_collations`` attribute and return the collation that maps to ``connection.vendor``. This will typically be - 'mysql' or 'sqlite'. + 'mysql', 'sqlite', or 'postgresql'. """ db_params = models.Field.db_parameters(self, connection) diff --git a/openedx_learning/lib/fields.py b/openedx_learning/lib/fields.py index c4167899..cc12f606 100644 --- a/openedx_learning/lib/fields.py +++ b/openedx_learning/lib/fields.py @@ -8,6 +8,7 @@ We have helpers to make case sensitivity consistent across backends. MySQL is case-insensitive by default, SQLite and Postgres are case-sensitive.
""" + from __future__ import annotations import hashlib @@ -58,6 +59,14 @@ def case_insensitive_char_field(**kwargs) -> MultiCollationCharField: # utf8mb4_0900_ai_ci based on Unicode 9, while MariaDB has # uca1400_ai_ci based on Unicode 14. "mysql": "utf8mb4_unicode_ci", + # PostgreSQL: Using custom case-insensitive collation. + # This collation is created via a Django migration using CreateCollation. + # It uses the ICU provider with locale 'en-US' and deterministic=False + # to provide case-insensitive comparisons. This works with any PostgreSQL + # setup (regardless of the database's locale_provider setting) as long as + # PostgreSQL was compiled with ICU support (which is standard). + # This gives us behavior similar to MySQL's utf8mb4_unicode_ci. + "postgresql": "case_insensitive", }, } # Override our defaults with whatever is passed in. @@ -84,6 +93,12 @@ def case_sensitive_char_field(**kwargs) -> MultiCollationCharField: "db_collations": { "sqlite": "BINARY", "mysql": "utf8mb4_bin", + # PostgreSQL: Using "C" collation for case-sensitive, byte-order comparisons. + # This is the fastest collation and provides strict case-sensitive matching + # similar to MySQL's utf8mb4_bin and SQLite's BINARY. + # The "C" collation is always available in PostgreSQL and doesn't depend on + # locale settings. + "postgresql": "C", }, } # Override our defaults with whatever is passed in. @@ -190,7 +205,7 @@ class MultiCollationCharField(MultiCollationMixin, models.CharField): Django's CharField already supports specifying the database collation, but that only works with a single value. So there would be no way to say, "Use - utf8mb4_bin for MySQL, and BINARY if we're running SQLite." This is a + utf8mb4_bin for MySQL, BINARY for SQLite, and C for PostgreSQL." This is a problem because we run tests in SQLite (and may potentially run more later). It's also a problem if we ever want to support other database backends, like PostgreSQL. 
Even MariaDB is starting to diverge from MySQL in terms of what From 18d7a6fcd93ffaf09eee0ea54b6176b461ac52e2 Mon Sep 17 00:00:00 2001 From: Anton Melser Date: Mon, 27 Oct 2025 11:59:18 +0800 Subject: [PATCH 2/3] feat: update migrations to support pg18+ Also refactor common calls for DRY and simplifying any later standard collation updates Signed-off-by: Anton Melser --- .../collections/migrations/0001_initial.py | 4 +- ...ion_name_collection_created_by_and_more.py | 4 +- .../migrations/0004_collection_key.py | 6 +- .../components/migrations/0001_initial.py | 8 +- .../0002_alter_componentversioncontent_key.py | 2 +- .../contents/migrations/0001_initial.py | 8 +- .../publishing/migrations/0001_initial.py | 12 +-- ...0002_alter_learningpackage_key_and_more.py | 4 +- openedx_learning/lib/fields.py | 88 ++++++++++++------- .../core/tagging/migrations/0001_initial.py | 42 ++------- .../core/tagging/migrations/0001_squashed.py | 12 +-- .../migrations/0002_auto_20230718_2026.py | 3 +- .../migrations/0004_auto_20230723_2001.py | 3 +- .../0009_alter_objecttag_object_id.py | 2 +- .../migrations/0016_object_tag_export_id.py | 2 +- 15 files changed, 94 insertions(+), 106 deletions(-) diff --git a/openedx_learning/apps/authoring/collections/migrations/0001_initial.py b/openedx_learning/apps/authoring/collections/migrations/0001_initial.py index 0fdee273..159042dc 100644 --- a/openedx_learning/apps/authoring/collections/migrations/0001_initial.py +++ b/openedx_learning/apps/authoring/collections/migrations/0001_initial.py @@ -19,8 +19,8 @@ class Migration(migrations.Migration): name='Collection', fields=[ ('id', models.AutoField(primary_key=True, serialize=False)), - ('name', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, db_index=True, help_text='The name of the collection.', max_length=255)), - ('description', openedx_learning.lib.fields.MultiCollationCharField(blank=True, db_collations={'mysql': 
'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, help_text='Provides extra information for the user about this collection.', max_length=10000)), + ('name', openedx_learning.lib.fields.case_insensitive_char_field(db_index=True, help_text='The name of the collection.', max_length=255)), + ('description', openedx_learning.lib.fields.case_insensitive_char_field(blank=True, help_text='Provides extra information for the user about this collection.', max_length=10000)), ('enabled', models.BooleanField(default=True, help_text='Whether the collection is enabled or not.')), ('created', models.DateTimeField(auto_now_add=True)), ('modified', models.DateTimeField(auto_now=True)), diff --git a/openedx_learning/apps/authoring/collections/migrations/0002_remove_collection_name_collection_created_by_and_more.py b/openedx_learning/apps/authoring/collections/migrations/0002_remove_collection_name_collection_created_by_and_more.py index eee95e50..03dcb59d 100644 --- a/openedx_learning/apps/authoring/collections/migrations/0002_remove_collection_name_collection_created_by_and_more.py +++ b/openedx_learning/apps/authoring/collections/migrations/0002_remove_collection_name_collection_created_by_and_more.py @@ -28,7 +28,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name='collection', name='title', - field=openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, default='Collection', help_text='The title of the collection.', max_length=500), + field=openedx_learning.lib.fields.case_insensitive_char_field(default='Collection', help_text='The title of the collection.', max_length=500), preserve_default=False, ), migrations.AlterField( @@ -39,7 +39,7 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='collection', name='description', - field=openedx_learning.lib.fields.MultiCollationTextField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, default='', 
help_text='Provides extra information for the user about this collection.', max_length=10000), + field=openedx_learning.lib.fields.case_insensitive_text_field(blank=True, default='', help_text='Provides extra information for the user about this collection.', max_length=10000), ), migrations.AlterField( model_name='collection', diff --git a/openedx_learning/apps/authoring/collections/migrations/0004_collection_key.py b/openedx_learning/apps/authoring/collections/migrations/0004_collection_key.py index 843419ee..12aabf77 100644 --- a/openedx_learning/apps/authoring/collections/migrations/0004_collection_key.py +++ b/openedx_learning/apps/authoring/collections/migrations/0004_collection_key.py @@ -33,8 +33,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name='collection', name='key', - field=openedx_learning.lib.fields.MultiCollationCharField( - db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, + field=openedx_learning.lib.fields.case_sensitive_char_field( db_column='_key', max_length=500, null=True, blank=True), preserve_default=False, ), @@ -44,8 +43,7 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='collection', name='key', - field=openedx_learning.lib.fields.MultiCollationCharField( - db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, + field=openedx_learning.lib.fields.case_sensitive_char_field( db_column='_key', max_length=500, null=False, blank=False), preserve_default=False, ), diff --git a/openedx_learning/apps/authoring/components/migrations/0001_initial.py b/openedx_learning/apps/authoring/components/migrations/0001_initial.py index ece93796..76482839 100644 --- a/openedx_learning/apps/authoring/components/migrations/0001_initial.py +++ b/openedx_learning/apps/authoring/components/migrations/0001_initial.py @@ -22,7 +22,7 @@ class Migration(migrations.Migration): name='Component', fields=[ ('publishable_entity', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, 
primary_key=True, serialize=False, to='oel_publishing.publishableentity')), - ('local_key', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=500)), + ('local_key', openedx_learning.lib.fields.case_sensitive_char_field(max_length=500)), ], options={ 'verbose_name': 'Component', @@ -33,8 +33,8 @@ class Migration(migrations.Migration): name='ComponentType', fields=[ ('id', models.AutoField(primary_key=True, serialize=False)), - ('namespace', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=100)), - ('name', openedx_learning.lib.fields.MultiCollationCharField(blank=True, db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=100)), + ('namespace', openedx_learning.lib.fields.case_sensitive_char_field(max_length=100)), + ('name', openedx_learning.lib.fields.case_sensitive_char_field(blank=True, max_length=100)), ], ), migrations.CreateModel( @@ -53,7 +53,7 @@ class Migration(migrations.Migration): fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True, verbose_name='UUID')), - ('key', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=500)), + ('key', openedx_learning.lib.fields.case_sensitive_char_field(max_length=500)), ('learner_downloadable', models.BooleanField(default=False)), ('component_version', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_components.componentversion')), ('content', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_contents.content')), diff --git a/openedx_learning/apps/authoring/components/migrations/0002_alter_componentversioncontent_key.py b/openedx_learning/apps/authoring/components/migrations/0002_alter_componentversioncontent_key.py 
index ee12970d..0ffa1aba 100644 --- a/openedx_learning/apps/authoring/components/migrations/0002_alter_componentversioncontent_key.py +++ b/openedx_learning/apps/authoring/components/migrations/0002_alter_componentversioncontent_key.py @@ -15,6 +15,6 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='componentversioncontent', name='key', - field=openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, db_column='_key', max_length=500), + field=openedx_learning.lib.fields.case_sensitive_char_field(db_column='_key', max_length=500), ), ] diff --git a/openedx_learning/apps/authoring/contents/migrations/0001_initial.py b/openedx_learning/apps/authoring/contents/migrations/0001_initial.py index 1abf65ad..9b1037d1 100644 --- a/openedx_learning/apps/authoring/contents/migrations/0001_initial.py +++ b/openedx_learning/apps/authoring/contents/migrations/0001_initial.py @@ -24,7 +24,7 @@ class Migration(migrations.Migration): ('size', models.PositiveBigIntegerField(validators=[django.core.validators.MaxValueValidator(50000000)])), ('hash_digest', models.CharField(editable=False, max_length=40)), ('has_file', models.BooleanField()), - ('text', openedx_learning.lib.fields.MultiCollationTextField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=50000, null=True)), + ('text', openedx_learning.lib.fields.case_insensitive_text_field(blank=True, max_length=50000, null=True)), ('created', models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])), ], options={ @@ -36,9 +36,9 @@ class Migration(migrations.Migration): name='MediaType', fields=[ ('id', models.AutoField(primary_key=True, serialize=False)), - ('type', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=127)), - ('sub_type', 
openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=127)), - ('suffix', openedx_learning.lib.fields.MultiCollationCharField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=127)), + ('type', openedx_learning.lib.fields.case_insensitive_char_field(max_length=127)), + ('sub_type', openedx_learning.lib.fields.case_insensitive_char_field(max_length=127)), + ('suffix', openedx_learning.lib.fields.case_insensitive_char_field(blank=True, max_length=127)), ], ), migrations.AddConstraint( diff --git a/openedx_learning/apps/authoring/publishing/migrations/0001_initial.py b/openedx_learning/apps/authoring/publishing/migrations/0001_initial.py index 6ffb8b85..3f230b5b 100644 --- a/openedx_learning/apps/authoring/publishing/migrations/0001_initial.py +++ b/openedx_learning/apps/authoring/publishing/migrations/0001_initial.py @@ -25,9 +25,9 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(primary_key=True, serialize=False)), ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True, verbose_name='UUID')), - ('key', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=500)), - ('title', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=500)), - ('description', openedx_learning.lib.fields.MultiCollationTextField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, default='', max_length=10000)), + ('key', openedx_learning.lib.fields.case_sensitive_char_field(max_length=500)), + ('title', openedx_learning.lib.fields.case_insensitive_char_field(max_length=500)), + ('description', openedx_learning.lib.fields.case_insensitive_text_field(blank=True, default='', max_length=10000)), ('created', 
models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])), ('updated', models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])), ], @@ -41,7 +41,7 @@ class Migration(migrations.Migration): fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True, verbose_name='UUID')), - ('key', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=500)), + ('key', openedx_learning.lib.fields.case_sensitive_char_field(max_length=500)), ('created', models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])), ('created_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)), ('learning_package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='publishable_entities', to='oel_publishing.learningpackage')), @@ -56,7 +56,7 @@ class Migration(migrations.Migration): fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True, verbose_name='UUID')), - ('title', openedx_learning.lib.fields.MultiCollationCharField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, default='', max_length=500)), + ('title', openedx_learning.lib.fields.case_insensitive_char_field(blank=True, default='', max_length=500)), ('version_num', models.PositiveIntegerField(validators=[django.core.validators.MinValueValidator(1)])), ('created', models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])), ('created_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)), @@ -72,7 +72,7 @@ class 
Migration(migrations.Migration): fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True, verbose_name='UUID')), - ('message', openedx_learning.lib.fields.MultiCollationCharField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, default='', max_length=500)), + ('message', openedx_learning.lib.fields.case_insensitive_char_field(blank=True, default='', max_length=500)), ('published_at', models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])), ('learning_package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.learningpackage')), ('published_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)), diff --git a/openedx_learning/apps/authoring/publishing/migrations/0002_alter_learningpackage_key_and_more.py b/openedx_learning/apps/authoring/publishing/migrations/0002_alter_learningpackage_key_and_more.py index 0a4c3b48..a6705cd3 100644 --- a/openedx_learning/apps/authoring/publishing/migrations/0002_alter_learningpackage_key_and_more.py +++ b/openedx_learning/apps/authoring/publishing/migrations/0002_alter_learningpackage_key_and_more.py @@ -15,11 +15,11 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='learningpackage', name='key', - field=openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, db_column='_key', max_length=500), + field=openedx_learning.lib.fields.case_sensitive_char_field(db_column='_key', max_length=500), ), migrations.AlterField( model_name='publishableentity', name='key', - field=openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, db_column='_key', max_length=500), + 
field=openedx_learning.lib.fields.case_sensitive_char_field(db_column='_key', max_length=500), ), ] diff --git a/openedx_learning/lib/fields.py b/openedx_learning/lib/fields.py index cc12f606..f9ea7588 100644 --- a/openedx_learning/lib/fields.py +++ b/openedx_learning/lib/fields.py @@ -37,20 +37,8 @@ def create_hash_digest(data_bytes: bytes, num_bytes=20) -> str: return hashlib.blake2b(data_bytes, digest_size=num_bytes).hexdigest() -def case_insensitive_char_field(**kwargs) -> MultiCollationCharField: - """ - Return a case-insensitive ``MultiCollationCharField``. - - This means that entries will sort in a case-insensitive manner, and that - unique indexes will be case insensitive, e.g. you would not be able to - insert "abc" and "ABC" into the same table field if you put a unique index - on this field. - - You may override any argument that you would normally pass into - ``MultiCollationCharField`` (which is itself a subclass of ``CharField``). - """ - # Set our default arguments - final_kwargs = { +def default_case_insensitive_collations_args(**kwargs): + return { "null": False, "db_collations": { "sqlite": "NOCASE", @@ -68,11 +56,40 @@ def case_insensitive_char_field(**kwargs) -> MultiCollationCharField: # This gives us behavior similar to MySQL's utf8mb4_unicode_ci. "postgresql": "case_insensitive", }, + **kwargs, } - # Override our defaults with whatever is passed in. - final_kwargs.update(kwargs) - return MultiCollationCharField(**final_kwargs) + +def default_case_sensitive_collations_args(**kwargs): + return { + "null": False, + "db_collations": { + "sqlite": "BINARY", + "mysql": "utf8mb4_bin", + # PostgreSQL: Using "C" collation for case-sensitive, byte-order comparisons. + # This is the fastest collation and provides strict case-sensitive matching + # similar to MySQL's utf8mb4_bin and SQLite's BINARY. + # The "C" collation is always available in PostgreSQL and doesn't depend on + # locale settings. 
+ "postgresql": "C", + }, + **kwargs, + } + + +def case_insensitive_char_field(**kwargs) -> MultiCollationCharField: + """ + Return a case-insensitive ``MultiCollationCharField``. + + This means that entries will sort in a case-insensitive manner, and that + unique indexes will be case insensitive, e.g. you would not be able to + insert "abc" and "ABC" into the same table field if you put a unique index + on this field. + + You may override any argument that you would normally pass into + ``MultiCollationCharField`` (which is itself a subclass of ``CharField``). + """ + return MultiCollationCharField(**default_case_insensitive_collations_args(**kwargs)) def case_sensitive_char_field(**kwargs) -> MultiCollationCharField: @@ -87,24 +104,27 @@ def case_sensitive_char_field(**kwargs) -> MultiCollationCharField: You may override any argument that you would normally pass into ``MultiCollationCharField`` (which is itself a subclass of ``CharField``). """ - # Set our default arguments - final_kwargs = { - "null": False, - "db_collations": { - "sqlite": "BINARY", - "mysql": "utf8mb4_bin", - # PostgreSQL: Using "C" collation for case-sensitive, byte-order comparisons. - # This is the fastest collation and provides strict case-sensitive matching - # similar to MySQL's utf8mb4_bin and SQLite's BINARY. - # The "C" collation is always available in PostgreSQL and doesn't depend on - # locale settings. - "postgresql": "C", - }, - } - # Override our defaults with whatever is passed in. - final_kwargs.update(kwargs) + return MultiCollationCharField(**default_case_sensitive_collations_args(**kwargs)) - return MultiCollationCharField(**final_kwargs) + +def case_insensitive_text_field(**kwargs) -> MultiCollationTextField: + """ + Return a case-insensitive ``MultiCollationTextField``. + + You may override any argument that you would normally pass into + ``MultiCollationTextField`` (which is itself a subclass of ``TextField``). 
+ """ + return MultiCollationTextField(**default_case_insensitive_collations_args(**kwargs)) + + +def case_sensitive_text_field(**kwargs) -> MultiCollationTextField: + """ + Return a case-sensitive ``MultiCollationTextField``. + + You may override any argument that you would normally pass into + ``MultiCollationTextField`` (which is itself a subclass of ``TextField``). + """ + return MultiCollationTextField(**default_case_sensitive_collations_args(**kwargs)) def immutable_uuid_field() -> models.UUIDField: diff --git a/openedx_tagging/core/tagging/migrations/0001_initial.py b/openedx_tagging/core/tagging/migrations/0001_initial.py index 1599f876..4845eef6 100644 --- a/openedx_tagging/core/tagging/migrations/0001_initial.py +++ b/openedx_tagging/core/tagging/migrations/0001_initial.py @@ -18,11 +18,7 @@ class Migration(migrations.Migration): ("id", models.BigAutoField(primary_key=True, serialize=False)), ( "name", - openedx_learning.lib.fields.MultiCollationCharField( - db_collations={ - "mysql": "utf8mb4_unicode_ci", - "sqlite": "NOCASE", - }, + openedx_learning.lib.fields.case_insensitive_char_field( db_index=True, help_text="User-facing label used when applying tags from this taxonomy to Open edX objects.", max_length=255, @@ -75,23 +71,15 @@ class Migration(migrations.Migration): ("id", models.BigAutoField(primary_key=True, serialize=False)), ( "value", - openedx_learning.lib.fields.MultiCollationCharField( - db_collations={ - "mysql": "utf8mb4_unicode_ci", - "sqlite": "NOCASE", - }, + openedx_learning.lib.fields.case_insensitive_char_field( help_text="Content of a given tag, occupying the 'value' part of the key:value pair.", max_length=500, ), ), ( "external_id", - openedx_learning.lib.fields.MultiCollationCharField( + openedx_learning.lib.fields.case_insensitive_char_field( blank=True, - db_collations={ - "mysql": "utf8mb4_unicode_ci", - "sqlite": "NOCASE", - }, help_text="Used to link an Open edX Tag with a tag in an externally-defined taxonomy.", 
max_length=255, null=True, @@ -126,44 +114,28 @@ class Migration(migrations.Migration): ("id", models.BigAutoField(primary_key=True, serialize=False)), ( "object_id", - openedx_learning.lib.fields.MultiCollationCharField( - db_collations={ - "mysql": "utf8mb4_unicode_ci", - "sqlite": "NOCASE", - }, + openedx_learning.lib.fields.case_insensitive_char_field( help_text="Identifier for the object being tagged", max_length=255, ), ), ( "object_type", - openedx_learning.lib.fields.MultiCollationCharField( - db_collations={ - "mysql": "utf8mb4_unicode_ci", - "sqlite": "NOCASE", - }, + openedx_learning.lib.fields.case_insensitive_char_field( help_text="Type of object being tagged", max_length=255, ), ), ( "_name", - openedx_learning.lib.fields.MultiCollationCharField( - db_collations={ - "mysql": "utf8mb4_unicode_ci", - "sqlite": "NOCASE", - }, + openedx_learning.lib.fields.case_insensitive_char_field( help_text="User-facing label used for this tag, stored in case taxonomy is (or becomes) null. If the taxonomy field is set, then taxonomy.name takes precedence over this field.", max_length=255, ), ), ( "_value", - openedx_learning.lib.fields.MultiCollationCharField( - db_collations={ - "mysql": "utf8mb4_unicode_ci", - "sqlite": "NOCASE", - }, + openedx_learning.lib.fields.case_insensitive_char_field( help_text="User-facing value used for this tag, stored in case tag is null, e.g if taxonomy is free text, or if it becomes null (e.g. if the Tag is deleted). 
If the tag field is set, then tag.value takes precedence over this field.", max_length=500, ), diff --git a/openedx_tagging/core/tagging/migrations/0001_squashed.py b/openedx_tagging/core/tagging/migrations/0001_squashed.py index e4ee96cf..7644fc34 100644 --- a/openedx_tagging/core/tagging/migrations/0001_squashed.py +++ b/openedx_tagging/core/tagging/migrations/0001_squashed.py @@ -34,7 +34,7 @@ class Migration(migrations.Migration): name='Taxonomy', fields=[ ('id', models.BigAutoField(primary_key=True, serialize=False)), - ('name', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, db_index=True, help_text='User-facing label used when applying tags from this taxonomy to Open edX objects.', max_length=255)), + ('name', openedx_learning.lib.fields.case_insensitive_char_field(db_index=True, help_text='User-facing label used when applying tags from this taxonomy to Open edX objects.', max_length=255)), ('description', openedx_learning.lib.fields.MultiCollationTextField(blank=True, help_text='Provides extra information for the user when applying tags from this taxonomy to an object.')), ('enabled', models.BooleanField(default=True, help_text='Only enabled taxonomies will be shown to authors.')), ('allow_multiple', models.BooleanField(default=True, help_text='Indicates that multiple tags from this taxonomy may be added to an object.')), @@ -60,8 +60,8 @@ class Migration(migrations.Migration): name='Tag', fields=[ ('id', models.BigAutoField(primary_key=True, serialize=False)), - ('value', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, help_text="Content of a given tag, occupying the 'value' part of the key:value pair.", max_length=500)), - ('external_id', openedx_learning.lib.fields.MultiCollationCharField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, help_text='Used to link an Open edX Tag with a 
tag in an externally-defined taxonomy.', max_length=255, null=True)), + ('value', openedx_learning.lib.fields.case_insensitive_char_field(help_text="Content of a given tag, occupying the 'value' part of the key:value pair.", max_length=500)), + ('external_id', openedx_learning.lib.fields.case_insensitive_char_field(blank=True, help_text='Used to link an Open edX Tag with a tag in an externally-defined taxonomy.', max_length=255, null=True)), ('parent', models.ForeignKey(default=None, help_text='Tag that lives one level up from the current tag, forming a hierarchy.', null=True, on_delete=django.db.models.deletion.CASCADE, related_name='children', to='oel_tagging.tag')), ('taxonomy', models.ForeignKey(default=None, help_text='Namespace and rules for using a given set of tags.', null=True, on_delete=django.db.models.deletion.CASCADE, to='oel_tagging.taxonomy')), ], @@ -70,9 +70,9 @@ class Migration(migrations.Migration): name='ObjectTag', fields=[ ('id', models.BigAutoField(primary_key=True, serialize=False)), - ('object_id', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, db_index=True, editable=False, help_text='Identifier for the object being tagged', max_length=255)), - ('_name', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, help_text='User-facing label used for this tag, stored in case taxonomy is (or becomes) null. If the taxonomy field is set, then taxonomy.name takes precedence over this field.', max_length=255)), - ('_value', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, help_text='User-facing value used for this tag, stored in case tag is null, e.g if taxonomy is free text, or if it becomes null (e.g. if the Tag is deleted). 
If the tag field is set, then tag.value takes precedence over this field.', max_length=500)), + ('object_id', openedx_learning.lib.fields.case_sensitive_char_field(db_index=True, editable=False, help_text='Identifier for the object being tagged', max_length=255)), + ('_name', openedx_learning.lib.fields.case_insensitive_char_field(help_text='User-facing label used for this tag, stored in case taxonomy is (or becomes) null. If the taxonomy field is set, then taxonomy.name takes precedence over this field.', max_length=255)), + ('_value', openedx_learning.lib.fields.case_insensitive_char_field(help_text='User-facing value used for this tag, stored in case tag is null, e.g if taxonomy is free text, or if it becomes null (e.g. if the Tag is deleted). If the tag field is set, then tag.value takes precedence over this field.', max_length=500)), ('tag', models.ForeignKey(blank=True, default=None, help_text="Tag associated with this object tag. Provides the tag's 'value' if set.", null=True, on_delete=django.db.models.deletion.SET_NULL, to='oel_tagging.tag')), ('taxonomy', models.ForeignKey(default=None, help_text="Taxonomy that this object tag belongs to. 
Used for validating the tag and provides the tag's 'name' if set.", null=True, on_delete=django.db.models.deletion.SET_NULL, to='oel_tagging.taxonomy')), ], diff --git a/openedx_tagging/core/tagging/migrations/0002_auto_20230718_2026.py b/openedx_tagging/core/tagging/migrations/0002_auto_20230718_2026.py index d0d14c93..9f09cfdf 100644 --- a/openedx_tagging/core/tagging/migrations/0002_auto_20230718_2026.py +++ b/openedx_tagging/core/tagging/migrations/0002_auto_20230718_2026.py @@ -69,8 +69,7 @@ class Migration(migrations.Migration): migrations.AlterField( model_name="objecttag", name="object_id", - field=openedx_learning.lib.fields.MultiCollationCharField( - db_collations={"mysql": "utf8mb4_unicode_ci", "sqlite": "NOCASE"}, + field=openedx_learning.lib.fields.case_insensitive_char_field( editable=False, help_text="Identifier for the object being tagged", max_length=255, diff --git a/openedx_tagging/core/tagging/migrations/0004_auto_20230723_2001.py b/openedx_tagging/core/tagging/migrations/0004_auto_20230723_2001.py index c96e0520..dcad4337 100644 --- a/openedx_tagging/core/tagging/migrations/0004_auto_20230723_2001.py +++ b/openedx_tagging/core/tagging/migrations/0004_auto_20230723_2001.py @@ -14,8 +14,7 @@ class Migration(migrations.Migration): migrations.AlterField( model_name="objecttag", name="object_id", - field=openedx_learning.lib.fields.MultiCollationCharField( - db_collations={"mysql": "utf8mb4_unicode_ci", "sqlite": "NOCASE"}, + field=openedx_learning.lib.fields.case_insensitive_char_field( db_index=True, editable=False, help_text="Identifier for the object being tagged", diff --git a/openedx_tagging/core/tagging/migrations/0009_alter_objecttag_object_id.py b/openedx_tagging/core/tagging/migrations/0009_alter_objecttag_object_id.py index 9e1150b2..0a1ba712 100644 --- a/openedx_tagging/core/tagging/migrations/0009_alter_objecttag_object_id.py +++ b/openedx_tagging/core/tagging/migrations/0009_alter_objecttag_object_id.py @@ -15,6 +15,6 @@ class 
Migration(migrations.Migration): migrations.AlterField( model_name='objecttag', name='object_id', - field=openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, db_index=True, editable=False, help_text='Identifier for the object being tagged', max_length=255), + field=openedx_learning.lib.fields.case_sensitive_char_field(db_index=True, editable=False, help_text='Identifier for the object being tagged', max_length=255), ), ] diff --git a/openedx_tagging/core/tagging/migrations/0016_object_tag_export_id.py b/openedx_tagging/core/tagging/migrations/0016_object_tag_export_id.py index 671cf1a3..cae59c38 100644 --- a/openedx_tagging/core/tagging/migrations/0016_object_tag_export_id.py +++ b/openedx_tagging/core/tagging/migrations/0016_object_tag_export_id.py @@ -56,7 +56,7 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='objecttag', name='_export_id', - field=openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, help_text='User-facing label used for this tag, stored in case taxonomy is (or becomes) null. If the taxonomy field is set, then taxonomy.export_id takes precedence over this field.', max_length=255), + field=openedx_learning.lib.fields.case_insensitive_char_field(help_text='User-facing label used for this tag, stored in case taxonomy is (or becomes) null. 
If the taxonomy field is set, then taxonomy.export_id takes precedence over this field.', max_length=255), ), migrations.RunPython(migrate_language_export_id, reverse_language_export_id), ] From f1fd6d6580e9d5f7e3d3fa7bd2cbecf989678ec8 Mon Sep 17 00:00:00 2001 From: Anton Melser Date: Mon, 27 Oct 2025 17:47:06 +0800 Subject: [PATCH 3/3] tmp fix collation to insensitive Signed-off-by: Anton Melser --- .../migrations/0009_create_case_insensitive_collation.py | 7 +++---- openedx_learning/lib/fields.py | 5 ++++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/migrations/0009_create_case_insensitive_collation.py b/openedx_learning/apps/authoring/publishing/migrations/0009_create_case_insensitive_collation.py index c6646c77..adabd0c8 100644 --- a/openedx_learning/apps/authoring/publishing/migrations/0009_create_case_insensitive_collation.py +++ b/openedx_learning/apps/authoring/publishing/migrations/0009_create_case_insensitive_collation.py @@ -4,10 +4,9 @@ class Migration(migrations.Migration): - dependencies = [ - ("oel_publishing", "0008_alter_draftchangelogrecord_options_and_more"), + run_before = [ + ("oel_publishing", "0001_initial"), ] - operations = [ # Create a custom case-insensitive collation for PostgreSQL. 
# This collation is used by case_insensitive_char_field() to provide @@ -29,7 +28,7 @@ class Migration(migrations.Migration): CreateCollation( "case_insensitive", provider="icu", - locale="en-US", + locale="und-u-ks-level2", deterministic=False, ), ] diff --git a/openedx_learning/lib/fields.py b/openedx_learning/lib/fields.py index f9ea7588..7b66e8c5 100644 --- a/openedx_learning/lib/fields.py +++ b/openedx_learning/lib/fields.py @@ -14,7 +14,7 @@ import hashlib import uuid -from django.db import models +from django.db import connection, models from .collations import MultiCollationMixin from .validators import validate_utc_datetime @@ -38,6 +38,9 @@ def create_hash_digest(data_bytes: bytes, num_bytes=20) -> str: def default_case_insensitive_collations_args(**kwargs): + # Remove db_index from kwargs if using PostgreSQL to avoid collation issues + if connection.vendor == "postgresql": + kwargs = {k: v for k, v in kwargs.items() if k != "db_index"} return { "null": False, "db_collations": {