From 96299f4b7f786352284b7366b919ddd2412b3171 Mon Sep 17 00:00:00 2001 From: Sylvain Boissel Date: Wed, 19 Nov 2025 14:49:24 +0100 Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=9A=B8(backend)=20use=20unaccented=20?= =?UTF-8?q?full=20name=20for=20user=20search?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have the user full name through OIDC in the database, but the search only used the email field. This change allows searching for a user by their first and/or last name (fix #929). Given that user names are more likely than emails to include diacritics, it unaccents both the query and the database entry for search (fix #1091). It also unaccents for email so that internationalized domain names are managed whether or not the accent is included in the search. An unaccented gin index is added on the users' full_name and email fields. A manual migration is used because a wrapper around unaccent is necessary to make it IMMUTABLE (cf. https://stackoverflow.com/questions/9063402/ ) --- CHANGELOG.md | 1 + src/backend/core/api/viewsets.py | 20 ++- .../migrations/0027_auto_20251120_0956.py | 37 ++++++ src/backend/core/tests/test_api_users.py | 125 ++++++++++++++++++ 4 files changed, 177 insertions(+), 6 deletions(-) create mode 100644 src/backend/core/migrations/0027_auto_20251120_0956.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d78a063ae9..124f843255 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,6 +64,7 @@ and this project adheres to - ♻️(frontend) preserve @ character when esc is pressed after typing it #1512 - ♻️(frontend) make summary button fixed to remain visible during scroll #1581 - ♻️(frontend) pdf embed use full width #1526 +- 🚸(backend) use unaccented full name for user search #1637 ### Fixed diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 1c1b9ef50a..7594770bdd 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1,4 +1,5 @@ """API endpoints""" + 
# pylint: disable=too-many-lines import base64 @@ -18,7 +19,7 @@ from django.db import connection, transaction from django.db import models as db from django.db.models.expressions import RawSQL -from django.db.models.functions import Left, Length +from django.db.models.functions import Greatest, Left, Length from django.http import Http404, StreamingHttpResponse from django.urls import reverse from django.utils import timezone @@ -37,6 +38,7 @@ from rest_framework.permissions import AllowAny from core import authentication, choices, enums, models +from core.api.filters import remove_accents from core.services.ai_services import AIService from core.services.collaboration_services import CollaborationService from core.services.converter_services import ( @@ -188,13 +190,15 @@ def get_queryset(self): queryset = queryset.exclude(documentaccess__document_id=document_id) filter_data = filterset.form.cleaned_data - query = filter_data["q"] + query = remove_accents(filter_data["q"]) # For emails, match emails by Levenstein distance to prevent typing errors if "@" in query: return ( queryset.annotate( - distance=RawSQL("levenshtein(email::text, %s::text)", (query,)) + distance=RawSQL( + "levenshtein(unaccent(email::text), %s::text)", (query,) + ) ) .filter(distance__lte=3) .order_by("distance", "email")[: settings.API_USERS_LIST_LIMIT] @@ -203,11 +207,15 @@ def get_queryset(self): # Use trigram similarity for non-email-like queries # For performance reasons we filter first by similarity, which relies on an # index, then only calculate precise similarity scores for sorting purposes + return ( - queryset.filter(email__trigram_word_similar=query) - .annotate(similarity=TrigramSimilarity("email", query)) + queryset.annotate( + sim_email=TrigramSimilarity("email", query), + sim_name=TrigramSimilarity("full_name", query), + ) + .annotate(similarity=Greatest("sim_email", "sim_name")) .filter(similarity__gt=0.2) - .order_by("-similarity", "email")[: settings.API_USERS_LIST_LIMIT] + 
.order_by("-similarity")[: settings.API_USERS_LIST_LIMIT] ) @drf.decorators.action( diff --git a/src/backend/core/migrations/0027_auto_20251120_0956.py b/src/backend/core/migrations/0027_auto_20251120_0956.py new file mode 100644 index 0000000000..fe795ff5f2 --- /dev/null +++ b/src/backend/core/migrations/0027_auto_20251120_0956.py @@ -0,0 +1,37 @@ +# Generated by Django 5.2.8 on 2025-11-20 09:56 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("core", "0026_comments"), + ] + + operations = [ + migrations.RunSQL( + sql=""" + CREATE OR REPLACE FUNCTION public.immutable_unaccent(regdictionary, text) + RETURNS text + LANGUAGE c IMMUTABLE PARALLEL SAFE STRICT AS + '$libdir/unaccent', 'unaccent_dict'; + + CREATE OR REPLACE FUNCTION public.f_unaccent(text) + RETURNS text + LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT + RETURN public.immutable_unaccent(regdictionary 'public.unaccent', $1); + + CREATE INDEX IF NOT EXISTS user_email_unaccent_trgm_idx + ON impress_user + USING gin (f_unaccent(email) gin_trgm_ops); + + CREATE INDEX IF NOT EXISTS user_full_name_unaccent_trgm_idx + ON impress_user + USING gin (f_unaccent(full_name) gin_trgm_ops); + """, + reverse_sql=""" + DROP INDEX IF EXISTS user_email_unaccent_trgm_idx; + DROP INDEX IF EXISTS user_full_name_unaccent_trgm_idx; + """, + ), + ] diff --git a/src/backend/core/tests/test_api_users.py b/src/backend/core/tests/test_api_users.py index a0a4355280..926e731bd4 100644 --- a/src/backend/core/tests/test_api_users.py +++ b/src/backend/core/tests/test_api_users.py @@ -76,6 +76,131 @@ def test_api_users_list_query_email(): assert user_ids == [] +def test_api_users_list_query_email_with_internationalized_domain_names(): + """ + Authenticated users should be able to list users and filter by email. + It should work even if the email address contains an internationalized domain name. 
+ """ + user = factories.UserFactory() + + client = APIClient() + client.force_login(user) + + jean = factories.UserFactory(email="jean.martin@éducation.fr") + marie = factories.UserFactory(email="marie.durand@education.fr") + kurokawa = factories.UserFactory(email="contact@黒川.日本") + + response = client.get("/api/v1.0/users/?q=jean.martin@education.fr") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(jean.id)] + + response = client.get("/api/v1.0/users/?q=jean.martin@éducation.fr") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(jean.id)] + + response = client.get("/api/v1.0/users/?q=marie.durand@education.fr") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(marie.id)] + + response = client.get("/api/v1.0/users/?q=marie.durand@éducation.fr") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(marie.id)] + + response = client.get("/api/v1.0/users/?q=contact@黒川.日本") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(kurokawa.id)] + + +def test_api_users_list_query_full_name(): + """ + Authenticated users should be able to list users and filter by full name. + Only results with a Trigram similarity greater than 0.2 with the query should be returned. 
+ """ + user = factories.UserFactory() + + client = APIClient() + client.force_login(user) + + dave = factories.UserFactory(email="contact@work.com", full_name="David Bowman") + + response = client.get( + "/api/v1.0/users/?q=David", + ) + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(dave.id)] + + response = client.get("/api/v1.0/users/?q=Bowman") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(dave.id)] + + response = client.get("/api/v1.0/users/?q=bowman") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(dave.id)] + + response = client.get("/api/v1.0/users/?q=BOWMAN") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(dave.id)] + + response = client.get("/api/v1.0/users/?q=BoWmAn") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(dave.id)] + + response = client.get("/api/v1.0/users/?q=Bovin") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [] + + +def test_api_users_list_query_accented_full_name(): + """ + Authenticated users should be able to list users and filter by full name with accents. + Only results with a Trigram similarity greater than 0.2 with the query should be returned. 
+ """ + user = factories.UserFactory() + + client = APIClient() + client.force_login(user) + + fred = factories.UserFactory( + email="contact@work.com", full_name="Frédérique Lefèvre" + ) + + response = client.get("/api/v1.0/users/?q=Frédérique") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(fred.id)] + + response = client.get("/api/v1.0/users/?q=Frederique") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(fred.id)] + + response = client.get("/api/v1.0/users/?q=Lefèvre") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(fred.id)] + + response = client.get("/api/v1.0/users/?q=Lefevre") + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()] + assert user_ids == [str(fred.id)] + + response = client.get("/api/v1.0/users/?q=François Lorfebvre") + assert response.status_code == 200 + users = [user["full_name"] for user in response.json()] + assert users == [] + + def test_api_users_list_limit(settings): """ Authenticated users should be able to list users and the number of results From bd9a3334db081b8d216ed6066616e0d58c8d5359 Mon Sep 17 00:00:00 2001 From: Manuel Raynaud Date: Mon, 15 Dec 2025 11:13:25 +0100 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=94=A7(helm)=20add=20user=20name=20in?= =?UTF-8?q?=20oidc=20scopes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The given_name and usual_name are not configured in the oidc scopes. When a user connects to docs with the dev and feature configuration, we don't have this information. 
--- src/helm/env.d/dev/values.impress.yaml.gotmpl | 2 +- src/helm/env.d/feature/values.impress.yaml.gotmpl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/helm/env.d/dev/values.impress.yaml.gotmpl b/src/helm/env.d/dev/values.impress.yaml.gotmpl index f8b6a6e501..129a4b89ba 100644 --- a/src/helm/env.d/dev/values.impress.yaml.gotmpl +++ b/src/helm/env.d/dev/values.impress.yaml.gotmpl @@ -41,7 +41,7 @@ backend: OIDC_RP_CLIENT_ID: docs OIDC_RP_CLIENT_SECRET: ThisIsAnExampleKeyForDevPurposeOnly OIDC_RP_SIGN_ALGO: RS256 - OIDC_RP_SCOPES: "openid email" + OIDC_RP_SCOPES: "openid email given_name usual_name" LOGIN_REDIRECT_URL: https://docs.127.0.0.1.nip.io LOGIN_REDIRECT_URL_FAILURE: https://docs.127.0.0.1.nip.io LOGOUT_REDIRECT_URL: https://docs.127.0.0.1.nip.io diff --git a/src/helm/env.d/feature/values.impress.yaml.gotmpl b/src/helm/env.d/feature/values.impress.yaml.gotmpl index c6c7fe8cf3..050c35ece2 100644 --- a/src/helm/env.d/feature/values.impress.yaml.gotmpl +++ b/src/helm/env.d/feature/values.impress.yaml.gotmpl @@ -42,7 +42,7 @@ backend: OIDC_RP_CLIENT_ID: docs OIDC_RP_CLIENT_SECRET: ThisIsAnExampleKeyForDevPurposeOnly OIDC_RP_SIGN_ALGO: RS256 - OIDC_RP_SCOPES: "openid email" + OIDC_RP_SCOPES: "openid email given_name usual_name" LOGIN_REDIRECT_URL: https://{{ .Values.feature }}-docs.{{ .Values.domain }} LOGIN_REDIRECT_URL_FAILURE: https://{{ .Values.feature }}-docs.{{ .Values.domain }} LOGOUT_REDIRECT_URL: https://{{ .Values.feature }}-docs.{{ .Values.domain }} From a49f3b6b32705cb9931dff4f2b643b03cf6ff091 Mon Sep 17 00:00:00 2001 From: Sylvain Boissel Date: Mon, 15 Dec 2025 11:28:53 +0100 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=93=9D(changelog)=20move=20entry=20in?= =?UTF-8?q?=20unreleased=20section?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The changelog line was at the wrong place after rebase --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index 124f843255..41fd811d7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ and this project adheres to - ✨(backend) allow to create a new user in a marketing system +### Changed + +- 🚸(backend) use unaccented full name for user search #1637 + ## [4.1.0] - 2025-12-09 ### Added @@ -64,7 +68,6 @@ and this project adheres to - ♻️(frontend) preserve @ character when esc is pressed after typing it #1512 - ♻️(frontend) make summary button fixed to remain visible during scroll #1581 - ♻️(frontend) pdf embed use full width #1526 -- 🚸(backend) use unaccented full name for user search #1637 ### Fixed