From 2ba4e482c5094bc8b442f377bd9181e0fede6df0 Mon Sep 17 00:00:00 2001 From: rgermain Date: Thu, 27 Nov 2025 15:07:31 +0100 Subject: [PATCH 01/32] change all crisalid --- apps/commons/urls.py | 9 + projects/asgi.py | 10 - projects/settings/base.py | 16 - projects/wsgi.py | 10 - services/crisalid/admin.py | 32 +- services/crisalid/bus/__init__.py | 0 services/crisalid/bus/client.py | 177 +++++++++++ services/crisalid/bus/constant.py | 39 +++ services/crisalid/bus/consumer.py | 69 +++++ services/crisalid/bus/logger.py | 0 services/crisalid/bus/organization.py | 82 +++++ services/crisalid/crisalid_bus.py | 290 ------------------ services/crisalid/factories.py | 24 +- services/crisalid/interface.py | 21 +- .../migrations/0002_crisalidconfig.py | 66 ++++ ...ter_crisalidconfig_apollo_host_and_more.py | 23 ++ services/crisalid/models.py | 27 ++ services/crisalid/populates/base.py | 5 +- services/crisalid/populates/caches.py | 12 + services/crisalid/populates/document.py | 38 ++- services/crisalid/populates/researcher.py | 21 +- services/crisalid/signals.py | 21 ++ services/crisalid/tasks.py | 42 ++- services/crisalid/tests/test_crisalid_bus.py | 17 +- services/crisalid/tests/test_populate.py | 31 +- services/crisalid/urls.py | 12 +- services/crisalid/views.py | 11 +- 27 files changed, 704 insertions(+), 401 deletions(-) create mode 100644 services/crisalid/bus/__init__.py create mode 100644 services/crisalid/bus/client.py create mode 100644 services/crisalid/bus/constant.py create mode 100644 services/crisalid/bus/consumer.py create mode 100644 services/crisalid/bus/logger.py create mode 100644 services/crisalid/bus/organization.py delete mode 100644 services/crisalid/crisalid_bus.py create mode 100644 services/crisalid/migrations/0002_crisalidconfig.py create mode 100644 services/crisalid/migrations/0003_alter_crisalidconfig_apollo_host_and_more.py create mode 100644 services/crisalid/signals.py diff --git a/apps/commons/urls.py b/apps/commons/urls.py index 
c2f98d80..4678e898 100644 --- a/apps/commons/urls.py +++ b/apps/commons/urls.py @@ -117,6 +117,15 @@ def user_router_register( router.register(prefix, viewset, basename) +def organization_researcher_router_register( + router: DefaultRouter, path: str, viewset: View, basename: str = None +): + prefix = r"organization/(?P[^/]+)/researcher/(?P[^/]+)" + if path: + prefix += r"/" + path + router.register(prefix, viewset, basename) + + def researcher_router_register( router: DefaultRouter, path: str, viewset: View, basename: str = None ): diff --git a/projects/asgi.py b/projects/asgi.py index ee5f228c..aae2493a 100644 --- a/projects/asgi.py +++ b/projects/asgi.py @@ -14,13 +14,3 @@ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "projects.settings.base") application = get_asgi_application() - - -from django.conf import settings # noqa: E402 - -from services.crisalid.crisalid_bus import logger, start_thread # noqa: E402 - -if settings.ENABLE_CRISALID_BUS: - start_thread() -else: - logger.info("CrisalidBus is not enabled") diff --git a/projects/settings/base.py b/projects/settings/base.py index 6e60d622..53ea32ca 100644 --- a/projects/settings/base.py +++ b/projects/settings/base.py @@ -690,19 +690,3 @@ AZURE_TRANSLATOR_ENDPOINT = os.getenv( "AZURE_TRANSLATOR_ENDPOINT", "https://api.cognitive.microsofttranslator.com" ) - - -############## -# CRISALID # -############## - -CRISALID_API_URL = os.getenv("CRISALID_API_URL", "http://crisalid-apollo:4000") -CRISALID_API_TOKEN = os.getenv("CRISALID_API_TOKEN", "crisalid-apollo-key") - -ENABLE_CRISALID_BUS = os.getenv("ENABLE_CRISALID_BUS", "false").lower() == "true" -CRISALID_BUS = { - "host": os.getenv("CRISALID_BUS_HOST"), - "port": os.getenv("CRISALID_BUS_PORT"), - "user": os.getenv("CRISALID_BUS_USER"), - "password": os.getenv("CRISALID_BUS_PASSWORD"), -} diff --git a/projects/wsgi.py b/projects/wsgi.py index 01b77d28..f44e3d22 100644 --- a/projects/wsgi.py +++ b/projects/wsgi.py @@ -14,13 +14,3 @@ 
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "projects.settings.base") application = get_wsgi_application() - - -from django.conf import settings # noqa: E402 - -from services.crisalid.crisalid_bus import logger, start_thread # noqa: E402 - -if settings.ENABLE_CRISALID_BUS: - start_thread() -else: - logger.info("CrisalidBus is not enabled") diff --git a/services/crisalid/admin.py b/services/crisalid/admin.py index 17f6ada5..f6e92988 100644 --- a/services/crisalid/admin.py +++ b/services/crisalid/admin.py @@ -1,9 +1,17 @@ -from django.contrib import admin -from django.db.models import Count +from typing import Any, Optional from apps.accounts.models import ProjectUser +from django import forms +from django.contrib import admin +from django.db.models import Count -from .models import Document, DocumentContributor, Identifier, Researcher +from .models import ( + CrisalidConfig, + Document, + DocumentContributor, + Identifier, + Researcher, +) class IdentifierAdmin(admin.ModelAdmin): @@ -156,6 +164,24 @@ def get_identifiers(self, instance): return f"{', '.join(result)} ({len(result)})" +class CrisalidConfigForm(forms.ModelForm): + class Meta: + model = CrisalidConfig + fields = "__all__" + widgets = { + "crisalidbus_password": forms.PasswordInput(), + "apollo_token": forms.PasswordInput(), + } + + +class CrisalidConfigAdmin(admin.ModelAdmin): + list_display = ("organization", "active") + search_fields = ("organization", "active") + autocomplete_fields = ("organization",) + form = CrisalidConfigForm + + +admin.site.register(CrisalidConfig, CrisalidConfigAdmin) admin.site.register(Researcher, ResearcherAdmin) admin.site.register(Identifier, IdentifierAdmin) admin.site.register(Document, DocumentAdmin) diff --git a/services/crisalid/bus/__init__.py b/services/crisalid/bus/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/services/crisalid/bus/client.py b/services/crisalid/bus/client.py new file mode 100644 index 00000000..5426ff45 --- /dev/null +++ 
b/services/crisalid/bus/client.py @@ -0,0 +1,177 @@ +import json +import logging +import time +from collections.abc import Callable + +import jsonschema +import pika +from urllib3.util import parse_url + +from services.crisalid.bus.constant import ( + CRISALID_MESSAGE_SCHEMA, + CrisalidEventEnum, +) +from services.crisalid.models import CrisalidConfig + +from .consumer import crisalid_consumer + + +class CrisalidBusClient: + """Class to connect to crisalid rabitmqt, and receive all event messages.""" + + # queue create by ikg for send messages + CRISALID_EXCHANGE = "graph" + # routing key ikg send event (the * is for listen on all event (updated,created,deleted)) + CRISALID_ROUTING_KEYS = [] + for event in CrisalidEventEnum: + CRISALID_ROUTING_KEYS.extend( + ( + f"event.people.person.{event.value}", + f"event.structures.structure.{event.value}", + f"event.documents.document.{event.value}", + ) + ) + + def __init__(self, config: CrisalidConfig): + self.config = config + self.conn: pika.BlockingConnection | None = None + self._channel = pika.channel.Channel + self._run: bool = True + self.logger = logging.getLogger(config.organization.code) + + def connect(self): + assert self.conn is None, "rabimqt is already started" + + url = parse_url(self.config.crisalidbus_url) + + parameters = { + "host": url.host, + "port": url.port, + "user": self.config.crisalidbus_username, + "password": self.config.crisalidbus_password, + } + + if not all(parameters.values()): + # safe remove password to not showing in log + if parameters["password"]: + parameters["password"] = "*" * 10 + self.logger.critical( + "Can't instantiate CrisalidBus: invalid parameters, %s", parameters + ) + return + + retry = 1 + # run in loop to retry when connection is lost + while self._run: + try: + self.logger.info("Create pika connection") + + credentials = pika.PlainCredentials( + parameters["user"], parameters["password"] + ) + + self.conn = pika.BlockingConnection( + pika.ConnectionParameters( + 
host=parameters["host"], + port=parameters["port"], + credentials=credentials, + virtual_host="/", + ), + ) + self._channel = self.conn.channel() + exchange = self.CRISALID_EXCHANGE + self._channel.exchange_declare( + exchange=exchange, exchange_type="topic", durable=True + ) + queue_name = f"projects-backend.{exchange}" + self._channel.queue_declare(queue=queue_name, exclusive=True) + for routing_key in self.CRISALID_ROUTING_KEYS: + self._channel.queue_bind( + exchange=exchange, queue=queue_name, routing_key=routing_key + ) + + self._channel.basic_consume( + queue=queue_name, on_message_callback=self._dispatch, auto_ack=True + ) + + self.logger.info("Start channel Consuming") + self._channel.start_consuming() + break + + except pika.exceptions.ConnectionClosedByBroker: + self.logger.error("Connection closed by crisalid broker") + except pika.exceptions.AMQPChannelError as e: + self.logger.error("Channel error: %s", str(e)) + except pika.exceptions.AMQPConnectionError as e: + self.logger.error("Connection closed: %s", str(e)) + + if not self._run: + break + + # incremental retry (max 60s) + retry = min(retry * 2, 60) + time.sleep(retry) + + # ensure disconect after loop + self._disconnect() + + def disconnect(self): + """disconnect rabitmqt connection""" + self._run = False + if not self.conn: + return + + self.self.logger.info("CrisalidBus connection closed") + + self.conn.close() + self.conn = None + self._channel.cancel() + self._channel = None + + def __delete__(self): + # for disconnect when class is deleted + self.disconnect() + + def _dispatch( + self, + chanel: pika.channel.Channel, + method: pika.spec.Basic.Deliver, + properties: pika.spec.BasicProperties, + body: bytes, + ): + """Global callback to get message, and dispatch on every listener""" + + self.logger.info("Receive routingkey=%r", method.routing_key) + self.logger.debug("body: %s", body) + + # all message sended is json binary "stringify" + try: + body_str = body.decode() + payload = 
json.loads(body_str) + except UnicodeDecodeError as e: + self.logger.exception("Impossible to decode bytes body: %s", str(e)) + return + except (TypeError, ValueError) as e: + self.logger.exception("Impossible to decode json body: %s", str(e)) + return + + # validate schema + try: + jsonschema.validate(payload, CRISALID_MESSAGE_SCHEMA) + except jsonschema.exceptions.ValidationError as e: + self.logger.exception("Can't validate payload format: %s", str(e)) + return + + crisalid_type = payload["type"] + crisalid_event = payload["event"] + if not crisalid_consumer[crisalid_type][crisalid_event]: + self.logger.info( + "Not listener for event: %s::%s", crisalid_type, crisalid_event + ) + return + + event_callback = crisalid_consumer[crisalid_type][crisalid_event] + self.logger.debug("Call %s", event_callback) + + fields = payload["fields"] + event_callback(self.config.pk, fields) diff --git a/services/crisalid/bus/constant.py b/services/crisalid/bus/constant.py new file mode 100644 index 00000000..ca21c5c0 --- /dev/null +++ b/services/crisalid/bus/constant.py @@ -0,0 +1,39 @@ +import enum + + +# https://github.com/CRISalid-esr/crisalid-deployment/blob/6b37862bb27b0e2164666f9e8b049ac3dbf60923/docker/crisalid-bus/definitions.sample.json#L7 +# Event/Type from crisalid https://github.com/CRISalid-esr/crisalid-ikg/tree/dev-main/app/amqp +class CrisalidTypeEnum(enum.StrEnum): + PERSON = "person" + STRUCTURE = "research_structure" + HARVESTING = "harvesting_result_event" + DOCUMENT = "document" + + +class CrisalidEventEnum(enum.StrEnum): + """Event from crisalid + "unchanged" event is ignored + """ + + UPDATED = "updated" + CREATED = "created" + DELETED = "deleted" + + +# schema received from crisalid +CRISALID_MESSAGE_SCHEMA = { + "type": "object", + "properties": { + "type": { + "enum": [v.value for v in CrisalidTypeEnum], + }, + "event": { + "enum": [v.value for v in CrisalidEventEnum], + }, + "fields": { + "type": "object" + # TODO(remi): speficied all fields types ? 
+ }, + }, + "required": ["type", "event", "fields"], +} diff --git a/services/crisalid/bus/consumer.py b/services/crisalid/bus/consumer.py new file mode 100644 index 00000000..acfdcfdb --- /dev/null +++ b/services/crisalid/bus/consumer.py @@ -0,0 +1,69 @@ +import logging +from collections import defaultdict +from collections.abc import Callable +from functools import wraps + +from celery import Task + +from services.crisalid.bus.constant import CrisalidEventEnum, CrisalidTypeEnum + +logger = logging.getLogger(__name__) + + +class CrisalidConsumer: + def __init__(self): + self.clean() + + def clean(self): + self._consumer: dict[CrisalidTypeEnum, dict[CrisalidEventEnum, Callable]] = ( + defaultdict(lambda: defaultdict(lambda: None)) + ) + + def add_callback( + self, + crisalid_type: CrisalidTypeEnum, + crisalid_event: CrisalidEventEnum, + callback: Callable, + ): + assert ( + crisalid_event.value not in self._consumer[crisalid_type.value] + ), f"Event {crisalid_type}::{crisalid_event}, is already set" + + # add callback + self._consumer[crisalid_type.value][crisalid_event.value] = callback + return callback + + +crisalid_consumer = CrisalidConsumer() + + +# check methods is celery +def is_task_celery(func): + return isinstance(func, Task) or ( + hasattr(func, "__wrapped__") and isinstance(func.__wrapped__, Task) + ) + + +# easy decorator method +def on_event(crisalid_type: CrisalidTypeEnum, crisalid_event: CrisalidEventEnum): + """shortcut decorator to crisalid_bus.add_callback + + :param crisalid_type: crisalid type name + :param crisalid_event: crisalid event name + """ + + def _wraps(func): + original_func = func + if is_task_celery(func): + + # if is a task, add correct seriliazer for data + @wraps(func) + def _tasks(*args): + logger.info("post task celery %s", original_func) + return original_func.apply_async(args) + + func = _tasks + crisalid_consumer.add_callback(crisalid_type, crisalid_event, func) + return original_func + + return _wraps diff --git 
a/services/crisalid/bus/logger.py b/services/crisalid/bus/logger.py new file mode 100644 index 00000000..e69de29b diff --git a/services/crisalid/bus/organization.py b/services/crisalid/bus/organization.py new file mode 100644 index 00000000..13d8d7dc --- /dev/null +++ b/services/crisalid/bus/organization.py @@ -0,0 +1,82 @@ +import atexit +import logging +import threading + +from services.crisalid.bus.client import CrisalidBusClient +from services.crisalid.models import CrisalidConfig + +rlock = threading.RLock() + + +class OrganizationClient: + def __init__(self, config: CrisalidConfig): + self.config = config + self.client = CrisalidBusClient(self.config) + self.logger = logging.getLogger(config.organization.code) + self.thread = None + + @property + def name(self): + return self.config.organization.code + + def start(self): + thread_name = f"[{self.name}]CrisalidBus" + assert self.thread is None, f"can't start twice {thread_name}" + + self.logger.info("Start thread %s", thread_name) + self.thread = threading.Thread( + target=self.client.connect, + name=thread_name, + daemon=True, + ) + self.thread.start() + + def stop(self): + self.logger.info("Stop thread %s", self.name) + if self.thread is None: + return + self.client.disconnect() + self.thread.join(3) + self.thread = None + + +organization_maps: dict[str, OrganizationClient] = {} + + +def start_crisalidbus(config: CrisalidConfig): + with rlock: + client = organization_maps.get(config.organization.code) + if client is None: + client = OrganizationClient(config) + organization_maps[config.organization.code] = client + else: + client.stop() + client.config = config + + client.start() + + +def stop_crisalidbus(config: CrisalidConfig): + with rlock: + client = organization_maps.get(config.organization.code) + if client is None: + return + client.config = config + client.stop() + + +def delete_crisalidbus(config: CrisalidConfig): + with rlock: + client = organization_maps.get(config.organization.code) + if client 
is None: + return + client.stop() + del organization_maps[config.organization.code] + + +# safe stop all crisalid bus +@atexit.register +def _stop_all_crisalid(): + with rlock: + for client in list(organization_maps.values()): + delete_crisalidbus(client.config) diff --git a/services/crisalid/crisalid_bus.py b/services/crisalid/crisalid_bus.py deleted file mode 100644 index 7a5dbf23..00000000 --- a/services/crisalid/crisalid_bus.py +++ /dev/null @@ -1,290 +0,0 @@ -import atexit -import enum -import json -import logging -import threading -import time -from collections import defaultdict -from collections.abc import Callable -from functools import wraps - -import jsonschema -import pika -from celery import Task -from django.conf import settings - -logger = logging.getLogger(__name__) - - -# https://github.com/CRISalid-esr/crisalid-deployment/blob/6b37862bb27b0e2164666f9e8b049ac3dbf60923/docker/crisalid-bus/definitions.sample.json#L7 -# Event/Type from crisalid https://github.com/CRISalid-esr/crisalid-ikg/tree/dev-main/app/amqp -class CrisalidTypeEnum(enum.StrEnum): - PERSON = "person" - STRUCTURE = "research_structure" - HARVESTING = "harvesting_result_event" - DOCUMENT = "document" - - -class CrisalidEventEnum(enum.StrEnum): - """Event from crisalid - "unchanged" event is ignored - """ - - UPDATED = "updated" - CREATED = "created" - DELETED = "deleted" - - -# schema received from crisalid -CRISALID_MESSAGE_SCHEMA = { - "type": "object", - "properties": { - "type": { - "enum": [v.value for v in CrisalidTypeEnum], - }, - "event": { - "enum": [v.value for v in CrisalidEventEnum], - }, - "fields": { - "type": "object" - # TODO(remi): speficied all fields types ? 
- }, - }, - "required": ["type", "event", "fields"], -} - - -class CrisalidBusClient: - """Class to connect to crisalid rabitmqt, and receive all event messages.""" - - # queue create by ikg for send messages - CRISALID_EXCHANGE = "graph" - # routing key ikg send event (the * is for listen on all event (updated,created,deleted)) - CRISALID_ROUTING_KEYS = [] - for event in CrisalidEventEnum: - CRISALID_ROUTING_KEYS.extend( - ( - f"event.people.person.{event.value}", - f"event.structures.structure.{event.value}", - f"event.documents.document.{event.value}", - ) - ) - - def __init__(self): - self.conn: pika.BlockingConnection | None = None - self._channel = pika.channel.Channel - self._run: bool = True - self._consumer: dict[CrisalidTypeEnum, dict[CrisalidEventEnum, Callable]] = ( - defaultdict(lambda: defaultdict(lambda: None)) - ) - - def add_callback( - self, - crisalid_type: CrisalidTypeEnum, - crisalid_event: CrisalidEventEnum, - callback: Callable, - ): - assert ( - crisalid_event.value not in self._consumer[crisalid_type.value] - ), f"Event {crisalid_type}::{crisalid_event}, is already set" - - # add callback - self._consumer[crisalid_type.value][crisalid_event.value] = callback - return callback - - def connect(self): - assert self.conn is None, "rabimqt is already started" - - parameters = { - "host": settings.CRISALID_BUS["host"], - "port": settings.CRISALID_BUS["port"], - "user": settings.CRISALID_BUS["user"], - "password": settings.CRISALID_BUS["password"], - } - - if not all(parameters.values()): - # safe remove password to not showing in log - if parameters["password"]: - parameters["password"] = "*" * 10 - logger.critical( - "Can't instantiate CrisalidBus: invalid parameters, %s", parameters - ) - return - - retry = 1 - # run in loop to retry when connection is lost - while self._run: - try: - logger.info("Create pika connection") - - credentials = pika.PlainCredentials( - parameters["user"], parameters["password"] - ) - - self.conn = 
pika.BlockingConnection( - pika.ConnectionParameters( - host=parameters["host"], - port=parameters["port"], - credentials=credentials, - virtual_host="/", - ), - ) - self._channel = self.conn.channel() - exchange = self.CRISALID_EXCHANGE - self._channel.exchange_declare( - exchange=exchange, exchange_type="topic", durable=True - ) - queue_name = f"projects-backend.{exchange}" - self._channel.queue_declare(queue=queue_name, exclusive=True) - for routing_key in self.CRISALID_ROUTING_KEYS: - self._channel.queue_bind( - exchange=exchange, queue=queue_name, routing_key=routing_key - ) - - self._channel.basic_consume( - queue=queue_name, on_message_callback=self._dispatch, auto_ack=True - ) - - logger.info("Start channel Consuming") - self._channel.start_consuming() - break - - except pika.exceptions.ConnectionClosedByBroker: - logger.error("Connection closed by crisalid broker") - except pika.exceptions.AMQPChannelError as e: - logger.error("Channel error: %s", str(e)) - except pika.exceptions.AMQPConnectionError as e: - logger.error("Connection closed: %s", str(e)) - - if not self._run: - break - - # incremental retry (max 60s) - retry = min(retry * 2, 60) - time.sleep(retry) - - # ensure disconect after loop - self._disconnect() - - def disconnect(self): - """disconnect rabitmqt connection""" - self._run = False - if not self.conn: - return - - self.logger.info("CrisalidBus connection closed") - - self.conn.close() - self.conn = None - self._channel.cancel() - self._channel = None - - def __delete__(self): - # for disconnect when class is deleted - self.disconnect() - - def _dispatch( - self, - chanel: pika.channel.Channel, - method: pika.spec.Basic.Deliver, - properties: pika.spec.BasicProperties, - body: bytes, - ): - """Global callback to get message, and dispatch on every listener""" - - logger.info("Receive routingkey=%r", method.routing_key) - logger.debug("body: %s", body) - - # all message sended is json binary "stringify" - try: - body_str = body.decode() - 
payload = json.loads(body_str) - except UnicodeDecodeError as e: - logger.exception("Impossible to decode bytes body: %s", str(e)) - return - except (TypeError, ValueError) as e: - logger.exception("Impossible to decode json body: %s", str(e)) - return - - # validate schema - try: - jsonschema.validate(payload, CRISALID_MESSAGE_SCHEMA) - except jsonschema.exceptions.ValidationError as e: - logger.exception("Can't validate payload format: %s", str(e)) - return - - crisalid_type = payload["type"] - crisalid_event = payload["event"] - if not self._consumer[crisalid_type][crisalid_event]: - logger.info("Not listener for event: %s::%s", crisalid_type, crisalid_event) - return - - event_callback = self._consumer[crisalid_type][crisalid_event] - logger.debug("Call %s", event_callback) - - fields = payload["fields"] - event_callback(fields) - - -# TODO(remi): nedd to create a singleton type ? -crisalid_bus_client = CrisalidBusClient() - -__thread_crisalid_bus = None - - -def start_thread(): - global __thread_crisalid_bus - assert __thread_crisalid_bus is None, "can't start twice crisalidbus" - - # target is connect function in crisalidbus - __thread_crisalid_bus = threading.Thread( - target=crisalid_bus_client.connect, - name="CrisalidBus", - daemon=True, - ) - - # start thread - __thread_crisalid_bus.start() - - -# register atexit to force disconnect pika -@atexit.register -def stop_thread(): - global __thread_crisalid_bus - crisalid_bus_client.disconnect() - # wait 3 seconds to stop thread (the thread is daemon, so no realy need this) - if __thread_crisalid_bus is not None: - __thread_crisalid_bus.join(3) - __thread_crisalid_bus = None - - -# check methods is celery -def is_task_celery(func): - return isinstance(func, Task) or ( - hasattr(func, "__wrapped__") and isinstance(func.__wrapped__, Task) - ) - - -# easy decorator method -def on_event(crisalid_type: CrisalidTypeEnum, crisalid_event: CrisalidEventEnum): - """shortcut decorator to crisalid_bus.add_callback - - 
:param crisalid_type: crisalid type name - :param crisalid_event: crisalid event name - """ - - def _wraps(func): - original_func = func - if is_task_celery(func): - - # if is a task, add correct seriliazer for data - @wraps(func) - def _tasks(data): - logger.info("post task celery %s", original_func) - return original_func.apply_async((data,)) - - func = _tasks - crisalid_bus_client.add_callback(crisalid_type, crisalid_event, func) - return original_func - - return _wraps diff --git a/services/crisalid/factories.py b/services/crisalid/factories.py index a6b177f6..988755b4 100644 --- a/services/crisalid/factories.py +++ b/services/crisalid/factories.py @@ -1,11 +1,18 @@ import factory +from apps.accounts.factories import UserFactory +from apps.organizations.factories import OrganizationFactory from factory.fuzzy import FuzzyChoice from faker import Faker -from apps.accounts.factories import UserFactory from services.crisalid import relators -from .models import Document, DocumentContributor, Identifier, Researcher +from .models import ( + CrisalidConfig, + Document, + DocumentContributor, + Identifier, + Researcher, +) faker = Faker() @@ -92,3 +99,16 @@ class DocumentContributorFactory(factory.django.DjangoModelFactory): class Meta: model = DocumentContributor + + +class CrisalidConfigFactory(factory.django.DjangoModelFactory): + organization = factory.LazyFunction(lambda: OrganizationFactory()) + crisalidbus_host = factory.Factory("url") + crisalidbus_username = factory.Factory("username") + crisalidbus_password = factory.Factory("password") + apollo_host = factory.Factory("url") + apollo_token = factory.Factory("password") + active = True + + class Meta: + model = CrisalidConfig diff --git a/services/crisalid/interface.py b/services/crisalid/interface.py index b683c8f2..48ea823a 100644 --- a/services/crisalid/interface.py +++ b/services/crisalid/interface.py @@ -1,23 +1,24 @@ -from typing import Any, Dict, List, Optional, Tuple +from typing import Any -from 
django.conf import settings from gql import Client, gql from gql.transport.requests import RequestsHTTPTransport +from services.crisalid.models import CrisalidConfig + class CrisalidService: QUERIES_DIRECTORY = "services/crisalid/queries" - def __init__(self): + def __init__(self, config: CrisalidConfig): self.transport = RequestsHTTPTransport( - url=f"{settings.CRISALID_API_URL}/graphql", - headers={"X-API-Key": settings.CRISALID_API_TOKEN}, + url=config.apollo_host, + headers={"X-API-Key": config.apollo_token}, ) self.client = Client( transport=self.transport, fetch_schema_from_transport=False ) - def query(self, query_file: str, **kwargs) -> Dict[str, Any]: + def query(self, query_file: str, **kwargs) -> dict[str, Any]: """ Execute a query from the queries directory. @@ -34,7 +35,7 @@ def query(self, query_file: str, **kwargs) -> Dict[str, Any]: def profiles( self, limit: int = 100, offset: int = 0, **kwargs - ) -> Tuple[List[Dict[str, Any]], Optional[int]]: + ) -> tuple[list[dict[str, Any]], int | None]: """ Get a list of profiles from the Crisalid API. @@ -52,7 +53,7 @@ def profiles( next_page = offset + limit if offset + limit < count else None return response["people"], next_page - def profile(self, uid: str) -> Dict[str, Any]: + def profile(self, uid: str) -> dict[str, Any]: """ Get a profile from the Crisalid API. @@ -72,7 +73,7 @@ def profile(self, uid: str) -> Dict[str, Any]: def textual_documents( self, limit: int = 100, offset: int = 0, **kwargs - ) -> Tuple[List[Dict[str, Any]], Optional[int]]: + ) -> tuple[list[dict[str, Any]], int | None]: """ Get a list of textual documents from the Crisalid API. @@ -90,7 +91,7 @@ def textual_documents( next_page = offset + limit if offset + limit < count else None return response["textualDocuments"], next_page - def textual_document(self, uid: str) -> Dict[str, Any]: + def textual_document(self, uid: str) -> dict[str, Any]: """ Get a textual document from the Crisalid API. 
diff --git a/services/crisalid/migrations/0002_crisalidconfig.py b/services/crisalid/migrations/0002_crisalidconfig.py new file mode 100644 index 00000000..69c48b9b --- /dev/null +++ b/services/crisalid/migrations/0002_crisalidconfig.py @@ -0,0 +1,66 @@ +# Generated by Django 4.2.25 on 2025-11-27 10:37 + +import apps.commons.mixins +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("organizations", "0003_initial"), + ("crisalid", "0001_initial"), + ] + + operations = [ + migrations.CreateModel( + name="CrisalidConfig", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "crisalidbus_host", + models.URLField(help_text="crisalidbus/rabimqt host"), + ), + ( + "crisalidbus_username", + models.CharField( + help_text="crisalidbus/rabimqt username", max_length=255 + ), + ), + ( + "crisalidbus_password", + models.CharField( + help_text="crisalidbus/rabimqt password", max_length=255 + ), + ), + ("apollo_host", models.URLField(help_text="apollo/graphql host")), + ( + "apollo_token", + models.CharField(help_text="apollo token", max_length=255), + ), + ( + "active", + models.BooleanField( + default=False, help_text="config is enabled/disabled" + ), + ), + ( + "organization", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + related_name="crisalid", + to="organizations.organization", + ), + ), + ], + bases=(apps.commons.mixins.OrganizationRelated, models.Model), + ), + ] diff --git a/services/crisalid/migrations/0003_alter_crisalidconfig_apollo_host_and_more.py b/services/crisalid/migrations/0003_alter_crisalidconfig_apollo_host_and_more.py new file mode 100644 index 00000000..8096bf84 --- /dev/null +++ b/services/crisalid/migrations/0003_alter_crisalidconfig_apollo_host_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.25 on 2025-11-27 12:20 + +from django.db import 
migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("crisalid", "0002_crisalidconfig"), + ] + + operations = [ + migrations.AlterField( + model_name="crisalidconfig", + name="apollo_host", + field=models.URLField(help_text="apollo/graphql host:port"), + ), + migrations.AlterField( + model_name="crisalidconfig", + name="crisalidbus_host", + field=models.URLField(help_text="crisalidbus/rabimqt host:port"), + ), + ] diff --git a/services/crisalid/models.py b/services/crisalid/models.py index e9bcf162..7d076d32 100644 --- a/services/crisalid/models.py +++ b/services/crisalid/models.py @@ -1,5 +1,6 @@ from collections.abc import Generator +from apps.commons.mixins import OrganizationRelated from django import forms from django.contrib.postgres.fields import ArrayField from django.db import models @@ -266,3 +267,29 @@ def keys(cls) -> Generator[list[str]]: def values(cls) -> Generator[tuple[str]]: for _, v in cls.items(): yield v + + +class CrisalidConfig(OrganizationRelated, models.Model): + """model for crisalid config with host/pass for connected to crisalid, + is linked to a one organization + """ + + organization = models.OneToOneField( + "organizations.Organization", + on_delete=models.CASCADE, + related_name="crisalid", + ) + + crisalidbus_host = models.URLField(help_text="crisalidbus/rabimqt host:port") + # crisalidbu_port = models.URLField(help_text="crisalidbus/rabimqt host:port") + crisalidbus_username = models.CharField( + max_length=255, help_text="crisalidbus/rabimqt username" + ) + crisalidbus_password = models.CharField( + max_length=255, help_text="crisalidbus/rabimqt password" + ) + + apollo_host = models.URLField(help_text="apollo/graphql host:port") + apollo_token = models.CharField(max_length=255, help_text="apollo token") + + active = models.BooleanField(help_text="config is enabled/disabled", default=False) diff --git a/services/crisalid/populates/base.py b/services/crisalid/populates/base.py index 
2c2994ec..813ee799 100644 --- a/services/crisalid/populates/base.py +++ b/services/crisalid/populates/base.py @@ -2,6 +2,8 @@ import datetime from typing import TypeVar +from services.crisalid.models import CrisalidConfig + from .caches import BaseCache, LiveCache from .logger import logger @@ -13,7 +15,8 @@ class AbstractPopulate(metaclass=abc.ABCMeta): - def __init__(self, cache: TCACHE = None): + def __init__(self, config: CrisalidConfig, cache: TCACHE = None): + self.config = config self.cache = cache or LiveCache() def sanitize_languages(self, values: list[dict[str, str]]) -> str: diff --git a/services/crisalid/populates/caches.py b/services/crisalid/populates/caches.py index 777820c9..60389039 100644 --- a/services/crisalid/populates/caches.py +++ b/services/crisalid/populates/caches.py @@ -3,6 +3,8 @@ from django.db.models import Model +from services.crisalid.models import Identifier + from .logger import logger # TODO create a new Cache class to optimize save/get with @@ -22,6 +24,10 @@ def save_m2m(self, instance, *fields): def model(self, model, *fields): """get object element from model/fields""" + @abc.abstractclassmethod + def indentifiers(self, model, identifiers: list[Identifier]): + """get object element from identifiers lists""" + class LiveCache(BaseCache): def save(self, obj, **fields): @@ -49,3 +55,9 @@ def model(self, model: Model, **fields): return model.objects.get(**fields) except model.DoesNotExist: return model(**fields) + + def identifiers(self, model, identifiers): + try: + return model.objects.filter(identifiers__in=identifiers).distinct().get() + except model.DoesNotExist: + return model() diff --git a/services/crisalid/populates/document.py b/services/crisalid/populates/document.py index 15fc0b18..385d06eb 100644 --- a/services/crisalid/populates/document.py +++ b/services/crisalid/populates/document.py @@ -1,5 +1,10 @@ from services.crisalid import relators -from services.crisalid.models import Document, DocumentContributor, 
Identifier +from services.crisalid.models import ( + CrisalidConfig, + Document, + DocumentContributor, + Identifier, +) from .base import AbstractPopulate from .logger import logger @@ -7,9 +12,9 @@ class PopulateDocument(AbstractPopulate): - def __init__(self, cache=None): - super().__init__(cache) - self.populate_researcher = PopulateResearcher(self.cache) + def __init__(self, config: CrisalidConfig, cache=None): + super().__init__(config, cache) + self.populate_researcher = PopulateResearcher(self.config, self.cache) def sanitize_document_type(self, data: str | None): """Check documentType , and return unknow value if is not set in enum""" @@ -31,18 +36,8 @@ def sanitize_roles(self, data: list[str]) -> list[str]: def single(self, data: dict): """this method create/update only on document from crisalid""" - - document = self.cache.model(Document, crisalid_uid=data["uid"]) - self.cache.save( - document, - title=self.sanitize_languages(data["titles"]), - description=self.sanitize_languages(data["abstracts"]), - publication_date=self.sanitize_date(data["publication_date"]), - document_type=self.sanitize_document_type(data["document_type"]), - ) - # identifiers (hal, openalex, idref ...ect) - identifiers = [] + documents_identifiers = [] for recorded in data["recorded_by"]: identifier = self.cache.model( Identifier, @@ -50,8 +45,17 @@ def single(self, data: dict): harvester=recorded["harvester"].lower(), ) self.cache.save(identifier) - identifiers.append(identifier) - self.cache.save_m2m(document, identifiers=identifiers) + documents_identifiers.append(identifier) + + document = self.cache.indentifiers(Document, documents_identifiers) + self.cache.save( + document, + title=self.sanitize_languages(data["titles"]), + description=self.sanitize_languages(data["abstracts"]), + publication_date=self.sanitize_date(data["publication_date"]), + document_type=self.sanitize_document_type(data["document_type"]), + ) + self.cache.save_m2m(document, 
identifiers=documents_identifiers) contributors = [] for contribution in data["has_contributions"]: diff --git a/services/crisalid/populates/researcher.py b/services/crisalid/populates/researcher.py index 19ff9f4d..1032f842 100644 --- a/services/crisalid/populates/researcher.py +++ b/services/crisalid/populates/researcher.py @@ -1,4 +1,5 @@ from apps.accounts.models import ProjectUser + from services.crisalid.models import Identifier, Researcher from .base import AbstractPopulate @@ -21,6 +22,7 @@ def check_mapping_user( """match user from researcher (need eppn)""" if researcher.user: + researcher.user.groups.add(self.config.organization.get_users()) return researcher.user for iden in data["identifiers"]: @@ -28,7 +30,11 @@ def check_mapping_user( continue # filter by eppn - user = self.cache.model(ProjectUser, email=iden["value"]) + user = self.cache.model( + ProjectUser, + email=iden["value"], + groups__in=(self.config.organization.get_users(),), + ) # create only user if we have eppn given_name, family_name = self.get_names(data) @@ -38,12 +44,11 @@ def check_mapping_user( given_name=given_name, family_name=family_name, ) + user.groups.add(self.config.organization.get_users()) return user return None def single(self, data: dict) -> Researcher: - researcher = self.cache.model(Researcher, crisalid_uid=data["uid"]) - researcher_identifiers = [] for iden in data["identifiers"]: identifier = self.cache.model( @@ -52,6 +57,16 @@ def single(self, data: dict) -> Researcher: self.cache.save(identifier) researcher_identifiers.append(identifier) + # remove local identifiers to match only hal/eppn/orcid ..ect + researcher_identifiers_without_local = [ + identifier + for identifier in researcher_identifiers + if identifier.harvester != Identifier.Harvester.LOCAL + ] + researcher = self.cache.indentifiers( + Researcher, researcher_identifiers_without_local + ) + user = self.check_mapping_user(researcher, data) self.cache.save(researcher, display_name=data["display_name"], 
user=user) self.cache.save_m2m(researcher, identifiers=researcher_identifiers) diff --git a/services/crisalid/signals.py b/services/crisalid/signals.py new file mode 100644 index 00000000..22e01257 --- /dev/null +++ b/services/crisalid/signals.py @@ -0,0 +1,21 @@ +from django.db.models.signals import post_delete, post_save +from django.dispatch import receiver + +from services.crisalid.apps import CrisalidConfig +from services.crisalid.bus.organization import ( + remove_crisalidbus, + start_crisalidbus, +) + + +@receiver(post_save, sender=CrisalidConfig) +def on_save(sender, instance, **kwargs): + if instance.active: + start_crisalidbus(instance) + else: + remove_crisalidbus(instance) + + +@receiver(post_delete, sender=CrisalidConfig) +def on_delete(sender, instance, **kwargs): + remove_crisalidbus(instance) diff --git a/services/crisalid/tasks.py b/services/crisalid/tasks.py index 6c4252c5..0e0afb86 100644 --- a/services/crisalid/tasks.py +++ b/services/crisalid/tasks.py @@ -1,13 +1,23 @@ import logging from projects.celery import app -from services.crisalid.crisalid_bus import CrisalidEventEnum, CrisalidTypeEnum, on_event + +from services.crisalid.apps import CrisalidConfig +from services.crisalid.bus.constant import CrisalidEventEnum, CrisalidTypeEnum +from services.crisalid.bus.consumer import on_event from services.crisalid.interface import CrisalidService from services.crisalid.models import Document, Researcher from services.crisalid.populates import PopulateDocument, PopulateResearcher logger = logging.getLogger(__name__) + +def get_crisalid_config(crisalid_config_id: int) -> CrisalidConfig: + return CrisalidConfig.objects.get(id=crisalid_config_id).selected_related( + "organization" + ) + + # https://github.com/CRISalid-esr/crisalid-ikg/blob/dev-main/app/amqp/amqp_person_event_message_factory.py#L28 # https://github.com/CRISalid-esr/crisalid-ikg/blob/dev-main/app/amqp/amqp_document_event_message_factory.py#L37 @@ -15,29 +25,34 @@ 
@on_event(CrisalidTypeEnum.PERSON, CrisalidEventEnum.CREATED) @on_event(CrisalidTypeEnum.PERSON, CrisalidEventEnum.UPDATED) @app.task(name=f"{__name__}.create_person") -def create_person(fields: dict): - logger.info("receive %s", fields) +def create_person(crisalid_config_id: int, fields: dict): + config = get_crisalid_config(crisalid_config_id) + logger.error("receive %s for organization %s", fields, config.organization) - populate = PopulateResearcher() + populate = PopulateResearcher(config) populate.single(fields) @on_event(CrisalidTypeEnum.PERSON, CrisalidEventEnum.DELETED) @app.task(name=f"{__name__}.delete_person") -def delete_person(fields: dict): - logger.info("receive %s", fields) +def delete_person(crisalid_config_id: int, fields: dict): + config = get_crisalid_config(crisalid_config_id) + logger.error("receive %s for organization %s", fields, config.organization) - deleted = Researcher.objects.filter(crisalid_uid=fields["uid"]).delete() + deleted = Researcher.objects.filter( + crisalid_uid=fields["uid"], user__groups__in=(config.organization.get_users(),) + ).delete() logger.info("deleted = %s", deleted) @on_event(CrisalidTypeEnum.DOCUMENT, CrisalidEventEnum.CREATED) @on_event(CrisalidTypeEnum.DOCUMENT, CrisalidEventEnum.UPDATED) @app.task(name=f"{__name__}.create_document") -def create_document(fields: dict): - logger.info("receive %s", fields) +def create_document(crisalid_config_id: int, fields: dict): + config = get_crisalid_config(crisalid_config_id) + logger.error("receive %s for organization %s", fields, config.organization) - service = CrisalidService() + service = CrisalidService(config) # fetch data from apollo data = service.query( @@ -47,14 +62,15 @@ def create_document(fields: dict): logger.warning("no result fetching crisalid_uid=%s", fields["uid"]) return - populate = PopulateDocument() + populate = PopulateDocument(config) populate.single(data[0]) @on_event(CrisalidTypeEnum.DOCUMENT, CrisalidEventEnum.DELETED) 
@app.task(name=f"{__name__}.delete_document") -def delete_document(fields: dict): - logger.error("receive %s", fields) +def delete_document(crisalid_config_id: int, fields: dict): + config = get_crisalid_config(crisalid_config_id) + logger.error("receive %s for organization %s", fields, config.organization) deleted = Document.objects.filter(crisalid_uid=fields["uid"]).delete() logger.info("deleted = %s", deleted) diff --git a/services/crisalid/tests/test_crisalid_bus.py b/services/crisalid/tests/test_crisalid_bus.py index b219b9ce..c4c65ce8 100644 --- a/services/crisalid/tests/test_crisalid_bus.py +++ b/services/crisalid/tests/test_crisalid_bus.py @@ -3,11 +3,9 @@ from django import test -from services.crisalid.crisalid_bus import ( - CrisalidBusClient, - CrisalidEventEnum, - CrisalidTypeEnum, -) +from services.crisalid.bus.client import CrisalidBusClient +from services.crisalid.bus.constant import CrisalidEventEnum, CrisalidTypeEnum +from services.crisalid.bus.consumer import crisalid_consumer class TestCrisalidBus(test.TestCase): @@ -28,6 +26,7 @@ def setUpClass(cls): def setUp(self): self.client = CrisalidBusClient() + crisalid_consumer.clean() def test_dispatch_no_callback(self): # this run withtout called any callback @@ -35,7 +34,7 @@ def test_dispatch_no_callback(self): def test_dispatch_with_callback(self): callback = Mock() - self.client.add_callback( + crisalid_consumer.add_callback( CrisalidTypeEnum.DOCUMENT, CrisalidEventEnum.CREATED, callback ) @@ -47,19 +46,19 @@ def test_dispatch_with_callback(self): def test_add_callback(self): callback = Mock() - self.client.add_callback( + crisalid_consumer.add_callback( CrisalidTypeEnum.DOCUMENT, CrisalidEventEnum.CREATED, callback ) # try to readd this callback, raise a exception with self.assertRaises(AssertionError): - self.client.add_callback( + crisalid_consumer.add_callback( CrisalidTypeEnum.DOCUMENT, CrisalidEventEnum.CREATED, callback ) def test_validated_payload(self): callback = Mock() - 
self.client.add_callback( + crisalid_consumer.add_callback( CrisalidTypeEnum.DOCUMENT, CrisalidEventEnum.CREATED, callback ) diff --git a/services/crisalid/tests/test_populate.py b/services/crisalid/tests/test_populate.py index 2e2c5d51..577680c8 100644 --- a/services/crisalid/tests/test_populate.py +++ b/services/crisalid/tests/test_populate.py @@ -1,15 +1,21 @@ import datetime +from apps.accounts.models import ProjectUser from django import test -from apps.accounts.models import ProjectUser +from services.crisalid.factories import CrisalidConfigFactory from services.crisalid.models import Document, Identifier, Researcher from services.crisalid.populates import PopulateDocument, PopulateResearcher class TestPopulateResearcher(test.TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.config = CrisalidConfigFactory() + def test_create_researcher(self): - popu = PopulateResearcher() + popu = PopulateResearcher(self.config) data = { "uid": "05-11-1995-uuid", "display_name": "marty mcfly", @@ -47,7 +53,7 @@ def test_no_change_researcher(self): value="hals-truc", harvester=Identifier.Harvester.HAL.value ) researcher.identifiers.add(iden) - popu = PopulateResearcher() + popu = PopulateResearcher(self.config) new_obj = popu.single(data) @@ -86,7 +92,7 @@ def test_update_identifiers(self): data["identifiers"].append( {"value": "000-666-999", "type": Identifier.Harvester.ORCID.value} ) - popu = PopulateResearcher() + popu = PopulateResearcher(self.config) popu.single(data) # check no new object are created @@ -110,7 +116,7 @@ def test_create_user_researcher(self): {"value": "eppn@lpi.com", "type": Identifier.Harvester.EPPN.value}, ], } - popu = PopulateResearcher() + popu = PopulateResearcher(self.config) popu.single(data) user = ProjectUser.objects.first() @@ -133,7 +139,7 @@ def test_match_user_researcher(self): # a project user already exists with same eepn user = ProjectUser.objects.create(email="eppn@lpi.com") - popu = PopulateResearcher() + 
popu = PopulateResearcher(self.config) popu.single(data) researcher = Researcher.objects.first() @@ -146,8 +152,13 @@ def test_match_user_researcher(self): class TestPopulateDocument(test.TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.config = CrisalidConfigFactory() + def test_create_publication(self): - popu = PopulateDocument() + popu = PopulateDocument(self.config) data = { "uid": "05-11-1995-uuid", "document_type": None, @@ -208,7 +219,7 @@ def test_create_publication(self): self.assertEqual(iden.harvester, Identifier.Harvester.HAL.value) def test_sanitize_date(self): - popu = PopulateDocument() + popu = PopulateDocument(self.config) self.assertEqual( popu.sanitize_date("1999"), datetime.datetime(1999, 1, 1).date() @@ -224,7 +235,7 @@ def test_sanitize_date(self): self.assertEqual(popu.sanitize_date("invalidDate"), None) def test_sanitize_titles(self): - popu = PopulateDocument() + popu = PopulateDocument(self.config) self.assertEqual(popu.sanitize_languages([]), "") self.assertEqual( @@ -255,7 +266,7 @@ def test_sanitize_titles(self): ) def test_sanitize_document_type(self): - popu = PopulateDocument() + popu = PopulateDocument(self.config) self.assertEqual( popu.sanitize_document_type(None), diff --git a/services/crisalid/urls.py b/services/crisalid/urls.py index 8b7aeaff..ec65ac50 100644 --- a/services/crisalid/urls.py +++ b/services/crisalid/urls.py @@ -1,7 +1,10 @@ +from apps.commons.urls import ( + organization_researcher_router_register, + organization_router_register, +) from django.urls import include, path from rest_framework.routers import DefaultRouter -from apps.commons.urls import researcher_router_register from services.crisalid.views import ( ConferenceViewSet, PublicationViewSet, @@ -9,16 +12,17 @@ ) researcher_router = DefaultRouter() -researcher_router.register(r"researcher", ResearcherViewSet, basename="Researcher") -researcher_router_register( +organization_router_register(r"researcher", ResearcherViewSet, 
basename="Researcher") + +organization_researcher_router_register( researcher_router, r"publications", PublicationViewSet, basename="ResearcherPublications", ) -researcher_router_register( +organization_researcher_router_register( researcher_router, r"conferences", ConferenceViewSet, diff --git a/services/crisalid/views.py b/services/crisalid/views.py index dc0a63d3..ccde94cf 100644 --- a/services/crisalid/views.py +++ b/services/crisalid/views.py @@ -290,9 +290,14 @@ class ResearcherViewSet(viewsets.ReadOnlyModelViewSet): serializer_class = ResearcherSerializer filter_backends = (DjangoFilterBackend,) filterset_fields = ("user_id", "crisalid_uid", "id") - queryset = ( - Researcher.objects.all().prefetch_related("identifiers").select_related("user") - ) + + def get_queryset(self): + return ( + super() + .get_queryset() + .prefetch_related("identifiers") + .select_related("user") + ) @action( detail=False, From bc839fc4f82d9dac5a8f85e9779d45de2696c7f4 Mon Sep 17 00:00:00 2001 From: rgermain Date: Thu, 27 Nov 2025 18:30:13 +0100 Subject: [PATCH 02/32] fix(crisalid): add command to fetchresearch graphql --- .../management/commands/populate_crisalid.py | 52 +++++++++++++++---- services/crisalid/queries/people.graphql | 20 +++++++ 2 files changed, 61 insertions(+), 11 deletions(-) create mode 100644 services/crisalid/queries/people.graphql diff --git a/services/crisalid/management/commands/populate_crisalid.py b/services/crisalid/management/commands/populate_crisalid.py index 4c647ac2..b24943ee 100644 --- a/services/crisalid/management/commands/populate_crisalid.py +++ b/services/crisalid/management/commands/populate_crisalid.py @@ -9,7 +9,8 @@ Identifier, Researcher, ) -from services.crisalid.populates import PopulateDocument +from services.crisalid.populates import PopulateDocument, PopulateResearcher +from services.crisalid.populates.base import AbstractPopulate from services.crisalid.utils import timeit from services.mistral.models import DocumentEmbedding @@ -18,6 
+19,11 @@ class Command(BaseCommand): help = "create or update data from researcher/Document crisalid neo4j/graphql" # noqa: A003 def add_arguments(self, parser): + parser.add_argument( + "command", + choices=("document", "researcher", "all"), + help="elements to populate", + ) parser.add_argument( "--delete", help="delete all crisalid models", @@ -41,26 +47,28 @@ def delete_crisalid_models(self): deleted = model.objects.all().delete() print(f"deleted {model=}: {deleted=}") - def handle(self, **options): - if options["delete"]: - self.delete_crisalid_models() - - service = CrisalidService() - populate = PopulateDocument() + def populate_crisalid( + self, + service: CrisalidService, + populate: AbstractPopulate, + query: str, + where: None, + **options, + ): offset = int(options["offset"]) limit = int(options["limit"]) max_elements = float(options["max"]) total = 0 - with timeit(print, "Populate All Data"): + with timeit(print, f"Populate All Data from '{query}'"): while max_elements >= 1: with timeit(print, "GrapQL request "): - data = service.query("documents", offset=offset, limit=limit)[ - "documents" - ] + data = service.query( + query, offset=offset, limit=limit, where=where + )[query] if not data: break @@ -72,3 +80,25 @@ def handle(self, **options): offset += limit max_elements -= 1 + + def handle(self, **options): + if options["delete"]: + self.delete_crisalid_models() + + command = options["command"] + service = CrisalidService() + + if command in ("all", "document"): + populate = PopulateDocument() + self.populate_crisalid(service, populate, query="documents", **options) + + if command in ("all", "researcher"): + populate = PopulateResearcher() + self.populate_crisalid( + service, + populate, + query="people", + # populate only local researcher + where={"external_EQ": False}, + **options, + ) diff --git a/services/crisalid/queries/people.graphql b/services/crisalid/queries/people.graphql new file mode 100644 index 00000000..2f17f160 --- /dev/null +++ 
b/services/crisalid/queries/people.graphql @@ -0,0 +1,20 @@ +query PopulateFromCrisalid($limit: Int, $offset: Int, $where: PersonWhere) { + people(limit: $limit, offset: $offset, where: $where) { + uid + display_name + names { + first_names { + language + value + } + last_names { + language + value + } + } + identifiers { + type + value + } + } +} From 72fea803f513cdc9a9d8d9ea771857f1e9592f61 Mon Sep 17 00:00:00 2001 From: rgermain Date: Fri, 28 Nov 2025 13:05:13 +0100 Subject: [PATCH 03/32] rework --- apps/commons/permissions.py | 12 ++- apps/commons/urls.py | 65 +++++--------- apps/commons/views.py | 13 ++- apps/organizations/urls.py | 6 +- projects/asgi.py | 11 +++ projects/settings/base.py | 2 + projects/wsgi.py | 11 +++ services/crisalid/admin.py | 23 ++--- services/crisalid/bus/client.py | 22 +++-- services/crisalid/bus/consumer.py | 6 ++ services/crisalid/bus/logger.py | 0 .../bus/{organization.py => runner.py} | 22 ++--- services/crisalid/factories.py | 64 ++++++------- ...ove_crisalidconfig_apollo_host_and_more.py | 37 ++++++++ ...d_document_unique_crisalid_uid_and_more.py | 53 +++++++++++ services/crisalid/models.py | 26 +++--- services/crisalid/populates/caches.py | 4 +- services/crisalid/populates/document.py | 2 +- services/crisalid/populates/researcher.py | 20 +++-- services/crisalid/serializers.py | 12 ++- services/crisalid/signals.py | 9 +- services/crisalid/tasks.py | 1 + services/crisalid/tests/test_crisalid_bus.py | 9 +- services/crisalid/tests/test_populate.py | 40 +++++---- services/crisalid/tests/test_views.py | 89 +++++++++++++++++-- services/crisalid/urls.py | 10 ++- services/crisalid/views.py | 19 ++-- 27 files changed, 396 insertions(+), 192 deletions(-) delete mode 100644 services/crisalid/bus/logger.py rename services/crisalid/bus/{organization.py => runner.py} (81%) create mode 100644 services/crisalid/migrations/0004_remove_crisalidconfig_apollo_host_and_more.py create mode 100644 
services/crisalid/migrations/0005_remove_document_crisalid_document_unique_crisalid_uid_and_more.py diff --git a/apps/commons/permissions.py b/apps/commons/permissions.py index 97ef2bd9..af89a98a 100644 --- a/apps/commons/permissions.py +++ b/apps/commons/permissions.py @@ -68,7 +68,7 @@ def has_permission(self, request: Request, view: GenericViewSet) -> bool: return False def has_object_permission( - self, request: Request, view: GenericViewSet, obj: Union[HasOwner, HasOwners] + self, request: Request, view: GenericViewSet, obj: HasOwner | HasOwners ) -> bool: return request.user.is_authenticated and obj.is_owned_by(request.user) @@ -91,3 +91,13 @@ def has_object_permission( self, request: Request, view: GenericViewSet, obj ) -> bool: return self.has_permission(request, view) + + +class OrganizationPermission(permissions.BasePermission): + def has_permission(self, request: Request, view: GenericViewSet, obj=None) -> bool: + if request.user.is_superuser: + return True + grp = view.organization.get_users() + return request.user.groups.contains(grp) + + has_object_permission = has_permission diff --git a/apps/commons/urls.py b/apps/commons/urls.py index 4678e898..9a51fb04 100644 --- a/apps/commons/urls.py +++ b/apps/commons/urls.py @@ -1,3 +1,5 @@ +import os + from rest_framework.routers import DefaultRouter, DynamicRoute, Route from rest_framework.views import View @@ -60,76 +62,57 @@ class OneToOneExtendedRouter(OneToOneRouter, ExtendedRouter): """ +ORGANIZATION_PREFIX = r"organization/(?P[^/]+)" +PEOPLEGROUP_PREFIX = r"people-group/(?P[^/]+)" +RESEARCHER_PREFIX = r"researcher/(?P[^/]+)" +PROJECT_PREFIX = r"project/(?P[^/]+)" +USER_PREFIX = r"user/(?P[^/]+)" + + def organization_router_register( router: DefaultRouter, path: str, viewset: View, basename: str = None ): - prefix = r"organization/(?P[^/]+)" - if path: - prefix += r"/" + path - router.register(prefix, viewset, basename) + url = os.path.join(ORGANIZATION_PREFIX, path) + router.register(url, viewset, 
basename) def organization_project_router_register( router: DefaultRouter, path: str, viewset: View, basename: str = None ): - prefix = r"organization/(?P[^/]+)/project/(?P[^/]+)" - if path: - prefix += r"/" + path - router.register(prefix, viewset, basename) + url = os.path.join(ORGANIZATION_PREFIX, PROJECT_PREFIX, path) + router.register(url, viewset, basename) def project_router_register( router: DefaultRouter, path: str, viewset: View, basename: str = None ): - prefix = r"project/(?P[^/]+)" - if path: - prefix += r"/" + path - router.register(prefix, viewset, basename) + url = os.path.join(PROJECT_PREFIX, path) + router.register(url, viewset, basename) -def people_group_router_register( +def organization_people_group_router_register( router: DefaultRouter, path: str, viewset: View, basename: str = None ): - prefix = ( - r"organization/(?P[^/]+)/" - r"people-group/(?P[^/]+)" - ) - if path: - prefix += r"/" + path - router.register(prefix, viewset, basename) + url = os.path.join(ORGANIZATION_PREFIX, PEOPLEGROUP_PREFIX, path) + router.register(url, viewset, basename) def organization_user_router_register( router: DefaultRouter, path: str, viewset: View, basename: str = None ): - prefix = r"organization/(?P[^/]+)/user/(?P[^/]+)" - if path: - prefix += r"/" + path - router.register(prefix, viewset, basename) + url = os.path.join(ORGANIZATION_PREFIX, USER_PREFIX, path) + router.register(url, viewset, basename) def user_router_register( router: DefaultRouter, path: str, viewset: View, basename: str = None ): - prefix = r"user/(?P[^/]+)" - if path: - prefix += r"/" + path - router.register(prefix, viewset, basename) + url = os.path.join(USER_PREFIX, path) + router.register(url, viewset, basename) def organization_researcher_router_register( router: DefaultRouter, path: str, viewset: View, basename: str = None ): - prefix = r"organization/(?P[^/]+)/researcher/(?P[^/]+)" - if path: - prefix += r"/" + path - router.register(prefix, viewset, basename) - - -def 
researcher_router_register( - router: DefaultRouter, path: str, viewset: View, basename: str = None -): - prefix = r"researcher/(?P[^/]+)" - if path: - prefix += r"/" + path - router.register(prefix, viewset, basename) + url = os.path.join(ORGANIZATION_PREFIX, RESEARCHER_PREFIX, path) + router.register(url, viewset, basename) diff --git a/apps/commons/views.py b/apps/commons/views.py index 2b17a8e3..705f880d 100644 --- a/apps/commons/views.py +++ b/apps/commons/views.py @@ -5,6 +5,8 @@ from rest_framework.response import Response from rest_framework.settings import api_settings +from apps.organizations.models import Organization + from .mixins import HasMultipleIDs @@ -60,7 +62,7 @@ class ReadDestroyModelViewSet( class MultipleIDViewsetMixin: - multiple_lookup_fields: List[Tuple[HasMultipleIDs, str]] = [] + multiple_lookup_fields: list[tuple[HasMultipleIDs, str]] = [] def dispatch(self, request, *args, **kwargs): """ @@ -141,3 +143,12 @@ def get_paginated_list(self, queryset): queryset, many=True, context=self.get_serializer_context() ) return Response(serializer.data) + + +class NestedOrganizationViewMixins: + def initial(self, request, *args, **kwargs): + self.organization = get_object_or_404( + Organization, code=kwargs["organization_code"] + ) + + super().initial(request, *args, **kwargs) diff --git a/apps/organizations/urls.py b/apps/organizations/urls.py index 65e5bc1d..25fb0be3 100644 --- a/apps/organizations/urls.py +++ b/apps/organizations/urls.py @@ -7,8 +7,8 @@ ) from apps.commons.urls import ( OneToOneRouter, + organization_people_group_router_register, organization_router_register, - people_group_router_register, ) from apps.invitations.views import AccessRequestViewSet, InvitationViewSet @@ -68,9 +68,9 @@ ) one_to_one_router = OneToOneRouter() -people_group_router_register( +organization_people_group_router_register( one_to_one_router, r"logo", PeopleGroupLogoView, basename="PeopleGroup-logo" ) -people_group_router_register( 
+organization_people_group_router_register( one_to_one_router, r"header", PeopleGroupHeaderView, basename="PeopleGroup-header" ) diff --git a/projects/asgi.py b/projects/asgi.py index aae2493a..e59b7e26 100644 --- a/projects/asgi.py +++ b/projects/asgi.py @@ -7,6 +7,7 @@ https://docs.djangoproject.com/en/3.2/howto/deployment/asgi/ """ +import logging import os from django.core.asgi import get_asgi_application @@ -14,3 +15,13 @@ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "projects.settings.base") application = get_asgi_application() + + +from django.conf import settings # noqa: E402 + +from services.crisalid.bus.runner import initial_start_crisalidbus # noqa: E402 + +if settings.ENABLE_CRISALID_BUS: + initial_start_crisalidbus() +else: + logging.info("CrisalidBus is not enabled") diff --git a/projects/settings/base.py b/projects/settings/base.py index 53ea32ca..2d559552 100644 --- a/projects/settings/base.py +++ b/projects/settings/base.py @@ -690,3 +690,5 @@ AZURE_TRANSLATOR_ENDPOINT = os.getenv( "AZURE_TRANSLATOR_ENDPOINT", "https://api.cognitive.microsofttranslator.com" ) + +ENABLE_CRISALID_BUS = os.getenv("ENABLE_CRISALID_BUS", "false").lower() == "true" diff --git a/projects/wsgi.py b/projects/wsgi.py index f44e3d22..65b0d225 100644 --- a/projects/wsgi.py +++ b/projects/wsgi.py @@ -7,6 +7,7 @@ https://docs.djangoproject.com/en/3.2/howto/deployment/wsgi/ """ +import logging import os from django.core.wsgi import get_wsgi_application @@ -14,3 +15,13 @@ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "projects.settings.base") application = get_wsgi_application() + + +from django.conf import settings # noqa: E402 + +from services.crisalid.bus.runner import initial_start_crisalidbus # noqa: E402 + +if settings.ENABLE_CRISALID_BUS: + initial_start_crisalidbus() +else: + logging.info("CrisalidBus is not enabled") diff --git a/services/crisalid/admin.py b/services/crisalid/admin.py index f6e92988..61409aec 100644 --- a/services/crisalid/admin.py +++ 
b/services/crisalid/admin.py @@ -1,5 +1,3 @@ -from typing import Any, Optional - from apps.accounts.models import ProjectUser from django import forms from django.contrib import admin @@ -53,7 +51,8 @@ class DocumentAdmin(admin.ModelAdmin): "title", "publication_date", "document_type", - "contributors__display_name", + "contributors__given_name", + "contributors__family_name", "identifiers__value", "identifiers__harvester", ) @@ -89,13 +88,13 @@ def get_identifiers(self, instance): class ResearcherAdmin(admin.ModelAdmin): list_display = ( - "display_name", + "given_name", + "family_name", "user", "get_documents", "get_identifiers", ) search_fields = ( - "display_name", "user__given_name", "user__family_name", "identifiers__value", @@ -130,18 +129,10 @@ def assign_user(self, request, queryset): user = ProjectUser.objects.filter(email=identifier.value) if not user: - # TODO(remi): create 2 field in models researcher ? - given_name, family_name = "", "" - splitter = research.display_name.split(" ", 1) - if len(splitter) >= 1: - given_name = splitter[0] - if len(splitter) >= 2: - given_name = " ".join(splitter[1:]) - user = ProjectUser( email=identifier.value, - given_name=given_name, - family_name=family_name, + given_name=research.given_name, + family_name=research.family_name, ) user.save() @@ -176,7 +167,7 @@ class Meta: class CrisalidConfigAdmin(admin.ModelAdmin): list_display = ("organization", "active") - search_fields = ("organization", "active") + search_fields = ("organization__code", "active") autocomplete_fields = ("organization",) form = CrisalidConfigForm diff --git a/services/crisalid/bus/client.py b/services/crisalid/bus/client.py index 5426ff45..fea0c310 100644 --- a/services/crisalid/bus/client.py +++ b/services/crisalid/bus/client.py @@ -1,16 +1,12 @@ import json import logging import time -from collections.abc import Callable import jsonschema import pika from urllib3.util import parse_url -from services.crisalid.bus.constant import ( - 
CRISALID_MESSAGE_SCHEMA, - CrisalidEventEnum, -) +from services.crisalid.bus.constant import CRISALID_MESSAGE_SCHEMA, CrisalidEventEnum from services.crisalid.models import CrisalidConfig from .consumer import crisalid_consumer @@ -39,13 +35,14 @@ def __init__(self, config: CrisalidConfig): self._run: bool = True self.logger = logging.getLogger(config.organization.code) - def connect(self): - assert self.conn is None, "rabimqt is already started" + def parameters(self) -> dict | None: + """generate parametrs for crislaid and check values""" + # url is complte (ex: "http://crisalid:4325") + # get url without port, and set port for pika url = parse_url(self.config.crisalidbus_url) - parameters = { - "host": url.host, + "host": url.url, "port": url.port, "user": self.config.crisalidbus_username, "password": self.config.crisalidbus_password, @@ -60,6 +57,13 @@ def connect(self): ) return + return parameters + + def connect(self): + assert self.conn is None, "rabimqt is already started" + + parameters = self.parameters() + retry = 1 # run in loop to retry when connection is lost while self._run: diff --git a/services/crisalid/bus/consumer.py b/services/crisalid/bus/consumer.py index acfdcfdb..e16095de 100644 --- a/services/crisalid/bus/consumer.py +++ b/services/crisalid/bus/consumer.py @@ -11,10 +11,13 @@ class CrisalidConsumer: + """class to register callback on rabitmqt event""" + def __init__(self): self.clean() def clean(self): + """remove all registered callback""" self._consumer: dict[CrisalidTypeEnum, dict[CrisalidEventEnum, Callable]] = ( defaultdict(lambda: defaultdict(lambda: None)) ) @@ -33,6 +36,9 @@ def add_callback( self._consumer[crisalid_type.value][crisalid_event.value] = callback return callback + def __getitem__(self, key): + return self._consumer[key] + crisalid_consumer = CrisalidConsumer() diff --git a/services/crisalid/bus/logger.py b/services/crisalid/bus/logger.py deleted file mode 100644 index e69de29b..00000000 diff --git 
a/services/crisalid/bus/organization.py b/services/crisalid/bus/runner.py similarity index 81% rename from services/crisalid/bus/organization.py rename to services/crisalid/bus/runner.py index 13d8d7dc..a18361d2 100644 --- a/services/crisalid/bus/organization.py +++ b/services/crisalid/bus/runner.py @@ -46,13 +46,11 @@ def stop(self): def start_crisalidbus(config: CrisalidConfig): with rlock: client = organization_maps.get(config.organization.code) - if client is None: - client = OrganizationClient(config) - organization_maps[config.organization.code] = client - else: - client.stop() - client.config = config + if client is not None: + stop_crisalidbus(client.config) + client = OrganizationClient(config) + organization_maps[config.organization.code] = client client.start() @@ -67,13 +65,17 @@ def stop_crisalidbus(config: CrisalidConfig): def delete_crisalidbus(config: CrisalidConfig): with rlock: - client = organization_maps.get(config.organization.code) - if client is None: - return - client.stop() + stop_crisalidbus(config) del organization_maps[config.organization.code] +def initial_start_crisalidbus(): + """ "first start all thread (when server web is started)""" + with rlock: + for config in CrisalidConfig.objects.filter(active=True): + start_crisalidbus(config) + + # safe stop all crisalid bus @atexit.register def _stop_all_crisalid(): diff --git a/services/crisalid/factories.py b/services/crisalid/factories.py index 988755b4..3f0de294 100644 --- a/services/crisalid/factories.py +++ b/services/crisalid/factories.py @@ -16,46 +16,33 @@ faker = Faker() -HAL = "hal" -SCANR = "scanr" -OPENALEX = "openalex" - -IDREF = "idref" -SCOPUS = "scopus" -ORCID = "orcid" -LOCAL = "local" -EPPN = "eppn" -DOI = "doi" -PMID = "pmid" - - -def harvester_values(harvester_type: Identifier.Harvester): - return { - Identifier.Harvester.HAL: factory.Faker("url"), - Identifier.Harvester.SCANR: factory.Faker("url"), - Identifier.Harvester.OPENALEX: factory.Faker("url"), - 
Identifier.Harvester.IDREF: factory.Faker("uuid4"), - Identifier.Harvester.SCOPUS: factory.Faker("uuid4"), - Identifier.Harvester.ORCID: factory.Faker("uuid4"), - Identifier.Harvester.LOCAL: factory.Faker("uuid4"), - Identifier.Harvester.EPPN: factory.Faker("email"), - Identifier.Harvester.DOI: factory.Faker("doi"), - Identifier.Harvester.PMID: factory.Faker("url"), - }[harvester_type] - class IdentifierFactory(factory.django.DjangoModelFactory): harvester = Identifier.Harvester.EPPN - value = harvester_values(harvester) class Meta: model = Identifier + @factory.lazy_attribute + def value(self): + return { + Identifier.Harvester.HAL: faker.unique.url(), + Identifier.Harvester.SCANR: faker.unique.url(), + Identifier.Harvester.OPENALEX: faker.unique.url(), + Identifier.Harvester.IDREF: faker.unique.uuid4(), + Identifier.Harvester.SCOPUS: faker.unique.uuid4(), + Identifier.Harvester.ORCID: faker.unique.uuid4(), + Identifier.Harvester.LOCAL: faker.unique.uuid4(), + Identifier.Harvester.EPPN: faker.unique.email(), + Identifier.Harvester.DOI: faker.unique.doi(), + Identifier.Harvester.PMID: faker.unique.url(), + }[self.harvester] + class ResearcherFactory(factory.django.DjangoModelFactory): - crisalid_uid = factory.Faker("uuid4") user = factory.LazyFunction(lambda: UserFactory()) - display_name = f"{factory.Faker("first_name")} {factory.Faker("last_name")}" + given_name = faker.first_name() + family_name = faker.last_name() class Meta: model = Researcher @@ -71,10 +58,9 @@ def identifiers(self, create, extracted, **kwargs): class DocumentFactory(factory.django.DjangoModelFactory): - crisalid_uid = factory.Faker("uuid4") - title = factory.Faker("sentence", nb_words=5) - description = factory.Faker("text") - publication_date = factory.Faker("date_time") + title = faker.sentence(nb_words=5) + description = faker.text() + publication_date = faker.date_time() document_type = FuzzyChoice( Document.DocumentType.choices, getter=lambda obj: obj[0] ) @@ -103,11 +89,11 @@ class 
Meta: class CrisalidConfigFactory(factory.django.DjangoModelFactory): organization = factory.LazyFunction(lambda: OrganizationFactory()) - crisalidbus_host = factory.Factory("url") - crisalidbus_username = factory.Factory("username") - crisalidbus_password = factory.Factory("password") - apollo_host = factory.Factory("url") - apollo_token = factory.Factory("password") + crisalidbus_url = faker.url() + crisalidbus_username = faker.user_name() + crisalidbus_password = faker.password() + apollo_url = faker.url() + apollo_token = faker.password() active = True class Meta: diff --git a/services/crisalid/migrations/0004_remove_crisalidconfig_apollo_host_and_more.py b/services/crisalid/migrations/0004_remove_crisalidconfig_apollo_host_and_more.py new file mode 100644 index 00000000..a53d4534 --- /dev/null +++ b/services/crisalid/migrations/0004_remove_crisalidconfig_apollo_host_and_more.py @@ -0,0 +1,37 @@ +# Generated by Django 4.2.25 on 2025-11-28 08:55 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("crisalid", "0003_alter_crisalidconfig_apollo_host_and_more"), + ] + + operations = [ + migrations.RemoveField( + model_name="crisalidconfig", + name="apollo_host", + ), + migrations.RemoveField( + model_name="crisalidconfig", + name="crisalidbus_host", + ), + migrations.AddField( + model_name="crisalidconfig", + name="apollo_url", + field=models.CharField( + default="", help_text="apollo/graphql host:port", max_length=255 + ), + preserve_default=False, + ), + migrations.AddField( + model_name="crisalidconfig", + name="crisalidbus_url", + field=models.CharField( + default="", help_text="crisalidbus/rabimqt host:port", max_length=255 + ), + preserve_default=False, + ), + ] diff --git a/services/crisalid/migrations/0005_remove_document_crisalid_document_unique_crisalid_uid_and_more.py b/services/crisalid/migrations/0005_remove_document_crisalid_document_unique_crisalid_uid_and_more.py new file mode 100644 index 
00000000..d7b214bc --- /dev/null +++ b/services/crisalid/migrations/0005_remove_document_crisalid_document_unique_crisalid_uid_and_more.py @@ -0,0 +1,53 @@ +# Generated by Django 4.2.25 on 2025-11-28 10:16 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("crisalid", "0004_remove_crisalidconfig_apollo_host_and_more"), + ] + + operations = [ + migrations.RemoveConstraint( + model_name="document", + name="crisalid_document_unique_crisalid_uid", + ), + migrations.RemoveConstraint( + model_name="researcher", + name="crisalid_researcher_unique_crisalid_uid", + ), + migrations.RemoveField( + model_name="document", + name="crisalid_uid", + ), + migrations.RemoveField( + model_name="researcher", + name="crisalid_uid", + ), + migrations.RemoveField( + model_name="researcher", + name="display_name", + ), + migrations.AddField( + model_name="document", + name="updated", + field=models.DateTimeField(auto_created=True, auto_now=True), + ), + migrations.AddField( + model_name="researcher", + name="family_name", + field=models.CharField(blank=True, max_length=255), + ), + migrations.AddField( + model_name="researcher", + name="given_name", + field=models.CharField(blank=True, max_length=255), + ), + migrations.AddField( + model_name="researcher", + name="updated", + field=models.DateTimeField(auto_created=True, auto_now=True), + ), + ] diff --git a/services/crisalid/models.py b/services/crisalid/models.py index 7d076d32..16dcae3e 100644 --- a/services/crisalid/models.py +++ b/services/crisalid/models.py @@ -1,11 +1,11 @@ from collections.abc import Generator -from apps.commons.mixins import OrganizationRelated from django import forms from django.contrib.postgres.fields import ArrayField from django.db import models from django.db.models.functions import Lower +from apps.commons.mixins import OrganizationRelated from services.crisalid import relators from services.mistral.models import DocumentEmbedding from 
services.translator.mixins import HasAutoTranslatedFields @@ -37,17 +37,10 @@ def formfield(self, **kwargs): class CrisalidDataModel(models.Model): - crisalid_uid = models.CharField( - max_length=255, blank=True, null=True, db_index=True - ) + updated = models.DateTimeField(auto_created=True, auto_now=True) class Meta: abstract = True - constraints = ( - models.UniqueConstraint( - "crisalid_uid", name="%(app_label)s_%(class)s_unique_crisalid_uid" - ), - ) class Identifier(models.Model): @@ -90,7 +83,8 @@ class Researcher(CrisalidDataModel): # if no user linked to projects null=True, ) - display_name = models.CharField(max_length=200, blank=True, null=True) + given_name = models.CharField(max_length=255, blank=True) + family_name = models.CharField(max_length=255, blank=True) identifiers = models.ManyToManyField( "crisalid.Identifier", related_name="researchers" ) @@ -98,7 +92,10 @@ class Researcher(CrisalidDataModel): def __str__(self): if hasattr(self, "user") and self.user is not None: return self.user.get_full_name() - return f"{self.display_name}" + return self.get_full_name() + + def get_full_name(self): + return f"{self.given_name.capitalize()} {self.family_name.capitalize()}".strip() class DocumentContributor(models.Model): @@ -280,8 +277,9 @@ class CrisalidConfig(OrganizationRelated, models.Model): related_name="crisalid", ) - crisalidbus_host = models.URLField(help_text="crisalidbus/rabimqt host:port") - # crisalidbu_port = models.URLField(help_text="crisalidbus/rabimqt host:port") + crisalidbus_url = models.CharField( + max_length=255, help_text="crisalidbus/rabimqt host:port" + ) crisalidbus_username = models.CharField( max_length=255, help_text="crisalidbus/rabimqt username" ) @@ -289,7 +287,7 @@ class CrisalidConfig(OrganizationRelated, models.Model): max_length=255, help_text="crisalidbus/rabimqt password" ) - apollo_host = models.URLField(help_text="apollo/graphql host:port") + apollo_url = models.CharField(max_length=255, help_text="apollo/graphql 
host:port") apollo_token = models.CharField(max_length=255, help_text="apollo token") active = models.BooleanField(help_text="config is enabled/disabled", default=False) diff --git a/services/crisalid/populates/caches.py b/services/crisalid/populates/caches.py index 60389039..79d9333a 100644 --- a/services/crisalid/populates/caches.py +++ b/services/crisalid/populates/caches.py @@ -25,7 +25,7 @@ def model(self, model, *fields): """get object element from model/fields""" @abc.abstractclassmethod - def indentifiers(self, model, identifiers: list[Identifier]): + def from_identifiers(self, model, identifiers: list[Identifier]): """get object element from identifiers lists""" @@ -56,7 +56,7 @@ def model(self, model: Model, **fields): except model.DoesNotExist: return model(**fields) - def identifiers(self, model, identifiers): + def from_identifiers(self, model, identifiers): try: return model.objects.filter(identifiers__in=identifiers).distinct().get() except model.DoesNotExist: diff --git a/services/crisalid/populates/document.py b/services/crisalid/populates/document.py index 385d06eb..2827a1a8 100644 --- a/services/crisalid/populates/document.py +++ b/services/crisalid/populates/document.py @@ -47,7 +47,7 @@ def single(self, data: dict): self.cache.save(identifier) documents_identifiers.append(identifier) - document = self.cache.indentifiers(Document, documents_identifiers) + document = self.cache.from_identifiers(Document, documents_identifiers) self.cache.save( document, title=self.sanitize_languages(data["titles"]), diff --git a/services/crisalid/populates/researcher.py b/services/crisalid/populates/researcher.py index 1032f842..01ae538c 100644 --- a/services/crisalid/populates/researcher.py +++ b/services/crisalid/populates/researcher.py @@ -21,8 +21,10 @@ def check_mapping_user( ) -> ProjectUser | None: """match user from researcher (need eppn)""" + group_organization = self.config.organization.get_users() + if researcher.user: - 
researcher.user.groups.add(self.config.organization.get_users()) + researcher.user.groups.add(group_organization) return researcher.user for iden in data["identifiers"]: @@ -33,7 +35,6 @@ def check_mapping_user( user = self.cache.model( ProjectUser, email=iden["value"], - groups__in=(self.config.organization.get_users(),), ) # create only user if we have eppn @@ -44,7 +45,7 @@ def check_mapping_user( given_name=given_name, family_name=family_name, ) - user.groups.add(self.config.organization.get_users()) + user.groups.add(group_organization) return user return None @@ -57,18 +58,23 @@ def single(self, data: dict) -> Researcher: self.cache.save(identifier) researcher_identifiers.append(identifier) - # remove local identifiers to match only hal/eppn/orcid ..ect + # remove local/eppn identifiers to match only hal/eppn/orcid ..ect researcher_identifiers_without_local = [ identifier for identifier in researcher_identifiers - if identifier.harvester != Identifier.Harvester.LOCAL + if identifier.harvester + not in [Identifier.Harvester.LOCAL, Identifier.Harvester.EPPN] ] - researcher = self.cache.indentifiers( + researcher = self.cache.from_identifiers( Researcher, researcher_identifiers_without_local ) user = self.check_mapping_user(researcher, data) - self.cache.save(researcher, display_name=data["display_name"], user=user) + given_name, family_name = self.get_names(data) + + self.cache.save( + researcher, given_name=given_name, family_name=family_name, user=user + ) self.cache.save_m2m(researcher, identifiers=researcher_identifiers) return researcher diff --git a/services/crisalid/serializers.py b/services/crisalid/serializers.py index 26da69ed..a58df16f 100644 --- a/services/crisalid/serializers.py +++ b/services/crisalid/serializers.py @@ -1,6 +1,6 @@ +from apps.accounts.models import ProjectUser from rest_framework import serializers -from apps.accounts.models import ProjectUser from services.crisalid.models import Document, Identifier, Researcher from 
services.translator.serializers import AutoTranslatedModelSerializer @@ -40,7 +40,7 @@ class ResearcherSerializer(serializers.ModelSerializer): class Meta: model = Researcher - exclude = ("crisalid_uid",) + exclude = ("updated",) def get_display_name(self, instance): return str(instance) @@ -49,16 +49,20 @@ def get_display_name(self, instance): class ResearcherDocumentsSerializer(ResearcherSerializer): user = ProjectUserMinimalSerializer() identifiers = IdentifierSerializer(many=True) + display_name = serializers.SerializerMethodField() class Meta: model = Researcher + read_only_fields = ("display_name",) fields = ( "identifiers", - "display_name", "user", "id", ) + def get_display_name(self, instance): + return str(instance) + class DocumentLightSerializer(AutoTranslatedModelSerializer): class Meta: @@ -73,7 +77,7 @@ class DocumentSerializer(DocumentLightSerializer): class Meta: model = Document - exclude = ("crisalid_uid",) + exclude = ("updated",) def get_similars(self, instance: Document): """return similar count""" diff --git a/services/crisalid/signals.py b/services/crisalid/signals.py index 22e01257..eec11e09 100644 --- a/services/crisalid/signals.py +++ b/services/crisalid/signals.py @@ -2,10 +2,7 @@ from django.dispatch import receiver from services.crisalid.apps import CrisalidConfig -from services.crisalid.bus.organization import ( - remove_crisalidbus, - start_crisalidbus, -) +from services.crisalid.bus.runner import delete_crisalidbus, start_crisalidbus @receiver(post_save, sender=CrisalidConfig) @@ -13,9 +10,9 @@ def on_save(sender, instance, **kwargs): if instance.active: start_crisalidbus(instance) else: - remove_crisalidbus(instance) + delete_crisalidbus(instance) @receiver(post_delete, sender=CrisalidConfig) def on_delete(sender, instance, **kwargs): - remove_crisalidbus(instance) + delete_crisalidbus(instance) diff --git a/services/crisalid/tasks.py b/services/crisalid/tasks.py index 0e0afb86..0b21ebf4 100644 --- a/services/crisalid/tasks.py +++ 
b/services/crisalid/tasks.py @@ -39,6 +39,7 @@ def delete_person(crisalid_config_id: int, fields: dict): config = get_crisalid_config(crisalid_config_id) logger.error("receive %s for organization %s", fields, config.organization) + # TODO(remi): remove crisalid_uid deleted = Researcher.objects.filter( crisalid_uid=fields["uid"], user__groups__in=(config.organization.get_users(),) ).delete() diff --git a/services/crisalid/tests/test_crisalid_bus.py b/services/crisalid/tests/test_crisalid_bus.py index c4c65ce8..de20981f 100644 --- a/services/crisalid/tests/test_crisalid_bus.py +++ b/services/crisalid/tests/test_crisalid_bus.py @@ -6,6 +6,7 @@ from services.crisalid.bus.client import CrisalidBusClient from services.crisalid.bus.constant import CrisalidEventEnum, CrisalidTypeEnum from services.crisalid.bus.consumer import crisalid_consumer +from services.crisalid.factories import CrisalidConfigFactory class TestCrisalidBus(test.TestCase): @@ -24,8 +25,10 @@ def setUpClass(cls): cls.properties = Mock() cls.method = Mock() + cls.config = CrisalidConfigFactory() + def setUp(self): - self.client = CrisalidBusClient() + self.client = CrisalidBusClient(self.config) crisalid_consumer.clean() def test_dispatch_no_callback(self): @@ -42,7 +45,9 @@ def test_dispatch_with_callback(self): self.client._dispatch(self.chanel, self.properties, self.method, self.payload) # normaly is called - callback.assert_called_once_with(json.loads(self.payload)["fields"]) + callback.assert_called_once_with( + self.config.organization.pk, json.loads(self.payload)["fields"] + ) def test_add_callback(self): callback = Mock() diff --git a/services/crisalid/tests/test_populate.py b/services/crisalid/tests/test_populate.py index 577680c8..8bc30b6e 100644 --- a/services/crisalid/tests/test_populate.py +++ b/services/crisalid/tests/test_populate.py @@ -18,7 +18,12 @@ def test_create_researcher(self): popu = PopulateResearcher(self.config) data = { "uid": "05-11-1995-uuid", - "display_name": "marty mcfly", 
+ "names": [ + { + "first_names": [{"value": "marty", "language": "fr"}], + "last_names": [{"value": "mcfly", "language": "fr"}], + } + ], "identifiers": [ {"value": "hals-truc", "type": Identifier.Harvester.HAL.value} ], @@ -30,8 +35,8 @@ def test_create_researcher(self): obj = Researcher.objects.first() self.assertEqual(obj, new_obj) - self.assertEqual(obj.display_name, "marty mcfly") - self.assertEqual(obj.crisalid_uid, "05-11-1995-uuid") + self.assertEqual(obj.given_name, "marty") + self.assertEqual(obj.family_name, "mcfly") self.assertEqual(obj.identifiers.count(), 1) iden = obj.identifiers.first() self.assertEqual(iden.value, "hals-truc") @@ -40,15 +45,18 @@ def test_create_researcher(self): def test_no_change_researcher(self): data = { "uid": "05-11-1995-uuid", - "display_name": "marty mcfly", + "names": [ + { + "first_names": [{"value": "marty", "language": "fr"}], + "last_names": [{"value": "mcfly", "language": "fr"}], + } + ], "identifiers": [ {"value": "hals-truc", "type": Identifier.Harvester.HAL.value} ], } # create same object in db - researcher = Researcher.objects.create( - crisalid_uid=data["uid"], display_name=data["display_name"] - ) + researcher = Researcher.objects.create(given_name="marty", family_name="mcfly") iden = Identifier.objects.create( value="hals-truc", harvester=Identifier.Harvester.HAL.value ) @@ -65,8 +73,6 @@ def test_no_change_researcher(self): obj = Researcher.objects.first() self.assertEqual(new_obj, obj) - self.assertEqual(obj.display_name, "marty mcfly") - self.assertEqual(obj.crisalid_uid, "05-11-1995-uuid") self.assertEqual(obj.identifiers.count(), 1) iden = obj.identifiers.first() self.assertEqual(iden.value, "hals-truc") @@ -74,16 +80,18 @@ def test_no_change_researcher(self): def test_update_identifiers(self): data = { - "uid": "05-11-1995-uuid", - "display_name": "marty mcfly", + "names": [ + { + "first_names": [{"value": "marty", "language": "fr"}], + "last_names": [{"value": "mcfly", "language": "fr"}], + } + ], 
"identifiers": [ {"value": "hals-truc", "type": Identifier.Harvester.HAL.value} ], } # create same object in db - researcher = Researcher.objects.create( - crisalid_uid=data["uid"], display_name=data["display_name"] - ) + researcher = Researcher.objects.create(given_name="marty", family_name="mcfly") iden = Identifier.objects.create( value="hals-truc", harvester=Identifier.Harvester.HAL.value ) @@ -110,7 +118,6 @@ def test_create_user_researcher(self): "uid": "05-11-1995-uuid", "first_names": "marty", "last_names": "mcfly", - "display_name": "marty mcfly", "identifiers": [ {"value": "hals-truc", "type": Identifier.Harvester.HAL.value}, {"value": "eppn@lpi.com", "type": Identifier.Harvester.EPPN.value}, @@ -130,7 +137,6 @@ def test_match_user_researcher(self): "uid": "05-11-1995-uuid", "first_names": "marty", "last_names": "mcfly", - "display_name": "marty mcfly", "identifiers": [ {"value": "hals-truc", "type": Identifier.Harvester.HAL.value}, {"value": "eppn@lpi.com", "type": Identifier.Harvester.EPPN.value}, @@ -175,7 +181,6 @@ def test_create_publication(self): "contributor": [ { "uid": "local-v9034", - "display_name": "Marty Mcfly", "names": [ { "first_names": [ @@ -211,7 +216,6 @@ def test_create_publication(self): self.assertEqual(obj, new_obj) self.assertEqual(obj.title, "fiction") - self.assertEqual(obj.crisalid_uid, "05-11-1995-uuid") self.assertEqual(obj.identifiers.count(), 1) self.assertEqual(obj.document_type, Document.DocumentType.UNKNOWN.value) iden = obj.identifiers.first() diff --git a/services/crisalid/tests/test_views.py b/services/crisalid/tests/test_views.py index e549626b..ada0d85d 100644 --- a/services/crisalid/tests/test_views.py +++ b/services/crisalid/tests/test_views.py @@ -1,5 +1,6 @@ import datetime +from apps.organizations.factories import OrganizationFactory from django import test from django.urls import reverse from rest_framework import status @@ -19,9 +20,15 @@ class TestDocumentView(test.TestCase): def setUpClass(cls) -> None: 
super().setUpClass() + cls.organization = OrganizationFactory() + cls.researcher = ResearcherFactory() cls.researcher_2 = ResearcherFactory() + grp = cls.organization.get_users() + cls.researcher.user.groups.add(grp) + cls.researcher_2.user.groups.add(grp) + # only for researcher 1 for i in range(10): document = DocumentFactory( @@ -58,7 +65,13 @@ def setUpClass(cls) -> None: def test_get_publications(self): # researcher 1 result = self.client.get( - reverse("ResearcherPublications-list", args=(self.researcher.pk,)) + reverse( + "ResearcherPublications-list", + args=( + self.organization.code, + self.researcher.pk, + ), + ) ) result = result.json() @@ -67,7 +80,13 @@ def test_get_publications(self): # researcher 2 result = self.client.get( - reverse("ResearcherPublications-list", args=(self.researcher_2.pk,)) + reverse( + "ResearcherPublications-list", + args=( + self.organization.code, + self.researcher_2.pk, + ), + ) ) result = result.json() @@ -76,7 +95,13 @@ def test_get_publications(self): def test_get_analytics(self): result = self.client.get( - reverse("ResearcherPublications-analytics", args=(self.researcher.pk,)) + reverse( + "ResearcherPublications-analytics", + args=( + self.organization.code, + self.researcher.pk, + ), + ) ) data = result.json() @@ -100,7 +125,13 @@ def test_get_analytics(self): def test_get_analytics_limit(self): result = self.client.get( - reverse("ResearcherPublications-analytics", args=(self.researcher.pk,)) + reverse( + "ResearcherPublications-analytics", + args=( + self.organization.code, + self.researcher.pk, + ), + ) + "?limit=4" ) @@ -121,12 +152,29 @@ class TestResearcherView(test.TestCase): @classmethod def setUpClass(cls): super().setUpClass() + cls.organization = OrganizationFactory() + cls.organization_2 = OrganizationFactory() cls.researcher = ResearcherFactory() cls.researcher_2 = ResearcherFactory() + cls.researcher_3 = ResearcherFactory() + + grp = cls.organization.get_users() + cls.researcher.user.groups.add(grp) + 
cls.researcher_2.user.groups.add(grp) + + # other researcher from other organization is not availables + grp = cls.organization_2.get_users() + cls.researcher_3.user.groups.add(grp) + + def setUp(self) -> None: + self.client.force_login(self.researcher.user) + return super().setUp() def test_get_list(self): - response = self.client.get(reverse("Researcher-list")) + response = self.client.get( + reverse("Researcher-list", args=(self.organization.code,)) + ) data = response.json() researcher_ids = sorted(researcher["id"] for researcher in data["results"]) @@ -135,20 +183,34 @@ def test_get_list(self): def test_get_detail(self): response = self.client.get( - reverse("Researcher-detail", args=(self.researcher.id,)) + reverse( + "Researcher-detail", + args=( + self.organization.code, + self.researcher.id, + ), + ) ) researcher = response.json() self.assertEqual(researcher["id"], self.researcher.id) def test_get_detail_not_know(self): - response = self.client.get(reverse("Researcher-detail", args=(666,))) + response = self.client.get( + reverse( + "Researcher-detail", + args=( + self.organization.code, + 666, + ), + ) + ) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) def test_search_not_found(self): response = self.client.get( - reverse("Researcher-search"), + reverse("Researcher-search", args=(self.organization.code,)), # data is queryparams data={"harvester": "idref", "values": "6666666"}, ) @@ -157,10 +219,19 @@ def test_search_not_found(self): expected = {} self.assertEqual(data["results"], expected) + def test_not_same_organization(self): + response = self.client.get( + reverse("Researcher-search", args=(self.organization_2.code,)), + # data is queryparams + data={"harvester": "idref", "values": "6666666"}, + ) + + self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) + def test_search_found(self): identifier = self.researcher.identifiers.first() response = self.client.get( - reverse("Researcher-search"), + 
reverse("Researcher-search", args=(self.organization.code,)), # data is queryparams data={ "harvester": identifier.harvester, diff --git a/services/crisalid/urls.py b/services/crisalid/urls.py index ec65ac50..8a2b612e 100644 --- a/services/crisalid/urls.py +++ b/services/crisalid/urls.py @@ -1,10 +1,10 @@ +from django.urls import include, path +from rest_framework.routers import DefaultRouter + from apps.commons.urls import ( organization_researcher_router_register, organization_router_register, ) -from django.urls import include, path -from rest_framework.routers import DefaultRouter - from services.crisalid.views import ( ConferenceViewSet, PublicationViewSet, @@ -13,7 +13,9 @@ researcher_router = DefaultRouter() -organization_router_register(r"researcher", ResearcherViewSet, basename="Researcher") +organization_router_register( + researcher_router, r"researcher", ResearcherViewSet, basename="Researcher" +) organization_researcher_router_register( researcher_router, diff --git a/services/crisalid/views.py b/services/crisalid/views.py index ccde94cf..1437e968 100644 --- a/services/crisalid/views.py +++ b/services/crisalid/views.py @@ -2,9 +2,13 @@ from http import HTTPMethod from itertools import chain +from apps.commons.permissions import OrganizationPermission, ReadOnly +from apps.organizations.models import Organization +from apps.organizations.permissions import OrganizationRelatedPermission from django.db.models import Count, QuerySet from django.db.models.functions import ExtractYear from django.http import JsonResponse +from django.shortcuts import get_object_or_404 from django_filters.rest_framework import DjangoFilterBackend from drf_spectacular.utils import ( OpenApiExample, @@ -12,6 +16,7 @@ extend_schema, extend_schema_view, ) +from lib.views import NestedOrganizationViewMixins from rest_framework import viewsets from rest_framework.decorators import action @@ -79,7 +84,9 @@ ], ), ) -class AbstractDocumentViewSet(viewsets.ReadOnlyModelViewSet): +class 
AbstractDocumentViewSet( + NestedOrganizationViewMixins, viewsets.ReadOnlyModelViewSet +): """Abstract class to get documents info from documents types""" serializer_class = DocumentSerializer @@ -286,15 +293,17 @@ class ConferenceViewSet(AbstractDocumentViewSet): ], ), ) -class ResearcherViewSet(viewsets.ReadOnlyModelViewSet): +class ResearcherViewSet(NestedOrganizationViewMixins, viewsets.ReadOnlyModelViewSet): serializer_class = ResearcherSerializer filter_backends = (DjangoFilterBackend,) - filterset_fields = ("user_id", "crisalid_uid", "id") + filterset_fields = ("user_id", "id") + permission_classes = (OrganizationPermission,) def get_queryset(self): return ( - super() - .get_queryset() + Researcher.objects.filter( + user__isnull=False, user__groups__in=(self.organization.get_users(),) + ) .prefetch_related("identifiers") .select_related("user") ) From 595b5562c134b8596326b61d1925c6fb79817b6f Mon Sep 17 00:00:00 2001 From: rgermain Date: Fri, 28 Nov 2025 13:14:20 +0100 Subject: [PATCH 04/32] fix tests --- apps/commons/permissions.py | 2 -- apps/commons/views.py | 2 -- services/crisalid/admin.py | 3 ++- services/crisalid/bus/client.py | 2 +- services/crisalid/factories.py | 4 ++-- services/crisalid/populates/researcher.py | 1 - services/crisalid/serializers.py | 2 +- services/crisalid/tasks.py | 1 - services/crisalid/tests/test_populate.py | 2 +- services/crisalid/tests/test_views.py | 11 ++++++----- services/crisalid/views.py | 7 ++----- 11 files changed, 15 insertions(+), 22 deletions(-) diff --git a/apps/commons/permissions.py b/apps/commons/permissions.py index af89a98a..5ce10fb4 100644 --- a/apps/commons/permissions.py +++ b/apps/commons/permissions.py @@ -1,5 +1,3 @@ -from typing import Union - from django.db.models import Model from rest_framework import permissions from rest_framework.request import Request diff --git a/apps/commons/views.py b/apps/commons/views.py index 705f880d..eb616211 100644 --- a/apps/commons/views.py +++ b/apps/commons/views.py 
@@ -1,5 +1,3 @@ -from typing import List, Tuple - from django.shortcuts import get_object_or_404 from rest_framework import mixins, viewsets from rest_framework.response import Response diff --git a/services/crisalid/admin.py b/services/crisalid/admin.py index 61409aec..bcc7dd17 100644 --- a/services/crisalid/admin.py +++ b/services/crisalid/admin.py @@ -1,8 +1,9 @@ -from apps.accounts.models import ProjectUser from django import forms from django.contrib import admin from django.db.models import Count +from apps.accounts.models import ProjectUser + from .models import ( CrisalidConfig, Document, diff --git a/services/crisalid/bus/client.py b/services/crisalid/bus/client.py index fea0c310..2f84fc59 100644 --- a/services/crisalid/bus/client.py +++ b/services/crisalid/bus/client.py @@ -55,7 +55,7 @@ def parameters(self) -> dict | None: self.logger.critical( "Can't instantiate CrisalidBus: invalid parameters, %s", parameters ) - return + return None return parameters diff --git a/services/crisalid/factories.py b/services/crisalid/factories.py index 3f0de294..e7be4ecd 100644 --- a/services/crisalid/factories.py +++ b/services/crisalid/factories.py @@ -1,9 +1,9 @@ import factory -from apps.accounts.factories import UserFactory -from apps.organizations.factories import OrganizationFactory from factory.fuzzy import FuzzyChoice from faker import Faker +from apps.accounts.factories import UserFactory +from apps.organizations.factories import OrganizationFactory from services.crisalid import relators from .models import ( diff --git a/services/crisalid/populates/researcher.py b/services/crisalid/populates/researcher.py index 01ae538c..42bc7fd7 100644 --- a/services/crisalid/populates/researcher.py +++ b/services/crisalid/populates/researcher.py @@ -1,5 +1,4 @@ from apps.accounts.models import ProjectUser - from services.crisalid.models import Identifier, Researcher from .base import AbstractPopulate diff --git a/services/crisalid/serializers.py 
b/services/crisalid/serializers.py index a58df16f..5fe5ba1b 100644 --- a/services/crisalid/serializers.py +++ b/services/crisalid/serializers.py @@ -1,6 +1,6 @@ -from apps.accounts.models import ProjectUser from rest_framework import serializers +from apps.accounts.models import ProjectUser from services.crisalid.models import Document, Identifier, Researcher from services.translator.serializers import AutoTranslatedModelSerializer diff --git a/services/crisalid/tasks.py b/services/crisalid/tasks.py index 0b21ebf4..2f0d02fe 100644 --- a/services/crisalid/tasks.py +++ b/services/crisalid/tasks.py @@ -1,7 +1,6 @@ import logging from projects.celery import app - from services.crisalid.apps import CrisalidConfig from services.crisalid.bus.constant import CrisalidEventEnum, CrisalidTypeEnum from services.crisalid.bus.consumer import on_event diff --git a/services/crisalid/tests/test_populate.py b/services/crisalid/tests/test_populate.py index 8bc30b6e..a47b6aa4 100644 --- a/services/crisalid/tests/test_populate.py +++ b/services/crisalid/tests/test_populate.py @@ -1,8 +1,8 @@ import datetime -from apps.accounts.models import ProjectUser from django import test +from apps.accounts.models import ProjectUser from services.crisalid.factories import CrisalidConfigFactory from services.crisalid.models import Document, Identifier, Researcher from services.crisalid.populates import PopulateDocument, PopulateResearcher diff --git a/services/crisalid/tests/test_views.py b/services/crisalid/tests/test_views.py index ada0d85d..0ac20050 100644 --- a/services/crisalid/tests/test_views.py +++ b/services/crisalid/tests/test_views.py @@ -1,10 +1,11 @@ import datetime -from apps.organizations.factories import OrganizationFactory from django import test from django.urls import reverse from rest_framework import status +from apps.commons.test import JwtAPITestCase +from apps.organizations.factories import OrganizationFactory from services.crisalid.factories import ( 
DocumentContributorFactory, DocumentFactory, @@ -148,7 +149,7 @@ def test_get_analytics_limit(self): self.assertEqual(data["years"], expected["years"]) -class TestResearcherView(test.TestCase): +class TestResearcherView(JwtAPITestCase): @classmethod def setUpClass(cls): super().setUpClass() @@ -168,8 +169,8 @@ def setUpClass(cls): cls.researcher_3.user.groups.add(grp) def setUp(self) -> None: - self.client.force_login(self.researcher.user) - return super().setUp() + super().setUp() + self.client.force_authenticate(self.researcher.user) def test_get_list(self): response = self.client.get( @@ -226,7 +227,7 @@ def test_not_same_organization(self): data={"harvester": "idref", "values": "6666666"}, ) - self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) def test_search_found(self): identifier = self.researcher.identifiers.first() diff --git a/services/crisalid/views.py b/services/crisalid/views.py index 1437e968..c803a0c8 100644 --- a/services/crisalid/views.py +++ b/services/crisalid/views.py @@ -2,13 +2,9 @@ from http import HTTPMethod from itertools import chain -from apps.commons.permissions import OrganizationPermission, ReadOnly -from apps.organizations.models import Organization -from apps.organizations.permissions import OrganizationRelatedPermission from django.db.models import Count, QuerySet from django.db.models.functions import ExtractYear from django.http import JsonResponse -from django.shortcuts import get_object_or_404 from django_filters.rest_framework import DjangoFilterBackend from drf_spectacular.utils import ( OpenApiExample, @@ -16,10 +12,11 @@ extend_schema, extend_schema_view, ) -from lib.views import NestedOrganizationViewMixins from rest_framework import viewsets from rest_framework.decorators import action +from apps.commons.permissions import OrganizationPermission +from lib.views import NestedOrganizationViewMixins from services.crisalid import relators 
from services.crisalid.models import ( Document, From be59d7063c87afda9acb15599c8c15a34962136a Mon Sep 17 00:00:00 2001 From: rgermain Date: Fri, 28 Nov 2025 14:33:36 +0100 Subject: [PATCH 05/32] feat: permissions views crisalid --- services/crisalid/utils/__init__.py | 0 services/crisalid/utils/views.py | 13 +++++++++++++ services/crisalid/views.py | 12 ++++++++---- 3 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 services/crisalid/utils/__init__.py create mode 100644 services/crisalid/utils/views.py diff --git a/services/crisalid/utils/__init__.py b/services/crisalid/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/services/crisalid/utils/views.py b/services/crisalid/utils/views.py new file mode 100644 index 00000000..8dff4647 --- /dev/null +++ b/services/crisalid/utils/views.py @@ -0,0 +1,13 @@ +from django.shortcuts import get_object_or_404 + +from services.crisalid.models import Researcher + + +class NestedResearcherViewMixins: + def initial(self, request, *args, **kwargs): + self.researcher = get_object_or_404( + Researcher, + pk=kwargs["researcher_id"], + user__groups__in=(self.organization.get_users()), + ) + super().initial(request, *args, **kwargs) diff --git a/services/crisalid/views.py b/services/crisalid/views.py index c803a0c8..a46bf546 100644 --- a/services/crisalid/views.py +++ b/services/crisalid/views.py @@ -30,6 +30,7 @@ DocumentSerializer, ResearcherSerializer, ) +from services.crisalid.utils.views import NestedResearcherViewMixins OPENAPI_PARAMTERS_DOCUMENTS = [ OpenApiParameter( @@ -82,11 +83,14 @@ ), ) class AbstractDocumentViewSet( - NestedOrganizationViewMixins, viewsets.ReadOnlyModelViewSet + NestedOrganizationViewMixins, + NestedResearcherViewMixins, + viewsets.ReadOnlyModelViewSet, ): """Abstract class to get documents info from documents types""" serializer_class = DocumentSerializer + permission_classes = (OrganizationPermission,) def filter_queryset( self, @@ -109,7 +113,7 @@ def 
filter_queryset( if roles and roles_enabled: qs = qs.filter( documentcontributor__roles__contains=roles, - documentcontributor__researcher__pk=self.kwargs["researcher_id"], + documentcontributor__researcher=self.researcher, ) # filter by pblication_type @@ -121,7 +125,7 @@ def filter_queryset( def get_queryset(self) -> QuerySet[Document]: return ( Document.objects.filter( - contributors__id=self.kwargs["researcher_id"], + contributors=self.researcher, document_type__in=self.document_types, ) .prefetch_related("identifiers", "contributors__user") @@ -182,7 +186,7 @@ def analytics(self, request, *args, **kwargs): chain( *DocumentContributor.objects.filter( document__in=self.filter_queryset(qs, roles_enabled=False), - researcher__id=self.kwargs["researcher_id"], + researcher=self.researcher, ).values_list("roles", flat=True) ) ) From 4c74117b7e085701344183b508f5fc425a346c86 Mon Sep 17 00:00:00 2001 From: rgermain Date: Mon, 1 Dec 2025 14:31:33 +0100 Subject: [PATCH 06/32] upgrade crisalid --- projects/asgi.py | 14 ++- services/crisalid/admin.py | 12 --- services/crisalid/apps.py | 1 + services/crisalid/bus/client.py | 2 +- services/crisalid/interface.py | 2 +- .../management/commands/csv_crisalid.py | 85 +++++++++++++++++++ .../management/commands/populate_crisalid.py | 21 +++-- services/crisalid/populates/base.py | 8 +- services/crisalid/populates/document.py | 6 +- services/crisalid/populates/researcher.py | 6 +- services/crisalid/signals.py | 2 +- services/crisalid/tasks.py | 7 +- services/crisalid/tests/test_populate.py | 63 ++++++++++++++ services/crisalid/tests/test_tasks.py | 10 +++ services/crisalid/{utils.py => utils/time.py} | 0 15 files changed, 208 insertions(+), 31 deletions(-) create mode 100644 services/crisalid/management/commands/csv_crisalid.py create mode 100644 services/crisalid/tests/test_tasks.py rename services/crisalid/{utils.py => utils/time.py} (100%) diff --git a/projects/asgi.py b/projects/asgi.py index e59b7e26..1076a552 100644 --- 
a/projects/asgi.py +++ b/projects/asgi.py @@ -7,6 +7,7 @@ https://docs.djangoproject.com/en/3.2/howto/deployment/asgi/ """ +import asyncio import logging import os @@ -19,9 +20,16 @@ from django.conf import settings # noqa: E402 -from services.crisalid.bus.runner import initial_start_crisalidbus # noqa: E402 - if settings.ENABLE_CRISALID_BUS: - initial_start_crisalidbus() + # we are in async context, so wee need to wrap + # the sync function "intitial_tart_crisalid" to a corotine + # and put it in current event loop + from asgiref.sync import sync_to_async + + from services.crisalid.bus.runner import initial_start_crisalidbus # noqa: E402 + + loop = asyncio.get_event_loop() + loop.create_task(sync_to_async(initial_start_crisalidbus)()) + else: logging.info("CrisalidBus is not enabled") diff --git a/services/crisalid/admin.py b/services/crisalid/admin.py index 96e0b1e2..3dfd91b2 100644 --- a/services/crisalid/admin.py +++ b/services/crisalid/admin.py @@ -1,6 +1,5 @@ from contextlib import suppress -from django import forms from django.contrib import admin from django.db.models import Count @@ -161,21 +160,10 @@ def get_identifiers(self, instance): return f"{', '.join(result)} ({len(result)})" -class CrisalidConfigForm(forms.ModelForm): - class Meta: - model = CrisalidConfig - fields = "__all__" - widgets = { - "crisalidbus_password": forms.PasswordInput(), - "apollo_token": forms.PasswordInput(), - } - - class CrisalidConfigAdmin(admin.ModelAdmin): list_display = ("organization", "active") search_fields = ("organization__code", "active") autocomplete_fields = ("organization",) - form = CrisalidConfigForm admin.site.register(CrisalidConfig, CrisalidConfigAdmin) diff --git a/services/crisalid/apps.py b/services/crisalid/apps.py index 955af0c8..5df22f70 100644 --- a/services/crisalid/apps.py +++ b/services/crisalid/apps.py @@ -9,4 +9,5 @@ def __init__(self, *ar, **kw): super().__init__(*ar, **kw) def ready(self): + import services.crisalid.signals # noqa: F401 import 
services.crisalid.tasks # noqa: F401 diff --git a/services/crisalid/bus/client.py b/services/crisalid/bus/client.py index 2f84fc59..05491607 100644 --- a/services/crisalid/bus/client.py +++ b/services/crisalid/bus/client.py @@ -42,7 +42,7 @@ def parameters(self) -> dict | None: # get url without port, and set port for pika url = parse_url(self.config.crisalidbus_url) parameters = { - "host": url.url, + "host": url.host, "port": url.port, "user": self.config.crisalidbus_username, "password": self.config.crisalidbus_password, diff --git a/services/crisalid/interface.py b/services/crisalid/interface.py index 48ea823a..4df5a95b 100644 --- a/services/crisalid/interface.py +++ b/services/crisalid/interface.py @@ -11,7 +11,7 @@ class CrisalidService: def __init__(self, config: CrisalidConfig): self.transport = RequestsHTTPTransport( - url=config.apollo_host, + url=config.apollo_url, headers={"X-API-Key": config.apollo_token}, ) self.client = Client( diff --git a/services/crisalid/management/commands/csv_crisalid.py b/services/crisalid/management/commands/csv_crisalid.py new file mode 100644 index 00000000..86df928a --- /dev/null +++ b/services/crisalid/management/commands/csv_crisalid.py @@ -0,0 +1,85 @@ +import csv + +from django.core.management.base import BaseCommand + +from services.crisalid.models import Identifier, Researcher + + +class Command(BaseCommand): + help = "create csv files for crisalid dag from our researcher" # noqa: A003 + + def handle(self, **options): + + rows = [ + # headers csv + [ + "first_names", + "last_name", + "main_research_structure", + "tracking_id", + "eppn", + "idhal_s", + "idhal_i", + "orcid", + "idref", + "scopus_eid", + "institution_identifier", + "institution_id_nomenclature", + "position", + "employment_start_date", + "employment_end_date", + "hdr", + ] + ] + + # fetch all users with eppn + for researcher in Researcher.objects.prefetch_related("identifiers").filter( + identifiers__harvester=Identifier.Harvester.EPPN.value + ): + + # 
convert identifiers to a dict key/value + identifiers = { + identifier.harvester: identifier.value + for identifier in researcher.identifiers.all() + } + + rows.append( + [ + # first_names + researcher.given_name, + # last_name + researcher.family_name, + # main_research_structure + "", + # tracking_id + identifiers.get(Identifier.Harvester.LOCAL.value, ""), + # eppn + identifiers.get(Identifier.Harvester.EPPN.value, ""), + # idhal_s, + "", + # idhal_i, + "", + # orcid + identifiers.get(Identifier.Harvester.ORCID.value, ""), + # idref + identifiers.get(Identifier.Harvester.IDREF.value, ""), + # scopus_eid + "", + # institution_identifier + "", + # institution_id_nomenclature + "", + # position + "", + # employment_start_date + "", + # employment_end_date + "", + # hdr + "", + ] + ) + + with open("people.csv", "w") as f: + writer = csv.writer(f) + writer.writerows(rows) diff --git a/services/crisalid/management/commands/populate_crisalid.py b/services/crisalid/management/commands/populate_crisalid.py index b24943ee..8350182d 100644 --- a/services/crisalid/management/commands/populate_crisalid.py +++ b/services/crisalid/management/commands/populate_crisalid.py @@ -4,6 +4,7 @@ from services.crisalid.interface import CrisalidService from services.crisalid.models import ( + CrisalidConfig, Document, DocumentContributor, Identifier, @@ -11,7 +12,7 @@ ) from services.crisalid.populates import PopulateDocument, PopulateResearcher from services.crisalid.populates.base import AbstractPopulate -from services.crisalid.utils import timeit +from services.crisalid.utils.time import timeit from services.mistral.models import DocumentEmbedding @@ -19,6 +20,13 @@ class Command(BaseCommand): help = "create or update data from researcher/Document crisalid neo4j/graphql" # noqa: A003 def add_arguments(self, parser): + parser.add_argument( + "organization", + choices=CrisalidConfig.objects.filter( + organization__code__isnull=False + ).values_list("organization__code", flat=True), + 
help="organization code", + ) parser.add_argument( "command", choices=("document", "researcher", "all"), @@ -52,7 +60,7 @@ def populate_crisalid( service: CrisalidService, populate: AbstractPopulate, query: str, - where: None, + where: None = None, **options, ): @@ -65,7 +73,7 @@ def populate_crisalid( while max_elements >= 1: - with timeit(print, "GrapQL request "): + with timeit(print, f"GrapQL request {query}"): data = service.query( query, offset=offset, limit=limit, where=where )[query] @@ -82,18 +90,19 @@ def populate_crisalid( max_elements -= 1 def handle(self, **options): + config = CrisalidConfig.objects.get(organization__code=options["organization"]) if options["delete"]: self.delete_crisalid_models() command = options["command"] - service = CrisalidService() + service = CrisalidService(config) if command in ("all", "document"): - populate = PopulateDocument() + populate = PopulateDocument(config) self.populate_crisalid(service, populate, query="documents", **options) if command in ("all", "researcher"): - populate = PopulateResearcher() + populate = PopulateResearcher(config) self.populate_crisalid( service, populate, diff --git a/services/crisalid/populates/base.py b/services/crisalid/populates/base.py index 813ee799..3b8aed99 100644 --- a/services/crisalid/populates/base.py +++ b/services/crisalid/populates/base.py @@ -59,4 +59,10 @@ def single(self, data): raise NotImplementedError def multiple(self, datas: list) -> list: - return [self.single(data) for data in datas] + """return all objects create""" + final = [] + for data in datas: + el = self.single(data) + if el is not None: + final.append(el) + return final diff --git a/services/crisalid/populates/document.py b/services/crisalid/populates/document.py index 2827a1a8..ab5025b4 100644 --- a/services/crisalid/populates/document.py +++ b/services/crisalid/populates/document.py @@ -34,7 +34,7 @@ def sanitize_roles(self, data: list[str]) -> list[str]: return roles - def single(self, data: dict): + def 
single(self, data: dict) -> Document | None: """this method create/update only on document from crisalid""" # identifiers (hal, openalex, idref ...ect) documents_identifiers = [] @@ -47,6 +47,10 @@ def single(self, data: dict): self.cache.save(identifier) documents_identifiers.append(identifier) + # no identifiers for this documents, we ignore it + if not documents_identifiers: + return None + document = self.cache.from_identifiers(Document, documents_identifiers) self.cache.save( document, diff --git a/services/crisalid/populates/researcher.py b/services/crisalid/populates/researcher.py index ff12174e..a30bd38c 100644 --- a/services/crisalid/populates/researcher.py +++ b/services/crisalid/populates/researcher.py @@ -57,7 +57,7 @@ def check_mapping_user( return self.create_user(iden["value"], given_name, family_name) return None - def single(self, data: dict) -> Researcher: + def single(self, data: dict) -> Researcher | None: researcher_identifiers = [] for iden in data["identifiers"]: identifier = self.cache.model( @@ -66,6 +66,10 @@ def single(self, data: dict) -> Researcher: self.cache.save(identifier) researcher_identifiers.append(identifier) + # researcher withtout any identifiers no neeeeeeed to be created + if not researcher_identifiers: + return None + # remove local/eppn identifiers to match only hal/eppn/orcid ..ect researcher_identifiers_without_local = [ identifier diff --git a/services/crisalid/signals.py b/services/crisalid/signals.py index eec11e09..3668e490 100644 --- a/services/crisalid/signals.py +++ b/services/crisalid/signals.py @@ -1,8 +1,8 @@ from django.db.models.signals import post_delete, post_save from django.dispatch import receiver -from services.crisalid.apps import CrisalidConfig from services.crisalid.bus.runner import delete_crisalidbus, start_crisalidbus +from services.crisalid.models import CrisalidConfig @receiver(post_save, sender=CrisalidConfig) diff --git a/services/crisalid/tasks.py b/services/crisalid/tasks.py index 
98c8923d..674dfd49 100644 --- a/services/crisalid/tasks.py +++ b/services/crisalid/tasks.py @@ -1,19 +1,18 @@ import logging from projects.celery import app -from services.crisalid.apps import CrisalidConfig from services.crisalid.bus.constant import CrisalidEventEnum, CrisalidTypeEnum from services.crisalid.bus.consumer import on_event from services.crisalid.interface import CrisalidService -from services.crisalid.models import Document, Identifier, Researcher +from services.crisalid.models import CrisalidConfig, Document, Identifier, Researcher from services.crisalid.populates import PopulateDocument, PopulateResearcher logger = logging.getLogger(__name__) def get_crisalid_config(crisalid_config_id: int) -> CrisalidConfig: - return CrisalidConfig.objects.get(id=crisalid_config_id).selected_related( - "organization" + return CrisalidConfig.objects.select_related("organization").get( + id=crisalid_config_id ) diff --git a/services/crisalid/tests/test_populate.py b/services/crisalid/tests/test_populate.py index 660ba78b..22290a82 100644 --- a/services/crisalid/tests/test_populate.py +++ b/services/crisalid/tests/test_populate.py @@ -43,6 +43,23 @@ def test_create_researcher(self): self.assertEqual(iden.value, "hals-truc") self.assertEqual(iden.harvester, Identifier.Harvester.HAL.value) + def test_create_researcher_whithout_identifiers(self): + popu = PopulateResearcher(self.config) + data = { + "uid": "05-11-1995-uuid", + "names": [ + { + "first_names": [{"value": "marty", "language": "fr"}], + "last_names": [{"value": "mcfly", "language": "fr"}], + } + ], + } + + new_obj = popu.single(data) + + self.assertIsNone(new_obj) + self.assertEqual(Researcher.objects.count(), 0) + def test_no_change_researcher(self): data = { "uid": "05-11-1995-uuid", @@ -244,6 +261,52 @@ def test_create_publication(self): self.assertEqual(iden.value, "hals-truc") self.assertEqual(iden.harvester, Identifier.Harvester.HAL.value) + def test_create_document_whitout_identifiers(self): + popu = 
PopulateDocument(self.config) + data = { + "uid": "05-11-1995-uuid", + "document_type": None, + "titles": [ + {"language": "en", "value": "fiction"}, + ], + "abstracts": [ + {"language": "en", "value": "description"}, + ], + "publication_date": "1999", + "has_contributions": [ + { + "roles": ["http://id.loc.gov/vocabulary/relators/aut"], + "contributor": [ + { + "uid": "local-v9034", + "names": [ + { + "first_names": [ + {"value": "Marty", "language": "fr"} + ], + "last_names": [ + {"value": "Mcfly", "language": "fr"} + ], + } + ], + "identifiers": [ + {"type": "eppn", "value": "marty.mcfly@non-de-zeus.fr"}, + {"type": "idref", "value": "4545454545454"}, + {"type": "local", "value": "v55555"}, + ], + } + ], + } + ], + "recorded_by": [], + } + + new_obj = popu.single(data) + + # check obj from db + self.assertIsNone(new_obj) + self.assertEqual(Document.objects.count(), 0) + def test_sanitize_date(self): popu = PopulateDocument(self.config) diff --git a/services/crisalid/tests/test_tasks.py b/services/crisalid/tests/test_tasks.py new file mode 100644 index 00000000..ac23bc2f --- /dev/null +++ b/services/crisalid/tests/test_tasks.py @@ -0,0 +1,10 @@ +from django import test + +from services.crisalid.factories import CrisalidConfigFactory + + +class TestCrisalidTasks(test.TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.config = CrisalidConfigFactory() diff --git a/services/crisalid/utils.py b/services/crisalid/utils/time.py similarity index 100% rename from services/crisalid/utils.py rename to services/crisalid/utils/time.py From faeda88e554980109883a05f8dbfea616c699a9d Mon Sep 17 00:00:00 2001 From: rgermain Date: Mon, 1 Dec 2025 15:31:17 +0100 Subject: [PATCH 07/32] thread safe crisalidbus --- services/crisalid/admin.py | 17 +++++++++++++ services/crisalid/bus/client.py | 42 +++++++++++++++++++++------------ services/crisalid/bus/runner.py | 14 ++++------- services/crisalid/signals.py | 6 ++--- 4 files changed, 52 insertions(+), 27 
deletions(-) diff --git a/services/crisalid/admin.py b/services/crisalid/admin.py index 3dfd91b2..475109f5 100644 --- a/services/crisalid/admin.py +++ b/services/crisalid/admin.py @@ -164,6 +164,23 @@ class CrisalidConfigAdmin(admin.ModelAdmin): list_display = ("organization", "active") search_fields = ("organization__code", "active") autocomplete_fields = ("organization",) + actions = ["active_connections", "deactive_connections"] + + @admin.action(description="run/reload crisalidbus connections") + def active_connections(self, request, queryset): + """method to change/run crisalidbus listener""" + # we don't update directly queryset for signals dispatch + for obj in queryset: + obj.active = True + obj.save() + + @admin.action(description="stop crisalidbus connections") + def deactive_connections(self, request, queryset): + """method to change/stop crisalidbus listener""" + # we don't update directly queryset for signals dispatch + for obj in queryset: + obj.active = False + obj.save() admin.site.register(CrisalidConfig, CrisalidConfigAdmin) diff --git a/services/crisalid/bus/client.py b/services/crisalid/bus/client.py index 05491607..ae1481fd 100644 --- a/services/crisalid/bus/client.py +++ b/services/crisalid/bus/client.py @@ -1,6 +1,7 @@ import json import logging import time +from threading import Event import jsonschema import pika @@ -32,8 +33,8 @@ def __init__(self, config: CrisalidConfig): self.config = config self.conn: pika.BlockingConnection | None = None self._channel = pika.channel.Channel - self._run: bool = True self.logger = logging.getLogger(config.organization.code) + self._stop_event: Event | None = None def parameters(self) -> dict | None: """generate parametrs for crislaid and check values""" @@ -64,9 +65,14 @@ def connect(self): parameters = self.parameters() + # we need to threading event to stop consumers + # pika is not thread safe so... 
+ # https://pika.readthedocs.io/en/stable/faq.html#frequently-asked-questions + self._stop_event = Event() + retry = 1 # run in loop to retry when connection is lost - while self._run: + while not self._stop_event.is_set(): try: self.logger.info("Create pika connection") @@ -99,8 +105,9 @@ def connect(self): ) self.logger.info("Start channel Consuming") - self._channel.start_consuming() - break + + while not self._stop_event.is_set(): + self.conn.process_data_events(time_limit=1) except pika.exceptions.ConnectionClosedByBroker: self.logger.error("Connection closed by crisalid broker") @@ -108,8 +115,10 @@ def connect(self): self.logger.error("Channel error: %s", str(e)) except pika.exceptions.AMQPConnectionError as e: self.logger.error("Connection closed: %s", str(e)) + except pika.exceptions.AMQPError as e: + self.logger.critical("Exceptions: %s", str(e)) - if not self._run: + if self._stop_event.is_set(): break # incremental retry (max 60s) @@ -119,22 +128,25 @@ def connect(self): # ensure disconect after loop self._disconnect() - def disconnect(self): + def _disconnect(self): """disconnect rabitmqt connection""" - self._run = False - if not self.conn: - return + if self._channel is not None: + self._channel.close() + self._channel = None + + if self.conn is not None: + self.conn.close() + self.conn = None - self.self.logger.info("CrisalidBus connection closed") + self.logger.info("CrisalidBus connection closed") - self.conn.close() - self.conn = None - self._channel.cancel() - self._channel = None + def stop(self): + if self._stop_event is not None: + self._stop_event.set() def __delete__(self): # for disconnect when class is deleted - self.disconnect() + self._disconnect() def _dispatch( self, diff --git a/services/crisalid/bus/runner.py b/services/crisalid/bus/runner.py index a18361d2..14d4f96f 100644 --- a/services/crisalid/bus/runner.py +++ b/services/crisalid/bus/runner.py @@ -35,7 +35,7 @@ def stop(self): self.logger.info("Stop thread %s", self.name) if 
self.thread is None: return - self.client.disconnect() + self.client.stop() self.thread.join(3) self.thread = None @@ -56,16 +56,12 @@ def start_crisalidbus(config: CrisalidConfig): def stop_crisalidbus(config: CrisalidConfig): with rlock: - client = organization_maps.get(config.organization.code) - if client is None: + if config.organization.code not in organization_maps: return + + client = organization_maps[config.organization.code] client.config = config client.stop() - - -def delete_crisalidbus(config: CrisalidConfig): - with rlock: - stop_crisalidbus(config) del organization_maps[config.organization.code] @@ -81,4 +77,4 @@ def initial_start_crisalidbus(): def _stop_all_crisalid(): with rlock: for client in list(organization_maps.values()): - delete_crisalidbus(client.config) + stop_crisalidbus(client.config) diff --git a/services/crisalid/signals.py b/services/crisalid/signals.py index 3668e490..fd620b66 100644 --- a/services/crisalid/signals.py +++ b/services/crisalid/signals.py @@ -1,7 +1,7 @@ from django.db.models.signals import post_delete, post_save from django.dispatch import receiver -from services.crisalid.bus.runner import delete_crisalidbus, start_crisalidbus +from services.crisalid.bus.runner import start_crisalidbus, stop_crisalidbus from services.crisalid.models import CrisalidConfig @@ -10,9 +10,9 @@ def on_save(sender, instance, **kwargs): if instance.active: start_crisalidbus(instance) else: - delete_crisalidbus(instance) + stop_crisalidbus(instance) @receiver(post_delete, sender=CrisalidConfig) def on_delete(sender, instance, **kwargs): - delete_crisalidbus(instance) + stop_crisalidbus(instance) From 00330bca0915b6cd13984135a5514cd68b3cfa6a Mon Sep 17 00:00:00 2001 From: rgermain Date: Mon, 1 Dec 2025 17:25:57 +0100 Subject: [PATCH 08/32] fix: admin invalid count documents researcher --- services/crisalid/admin.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/crisalid/admin.py b/services/crisalid/admin.py 
index 475109f5..e5331ea1 100644 --- a/services/crisalid/admin.py +++ b/services/crisalid/admin.py @@ -23,8 +23,8 @@ def get_queryset(self, request): super() .get_queryset(request) .prefetch_related("researchers", "documents") - .annotate(documents_count=Count("documents__id")) - .annotate(researchers_count=Count("researchers__id")) + .annotate(documents_count=Count("documents__id", distinct=True)) + .annotate(researchers_count=Count("researchers__id", distinct=True)) ) @admin.display(description="researchers assosiate", ordering="researchers_count") @@ -72,7 +72,7 @@ def get_queryset(self, request): .get_queryset(request) .prefetch_related("contributors", "identifiers") .annotate(identifiers_count=Count("identifiers__id")) - .annotate(contributors_count=Count("contributors__id")) + .annotate(contributors_count=Count("contributors__id", distinct=True)) ) @admin.display(description="contributors count", ordering="contributors_count") @@ -112,7 +112,7 @@ def get_queryset(self, request): .select_related("user") .prefetch_related("identifiers", "documents") .annotate(identifiers_count=Count("identifiers__id")) - .annotate(documents_count=Count("documents__id")) + .annotate(documents_count=Count("documents__id", distinct=True)) ) @admin.action(description="assign researcher on projects") From fe9ae327b12bda45ce52dfbcac44f29250320c4f Mon Sep 17 00:00:00 2001 From: rgermain Date: Mon, 1 Dec 2025 17:26:30 +0100 Subject: [PATCH 09/32] better dump csv command --- .../management/commands/csv_crisalid.py | 39 +++++++++++++++++-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/services/crisalid/management/commands/csv_crisalid.py b/services/crisalid/management/commands/csv_crisalid.py index 86df928a..d9958e4c 100644 --- a/services/crisalid/management/commands/csv_crisalid.py +++ b/services/crisalid/management/commands/csv_crisalid.py @@ -1,14 +1,35 @@ import csv +import pathlib from django.core.management.base import BaseCommand -from services.crisalid.models 
import Identifier, Researcher +from apps.organizations.models import Organization +from services.crisalid.models import CrisalidConfig, Identifier, Researcher class Command(BaseCommand): help = "create csv files for crisalid dag from our researcher" # noqa: A003 - def handle(self, **options): + def add_arguments(self, parser): + parser.add_argument( + "organization", + choices=CrisalidConfig.objects.filter( + organization__code__isnull=False + ).values_list("organization__code", flat=True), + help="organization code", + ) + parser.add_argument( + "command", + choices=("researcher", "all"), + help="elements to dumps", + ) + parser.add_argument( + "output", + default="./", + help="output path", + ) + + def csv_researcher(self, organization: Organization, output: pathlib.Path): rows = [ # headers csv @@ -34,7 +55,7 @@ def handle(self, **options): # fetch all users with eppn for researcher in Researcher.objects.prefetch_related("identifiers").filter( - identifiers__harvester=Identifier.Harvester.EPPN.value + user__groups__in=(organization.get_users(),) ): # convert identifiers to a dict key/value @@ -80,6 +101,16 @@ def handle(self, **options): ] ) - with open("people.csv", "w") as f: + path = output / "people.csv" + with path.open("w") as f: writer = csv.writer(f) writer.writerows(rows) + + def handle(self, **options): + command = options["command"] + config = CrisalidConfig.objects.get(organization__code=options["organization"]) + + output = pathlib.Path(options["output"]) + + if command in ("researcher", "all"): + self.csv_researcher(config, output) From 2334f823614f6d6fdc47b94ffde8aab78436e0fb Mon Sep 17 00:00:00 2001 From: rgermain Date: Mon, 1 Dec 2025 17:26:45 +0100 Subject: [PATCH 10/32] test: fix tests --- services/crisalid/bus/client.py | 2 + services/crisalid/bus/runner.py | 4 ++ services/crisalid/factories.py | 2 +- services/crisalid/tests/test_crisalid_bus.py | 66 +++++++++++++++++++- services/crisalid/tests/test_populate.py | 1 + 
services/crisalid/views.py | 2 +- 6 files changed, 74 insertions(+), 3 deletions(-) diff --git a/services/crisalid/bus/client.py b/services/crisalid/bus/client.py index ae1481fd..16a8b448 100644 --- a/services/crisalid/bus/client.py +++ b/services/crisalid/bus/client.py @@ -64,6 +64,8 @@ def connect(self): assert self.conn is None, "rabimqt is already started" parameters = self.parameters() + if not parameters: + return # we need to threading event to stop consumers # pika is not thread safe so... diff --git a/services/crisalid/bus/runner.py b/services/crisalid/bus/runner.py index 14d4f96f..ca85cb5c 100644 --- a/services/crisalid/bus/runner.py +++ b/services/crisalid/bus/runner.py @@ -49,6 +49,10 @@ def start_crisalidbus(config: CrisalidConfig): if client is not None: stop_crisalidbus(client.config) + assert ( + config.active is True + ), f"can't instanciate crisalidBus for {config.organization.code=}, active=False" + client = OrganizationClient(config) organization_maps[config.organization.code] = client client.start() diff --git a/services/crisalid/factories.py b/services/crisalid/factories.py index e7be4ecd..44435c20 100644 --- a/services/crisalid/factories.py +++ b/services/crisalid/factories.py @@ -94,7 +94,7 @@ class CrisalidConfigFactory(factory.django.DjangoModelFactory): crisalidbus_password = faker.password() apollo_url = faker.url() apollo_token = faker.password() - active = True + active = False class Meta: model = CrisalidConfig diff --git a/services/crisalid/tests/test_crisalid_bus.py b/services/crisalid/tests/test_crisalid_bus.py index de20981f..457cc8f8 100644 --- a/services/crisalid/tests/test_crisalid_bus.py +++ b/services/crisalid/tests/test_crisalid_bus.py @@ -1,11 +1,12 @@ import json -from unittest.mock import Mock +from unittest.mock import Mock, patch from django import test from services.crisalid.bus.client import CrisalidBusClient from services.crisalid.bus.constant import CrisalidEventEnum, CrisalidTypeEnum from 
services.crisalid.bus.consumer import crisalid_consumer +from services.crisalid.bus.runner import _stop_all_crisalid, organization_maps from services.crisalid.factories import CrisalidConfigFactory @@ -121,3 +122,66 @@ def test_validated_payload(self): ).encode("ascii") self.client._dispatch(self.chanel, self.properties, self.method, payload) callback.assert_not_called() + + +@patch("services.crisalid.bus.runner.threading") +@patch("services.crisalid.bus.runner.CrisalidBusClient") +class TestCrisalidThread(test.TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.config = CrisalidConfigFactory() + + def setUp(self) -> None: + super().setUp() + # for stop all instance + _stop_all_crisalid() + + def test_start_signals(self, clientbus_mock, thread_mock): + # not crisalidbus loaded + self.assertEqual(organization_maps, {}) + + self.config.active = True + self.config.save() + + self.assertIn(self.config.organization.code, organization_maps) + client: Mock = organization_maps[self.config.organization.code] + self.assertIsNotNone(client) + + clientbus_mock.assert_called_once_with(self.config) + thread_mock.Thread.assert_called_once() + thread_mock.Thread().start.assert_called_once() + + def test_stop_signals(self, clientbus_mock, thread_mock): + self.config.active = True + self.config.save() + + self.assertIn(self.config.organization.code, organization_maps) + self.config.active = False + self.config.save() + self.assertNotIn(self.config.organization.code, organization_maps) + + clientbus_mock().stop.assert_called_once() + thread_mock.Thread().join.assert_called_once() + + def test_stop_signals_disable(self, clientbus_mock, thread_mock): + self.config.active = False + self.config.save() + + self.assertNotIn(self.config.organization.code, organization_maps) + + def test_start_signals_active(self, clientbus_mock, thread_mock): + self.config.active = True + self.config.save() + self.assertIn(self.config.organization.code, organization_maps) + + 
self.config.active = True + self.config.save() + self.assertIn(self.config.organization.code, organization_maps) + + def test_delte_signals_active(self, clientbus_mock, thread_mock): + config = CrisalidConfigFactory(active=True) + self.assertIn(config.organization.code, organization_maps) + + config.delete() + self.assertNotIn(config.organization.code, organization_maps) diff --git a/services/crisalid/tests/test_populate.py b/services/crisalid/tests/test_populate.py index 22290a82..8662c797 100644 --- a/services/crisalid/tests/test_populate.py +++ b/services/crisalid/tests/test_populate.py @@ -53,6 +53,7 @@ def test_create_researcher_whithout_identifiers(self): "last_names": [{"value": "mcfly", "language": "fr"}], } ], + "identifiers": [], } new_obj = popu.single(data) diff --git a/services/crisalid/views.py b/services/crisalid/views.py index a46bf546..d1af91d6 100644 --- a/services/crisalid/views.py +++ b/services/crisalid/views.py @@ -16,7 +16,7 @@ from rest_framework.decorators import action from apps.commons.permissions import OrganizationPermission -from lib.views import NestedOrganizationViewMixins +from apps.commons.views import NestedOrganizationViewMixins from services.crisalid import relators from services.crisalid.models import ( Document, From 4b587715d3064d1b4c8fa4af67b6988e80237e76 Mon Sep 17 00:00:00 2001 From: rgermain Date: Mon, 1 Dec 2025 18:25:04 +0100 Subject: [PATCH 11/32] test: add tasks tests --- services/crisalid/manager.py | 11 ++- services/crisalid/populates/base.py | 3 +- services/crisalid/populates/researcher.py | 5 +- services/crisalid/tasks.py | 16 +++-- services/crisalid/tests/test_tasks.py | 86 ++++++++++++++++++++++- 5 files changed, 106 insertions(+), 15 deletions(-) diff --git a/services/crisalid/manager.py b/services/crisalid/manager.py index 032e6aad..544398ce 100644 --- a/services/crisalid/manager.py +++ b/services/crisalid/manager.py @@ -2,7 +2,7 @@ class CrisalidQuerySet(QuerySet): - def from_identifiers(self, identifiers: 
list): + def from_identifiers(self, identifiers: list, distinct=True): """filter by identifiers""" from services.crisalid.models import Identifier @@ -19,11 +19,10 @@ def from_identifiers(self, identifiers: list): identifiers__harvester=identifier["harvester"], ) - return ( - self.filter(Q(identifiers__pk__in=pks) | filters) - .order_by("pk") - .distinct("pk") - ) + qs = self.filter(Q(identifiers__pk__in=pks) | filters) + if distinct: + qs = qs.order_by("pk").distinct("pk") + return qs class DocumentQuerySet(CrisalidQuerySet): diff --git a/services/crisalid/populates/base.py b/services/crisalid/populates/base.py index 3b8aed99..5c597589 100644 --- a/services/crisalid/populates/base.py +++ b/services/crisalid/populates/base.py @@ -29,7 +29,7 @@ def sanitize_languages(self, values: list[dict[str, str]]) -> str: maps_languages = {} for value in values: - maps_languages[value["language"]] = value["value"] + maps_languages[value["language"]] = (value["value"] or "").strip() return ( maps_languages.get("en") @@ -44,6 +44,7 @@ def sanitize_date(self, value: str | None) -> datetime.date | None: if not value: return None + value = value.strip() for format_date in CRISALID_FORMAT_DATE: try: # parse the value and convert it to date diff --git a/services/crisalid/populates/researcher.py b/services/crisalid/populates/researcher.py index a30bd38c..a6245a80 100644 --- a/services/crisalid/populates/researcher.py +++ b/services/crisalid/populates/researcher.py @@ -12,7 +12,10 @@ def get_names(self, data): given_name = self.sanitize_languages(name["first_names"]) family_name = self.sanitize_languages(name["last_names"]) - return given_name or "", family_name or "" + given_name = (given_name or "").strip() + family_name = (family_name or "").strip() + + return given_name, family_name def create_user(self, eppn: str, given_name: str, family_name: str) -> ProjectUser: diff --git a/services/crisalid/tasks.py b/services/crisalid/tasks.py index 674dfd49..a1d1d043 100644 --- 
a/services/crisalid/tasks.py +++ b/services/crisalid/tasks.py @@ -22,8 +22,8 @@ def get_crisalid_config(crisalid_config_id: int) -> CrisalidConfig: @on_event(CrisalidTypeEnum.PERSON, CrisalidEventEnum.CREATED) @on_event(CrisalidTypeEnum.PERSON, CrisalidEventEnum.UPDATED) -@app.task(name=f"{__name__}.create_person") -def create_person(crisalid_config_id: int, fields: dict): +@app.task(name=f"{__name__}.create_researcher") +def create_researcher(crisalid_config_id: int, fields: dict): config = get_crisalid_config(crisalid_config_id) logger.error("receive %s for organization %s", fields, config.organization) @@ -32,8 +32,8 @@ def create_person(crisalid_config_id: int, fields: dict): @on_event(CrisalidTypeEnum.PERSON, CrisalidEventEnum.DELETED) -@app.task(name=f"{__name__}.delete_person") -def delete_person(crisalid_config_id: int, fields: dict): +@app.task(name=f"{__name__}.delete_researcher") +def delete_researcher(crisalid_config_id: int, fields: dict): config = get_crisalid_config(crisalid_config_id) logger.error("receive %s for organization %s", fields, config.organization) @@ -44,7 +44,10 @@ def delete_person(crisalid_config_id: int, fields: dict): not in (Identifier.Harvester.LOCAL, Identifier.Harvester.EPPN) ] - deleted = Researcher.objects.from_identifiers(identifiers).delete() + # TODO(remi): check only one elements are deleted + pks = Researcher.objects.from_identifiers(identifiers).values_list("pk", flat=True) + deleted, _ = Researcher.objects.filter(pk__in=pks).delete() + logger.info("deleted = %s", deleted) @@ -80,5 +83,6 @@ def delete_document(crisalid_config_id: int, fields: dict): for iden in fields["recorded_by"] ] - deleted = Document.objects.from_identifiers(identifiers).delete() + pks = Document.objects.from_identifiers(identifiers).values_list("pk", flat=True) + deleted, _ = Document.objects.filter(pk__in=pks).delete() logger.info("deleted = %s", deleted) diff --git a/services/crisalid/tests/test_tasks.py b/services/crisalid/tests/test_tasks.py 
index ac23bc2f..94fd8805 100644 --- a/services/crisalid/tests/test_tasks.py +++ b/services/crisalid/tests/test_tasks.py @@ -1,6 +1,12 @@ from django import test -from services.crisalid.factories import CrisalidConfigFactory +from services.crisalid.factories import ( + CrisalidConfigFactory, + DocumentFactory, + ResearcherFactory, +) +from services.crisalid.models import Document, Researcher +from services.crisalid.tasks import delete_document, delete_researcher class TestCrisalidTasks(test.TestCase): @@ -8,3 +14,81 @@ class TestCrisalidTasks(test.TestCase): def setUpClass(cls): super().setUpClass() cls.config = CrisalidConfigFactory() + + def test_delete_document(self): + document = DocumentFactory() + document_2 = DocumentFactory() + + fields = { + "recorded_by": [ + { + "harvester": identifier.harvester, + "uid": identifier.value, + } + for identifier in document.identifiers.all() + ] + } + + delete_document(self.config.pk, fields) + + self.assertFalse(Document.objects.filter(pk=document.pk).exists()) + self.assertTrue(Document.objects.filter(pk=document_2.pk).exists()) + + def test_delete_document_unknow(self): + document = DocumentFactory() + + fields = { + "recorded_by": [ + { + "harvester": identifier.harvester, + "uid": identifier.value + "rand", + } + for identifier in document.identifiers.all() + ] + } + + delete_document(self.config.pk, fields) + + self.assertTrue(Document.objects.filter(pk=document.pk).exists()) + + def test_delete_researcher(self): + researcher = ResearcherFactory() + researcher_2 = ResearcherFactory() + + fields = { + "identifiers": [ + { + "type": identifier.harvester, + "value": identifier.value, + } + for identifier in researcher.identifiers.all() + ] + } + + delete_researcher(self.config.pk, fields) + + self.assertFalse(Researcher.objects.filter(pk=researcher.pk).exists()) + self.assertTrue(Researcher.objects.filter(pk=researcher_2.pk).exists()) + + def test_delete_research(self): + researcher = ResearcherFactory() + + fields = { + 
"identifiers": [ + { + "type": identifier.harvester, + "value": identifier.value + "rand", + } + for identifier in researcher.identifiers.all() + ] + } + + delete_researcher(self.config.pk, fields) + + self.assertTrue(Researcher.objects.filter(pk=researcher.pk).exists()) + + def test_create_document(self): + pass + + def test_create_researcher(self): + pass From 7c53e663aa7b8717f24e13a3f16bb89734f009da Mon Sep 17 00:00:00 2001 From: rgermain Date: Tue, 2 Dec 2025 12:46:48 +0100 Subject: [PATCH 12/32] cleanup all --- services/crisalid/admin.py | 55 ++++++++-- services/crisalid/bus/client.py | 20 ++-- services/crisalid/bus/consumer.py | 9 +- services/crisalid/bus/runner.py | 15 +-- services/crisalid/manager.py | 15 ++- services/crisalid/models.py | 6 +- services/crisalid/tasks.py | 7 ++ services/crisalid/tests/test_crisalid_bus.py | 22 ++-- services/crisalid/tests/test_populate.py | 61 ++++------ services/crisalid/tests/test_tasks.py | 110 ++++++++++++++++++- services/crisalid/views.py | 9 +- services/mistral/tasks.py | 31 ++++-- 12 files changed, 257 insertions(+), 103 deletions(-) diff --git a/services/crisalid/admin.py b/services/crisalid/admin.py index e5331ea1..273ccb4b 100644 --- a/services/crisalid/admin.py +++ b/services/crisalid/admin.py @@ -1,9 +1,10 @@ from contextlib import suppress -from django.contrib import admin +from django.contrib import admin, messages from django.db.models import Count from apps.accounts.models import ProjectUser +from services.crisalid.tasks import vectorize_documents from .models import ( CrisalidConfig, @@ -14,6 +15,7 @@ ) +@admin.register(Identifier) class IdentifierAdmin(admin.ModelAdmin): list_display = ("harvester", "value", "get_researcher", "get_documents") search_fields = ("harvester", "value") @@ -41,6 +43,7 @@ class DocumentContributorAdminInline(admin.StackedInline): extra = 0 +@admin.register(Document) class DocumentAdmin(admin.ModelAdmin): list_display = ( "title", @@ -60,11 +63,17 @@ class 
DocumentAdmin(admin.ModelAdmin): ) inlines = (DocumentContributorAdminInline,) - actions = ["vectorize"] + actions = ("vectorize",) def vectorize(self, request, queryset): - for document in queryset: - document.vectorize() + # run vecotrize async in celery + documents_pks = list(queryset.values_list("pk", flat=True)) + vectorize_documents.apply_async((documents_pks,)) + messages.add_message( + request, + messages.INFO, + f"Vecotrize Task created for {len(documents_pks)} documents", + ) def get_queryset(self, request): return ( @@ -88,6 +97,7 @@ def get_identifiers(self, instance): return f"{', '.join(result)} ({len(result)})" +@admin.register(Researcher) class ResearcherAdmin(admin.ModelAdmin): list_display = ( "given_name", @@ -119,6 +129,7 @@ def get_queryset(self, request): def assign_user(self, request, queryset): """Assign research to user if matching user/eppn""" researcher_updated = [] + created = assigned = notfound = 0 for research in queryset.prefetch_related("identifiers").select_related("user"): # already set @@ -134,18 +145,33 @@ def assign_user(self, request, queryset): user = ProjectUser.objects.get(email=identifier.value) if not user: + created += 1 user = ProjectUser( email=identifier.value, given_name=research.given_name, family_name=research.family_name, ) user.save() + else: + assigned += 1 research.user = user researcher_updated.append(research) + break + else: + notfound += 1 Researcher.objects.bulk_update(researcher_updated, fields=["user"]) + if created: + messages.add_message(request, messages.INFO, f"Create {created} user.") + if assigned: + messages.add_message(request, messages.INFO, f"Assign {assigned} user.") + if notfound: + messages.add_message( + request, messages.ERROR, f"Can't found {notfound} user with eppn." 
+ ) + @admin.display(description="documents count", ordering="documents_count") def get_documents(self, instance): return instance.documents_count @@ -160,30 +186,39 @@ def get_identifiers(self, instance): return f"{', '.join(result)} ({len(result)})" +@admin.register(CrisalidConfig) class CrisalidConfigAdmin(admin.ModelAdmin): list_display = ("organization", "active") search_fields = ("organization__code", "active") autocomplete_fields = ("organization",) - actions = ["active_connections", "deactive_connections"] + actions = ("active_connections", "deactive_connections") @admin.action(description="run/reload crisalidbus connections") def active_connections(self, request, queryset): """method to change/run crisalidbus listener""" # we don't update directly queryset for signals dispatch + total = queryset.count() for obj in queryset: obj.active = True obj.save() + messages.add_message( + request, + messages.INFO, + f"CrisalidBus listener started or reloaded ({total}).", + ) + @admin.action(description="stop crisalidbus connections") def deactive_connections(self, request, queryset): """method to change/stop crisalidbus listener""" # we don't update directly queryset for signals dispatch + total = queryset.count() for obj in queryset: obj.active = False obj.save() - -admin.site.register(CrisalidConfig, CrisalidConfigAdmin) -admin.site.register(Researcher, ResearcherAdmin) -admin.site.register(Identifier, IdentifierAdmin) -admin.site.register(Document, DocumentAdmin) + messages.add_message( + request, + messages.INFO, + f"CrisalidBus listener stoped ({total}).", + ) diff --git a/services/crisalid/bus/client.py b/services/crisalid/bus/client.py index 16a8b448..08caf3fa 100644 --- a/services/crisalid/bus/client.py +++ b/services/crisalid/bus/client.py @@ -31,7 +31,7 @@ class CrisalidBusClient: def __init__(self, config: CrisalidConfig): self.config = config - self.conn: pika.BlockingConnection | None = None + self._conn: pika.BlockingConnection | None = None 
self._channel = pika.channel.Channel self.logger = logging.getLogger(config.organization.code) self._stop_event: Event | None = None @@ -61,7 +61,7 @@ def parameters(self) -> dict | None: return parameters def connect(self): - assert self.conn is None, "rabimqt is already started" + assert self._conn is None, "rabimqt is already started" parameters = self.parameters() if not parameters: @@ -82,7 +82,7 @@ def connect(self): parameters["user"], parameters["password"] ) - self.conn = pika.BlockingConnection( + self._conn = pika.BlockingConnection( pika.ConnectionParameters( host=parameters["host"], port=parameters["port"], @@ -90,12 +90,14 @@ def connect(self): virtual_host="/", ), ) - self._channel = self.conn.channel() + self._channel = self._conn.channel() exchange = self.CRISALID_EXCHANGE self._channel.exchange_declare( exchange=exchange, exchange_type="topic", durable=True ) - queue_name = f"projects-backend.{exchange}" + + # queue name in rabitmq + queue_name = f"projects-backend.{self.config.organization.code.lower()}.{exchange}" self._channel.queue_declare(queue=queue_name, exclusive=True) for routing_key in self.CRISALID_ROUTING_KEYS: self._channel.queue_bind( @@ -109,7 +111,7 @@ def connect(self): self.logger.info("Start channel Consuming") while not self._stop_event.is_set(): - self.conn.process_data_events(time_limit=1) + self._conn.process_data_events(time_limit=1) except pika.exceptions.ConnectionClosedByBroker: self.logger.error("Connection closed by crisalid broker") @@ -136,9 +138,9 @@ def _disconnect(self): self._channel.close() self._channel = None - if self.conn is not None: - self.conn.close() - self.conn = None + if self._conn is not None: + self._conn.close() + self._conn = None self.logger.info("CrisalidBus connection closed") diff --git a/services/crisalid/bus/consumer.py b/services/crisalid/bus/consumer.py index e16095de..c6a79672 100644 --- a/services/crisalid/bus/consumer.py +++ b/services/crisalid/bus/consumer.py @@ -14,11 +14,12 @@ class 
CrisalidConsumer: """class to register callback on rabitmqt event""" def __init__(self): + # initial cosumer dict self.clean() def clean(self): """remove all registered callback""" - self._consumer: dict[CrisalidTypeEnum, dict[CrisalidEventEnum, Callable]] = ( + self._consumers: dict[CrisalidTypeEnum, dict[CrisalidEventEnum, Callable]] = ( defaultdict(lambda: defaultdict(lambda: None)) ) @@ -29,15 +30,15 @@ def add_callback( callback: Callable, ): assert ( - crisalid_event.value not in self._consumer[crisalid_type.value] + crisalid_event.value not in self._consumers[crisalid_type.value] ), f"Event {crisalid_type}::{crisalid_event}, is already set" # add callback - self._consumer[crisalid_type.value][crisalid_event.value] = callback + self._consumers[crisalid_type.value][crisalid_event.value] = callback return callback def __getitem__(self, key): - return self._consumer[key] + return self._consumers[key] crisalid_consumer = CrisalidConsumer() diff --git a/services/crisalid/bus/runner.py b/services/crisalid/bus/runner.py index ca85cb5c..a3cc8c2b 100644 --- a/services/crisalid/bus/runner.py +++ b/services/crisalid/bus/runner.py @@ -40,12 +40,13 @@ def stop(self): self.thread = None -organization_maps: dict[str, OrganizationClient] = {} +# dict registered client/thread by organization code +CLIENTS_ORGA_MAPS: dict[str, OrganizationClient] = {} def start_crisalidbus(config: CrisalidConfig): with rlock: - client = organization_maps.get(config.organization.code) + client = CLIENTS_ORGA_MAPS.get(config.organization.code) if client is not None: stop_crisalidbus(client.config) @@ -54,19 +55,19 @@ def start_crisalidbus(config: CrisalidConfig): ), f"can't instanciate crisalidBus for {config.organization.code=}, active=False" client = OrganizationClient(config) - organization_maps[config.organization.code] = client + CLIENTS_ORGA_MAPS[config.organization.code] = client client.start() def stop_crisalidbus(config: CrisalidConfig): with rlock: - if config.organization.code not in 
organization_maps: + if config.organization.code not in CLIENTS_ORGA_MAPS: return - client = organization_maps[config.organization.code] + client = CLIENTS_ORGA_MAPS[config.organization.code] client.config = config client.stop() - del organization_maps[config.organization.code] + del CLIENTS_ORGA_MAPS[config.organization.code] def initial_start_crisalidbus(): @@ -80,5 +81,5 @@ def initial_start_crisalidbus(): @atexit.register def _stop_all_crisalid(): with rlock: - for client in list(organization_maps.values()): + for client in list(CLIENTS_ORGA_MAPS.values()): stop_crisalidbus(client.config) diff --git a/services/crisalid/manager.py b/services/crisalid/manager.py index 544398ce..25012b2e 100644 --- a/services/crisalid/manager.py +++ b/services/crisalid/manager.py @@ -3,7 +3,20 @@ class CrisalidQuerySet(QuerySet): def from_identifiers(self, identifiers: list, distinct=True): - """filter by identifiers""" + """filter by identifiers + identifiers is a list of: + - int: pk of the identifier + - Identifier: Identifier object + - dict: dict object with value/harvester key + + example: + Document.objects.form_identifiers([ + Identifier(value="jhon.carter@mars.space", harvester="eppn"), + 45, + {"harvester": "idref", "value": "4855993"} + ]) + + """ from services.crisalid.models import Identifier pks = set() diff --git a/services/crisalid/models.py b/services/crisalid/models.py index 66c79263..f7ce535b 100644 --- a/services/crisalid/models.py +++ b/services/crisalid/models.py @@ -100,7 +100,7 @@ def __str__(self): return self.get_full_name() def get_full_name(self): - return f"{self.given_name.capitalize()} {self.family_name.capitalize()}".strip() + return f"{self.given_name.capitalize()} {self.family_name.capitalize()}" class DocumentContributor(models.Model): @@ -292,7 +292,9 @@ class CrisalidConfig(OrganizationRelated, models.Model): max_length=255, help_text="crisalidbus/rabimqt password" ) - apollo_url = models.CharField(max_length=255, help_text="apollo/graphql 
host:port") + apollo_url = models.CharField( + max_length=255, help_text="apollo/graphql host:port/graphql" + ) apollo_token = models.CharField(max_length=255, help_text="apollo token") active = models.BooleanField(help_text="config is enabled/disabled", default=False) diff --git a/services/crisalid/tasks.py b/services/crisalid/tasks.py index a1d1d043..b3d7226a 100644 --- a/services/crisalid/tasks.py +++ b/services/crisalid/tasks.py @@ -86,3 +86,10 @@ def delete_document(crisalid_config_id: int, fields: dict): pks = Document.objects.from_identifiers(identifiers).values_list("pk", flat=True) deleted, _ = Document.objects.filter(pk__in=pks).delete() logger.info("deleted = %s", deleted) + + +@app.task(name="Vectorize documents") +def vectorize_documents(documents_pks: list[int]): + for obj in Document.objects.filter(pk__in=documents_pks): + logger.debug("vectorize document=%s", obj) + obj.vectorize() diff --git a/services/crisalid/tests/test_crisalid_bus.py b/services/crisalid/tests/test_crisalid_bus.py index 457cc8f8..c42383d3 100644 --- a/services/crisalid/tests/test_crisalid_bus.py +++ b/services/crisalid/tests/test_crisalid_bus.py @@ -6,7 +6,7 @@ from services.crisalid.bus.client import CrisalidBusClient from services.crisalid.bus.constant import CrisalidEventEnum, CrisalidTypeEnum from services.crisalid.bus.consumer import crisalid_consumer -from services.crisalid.bus.runner import _stop_all_crisalid, organization_maps +from services.crisalid.bus.runner import CLIENTS_ORGA_MAPS, _stop_all_crisalid from services.crisalid.factories import CrisalidConfigFactory @@ -139,13 +139,13 @@ def setUp(self) -> None: def test_start_signals(self, clientbus_mock, thread_mock): # not crisalidbus loaded - self.assertEqual(organization_maps, {}) + self.assertEqual(CLIENTS_ORGA_MAPS, {}) self.config.active = True self.config.save() - self.assertIn(self.config.organization.code, organization_maps) - client: Mock = organization_maps[self.config.organization.code] + 
self.assertIn(self.config.organization.code, CLIENTS_ORGA_MAPS) + client: Mock = CLIENTS_ORGA_MAPS[self.config.organization.code] self.assertIsNotNone(client) clientbus_mock.assert_called_once_with(self.config) @@ -156,10 +156,10 @@ def test_stop_signals(self, clientbus_mock, thread_mock): self.config.active = True self.config.save() - self.assertIn(self.config.organization.code, organization_maps) + self.assertIn(self.config.organization.code, CLIENTS_ORGA_MAPS) self.config.active = False self.config.save() - self.assertNotIn(self.config.organization.code, organization_maps) + self.assertNotIn(self.config.organization.code, CLIENTS_ORGA_MAPS) clientbus_mock().stop.assert_called_once() thread_mock.Thread().join.assert_called_once() @@ -168,20 +168,20 @@ def test_stop_signals_disable(self, clientbus_mock, thread_mock): self.config.active = False self.config.save() - self.assertNotIn(self.config.organization.code, organization_maps) + self.assertNotIn(self.config.organization.code, CLIENTS_ORGA_MAPS) def test_start_signals_active(self, clientbus_mock, thread_mock): self.config.active = True self.config.save() - self.assertIn(self.config.organization.code, organization_maps) + self.assertIn(self.config.organization.code, CLIENTS_ORGA_MAPS) self.config.active = True self.config.save() - self.assertIn(self.config.organization.code, organization_maps) + self.assertIn(self.config.organization.code, CLIENTS_ORGA_MAPS) def test_delte_signals_active(self, clientbus_mock, thread_mock): config = CrisalidConfigFactory(active=True) - self.assertIn(config.organization.code, organization_maps) + self.assertIn(config.organization.code, CLIENTS_ORGA_MAPS) config.delete() - self.assertNotIn(config.organization.code, organization_maps) + self.assertNotIn(config.organization.code, CLIENTS_ORGA_MAPS) diff --git a/services/crisalid/tests/test_populate.py b/services/crisalid/tests/test_populate.py index 8662c797..408b3853 100644 --- a/services/crisalid/tests/test_populate.py +++ 
b/services/crisalid/tests/test_populate.py @@ -14,9 +14,9 @@ class TestPopulateResearcher(test.TestCase): def setUpClass(cls): super().setUpClass() cls.config = CrisalidConfigFactory() + cls.popu = PopulateResearcher(cls.config) def test_create_researcher(self): - popu = PopulateResearcher(self.config) data = { "uid": "05-11-1995-uuid", "names": [ @@ -30,7 +30,7 @@ def test_create_researcher(self): ], } - new_obj = popu.single(data) + new_obj = self.popu.single(data) # check obj from db obj = Researcher.objects.first() @@ -44,7 +44,6 @@ def test_create_researcher(self): self.assertEqual(iden.harvester, Identifier.Harvester.HAL.value) def test_create_researcher_whithout_identifiers(self): - popu = PopulateResearcher(self.config) data = { "uid": "05-11-1995-uuid", "names": [ @@ -56,7 +55,7 @@ def test_create_researcher_whithout_identifiers(self): "identifiers": [], } - new_obj = popu.single(data) + new_obj = self.popu.single(data) self.assertIsNone(new_obj) self.assertEqual(Researcher.objects.count(), 0) @@ -81,9 +80,7 @@ def test_no_change_researcher(self): ) researcher.identifiers.add(iden) - popu = PopulateResearcher(self.config) - - new_obj = popu.single(data) + new_obj = self.popu.single(data) # check no new object are created self.assertEqual(Researcher.objects.count(), 1) @@ -120,8 +117,7 @@ def test_update_identifiers(self): data["identifiers"].append( {"value": "000-666-999", "type": Identifier.Harvester.ORCID.value} ) - popu = PopulateResearcher(self.config) - popu.single(data) + self.popu.single(data) # check no new object are created self.assertEqual(Researcher.objects.count(), 1) @@ -147,8 +143,7 @@ def test_create_user_researcher(self): {"value": "eppn@lpi.com", "type": Identifier.Harvester.EPPN.value}, ], } - popu = PopulateResearcher(self.config) - popu.single(data) + self.popu.single(data) user = ProjectUser.objects.first() # check no new object are created @@ -181,8 +176,7 @@ def test_match_user_researcher(self): 
PrivacySettings.PrivacyChoices.PUBLIC.value, ) - popu = PopulateResearcher(self.config) - popu.single(data) + self.popu.single(data) researcher = Researcher.objects.select_related("user").first() self.assertEqual(researcher.user, user) @@ -202,9 +196,9 @@ class TestPopulateDocument(test.TestCase): def setUpClass(cls): super().setUpClass() cls.config = CrisalidConfigFactory() + cls.popu = PopulateDocument(cls.config) def test_create_publication(self): - popu = PopulateDocument(self.config) data = { "uid": "05-11-1995-uuid", "document_type": None, @@ -249,7 +243,7 @@ def test_create_publication(self): ], } - new_obj = popu.single(data) + new_obj = self.popu.single(data) # check obj from db obj = Document.objects.first() @@ -263,7 +257,6 @@ def test_create_publication(self): self.assertEqual(iden.harvester, Identifier.Harvester.HAL.value) def test_create_document_whitout_identifiers(self): - popu = PopulateDocument(self.config) data = { "uid": "05-11-1995-uuid", "document_type": None, @@ -302,38 +295,34 @@ def test_create_document_whitout_identifiers(self): "recorded_by": [], } - new_obj = popu.single(data) + new_obj = self.popu.single(data) # check obj from db self.assertIsNone(new_obj) self.assertEqual(Document.objects.count(), 0) def test_sanitize_date(self): - popu = PopulateDocument(self.config) - self.assertEqual( - popu.sanitize_date("1999"), datetime.datetime(1999, 1, 1).date() + self.popu.sanitize_date("1999"), datetime.datetime(1999, 1, 1).date() ) self.assertEqual( - popu.sanitize_date("1999-05"), datetime.datetime(1999, 5, 1).date() + self.popu.sanitize_date("1999-05"), datetime.datetime(1999, 5, 1).date() ) self.assertEqual( - popu.sanitize_date("1999-05-11"), datetime.datetime(1999, 5, 11).date() + self.popu.sanitize_date("1999-05-11"), datetime.datetime(1999, 5, 11).date() ) - self.assertEqual(popu.sanitize_date(""), None) - self.assertEqual(popu.sanitize_date(None), None) - self.assertEqual(popu.sanitize_date("invalidDate"), None) + 
self.assertEqual(self.popu.sanitize_date(""), None) + self.assertEqual(self.popu.sanitize_date(None), None) + self.assertEqual(self.popu.sanitize_date("invalidDate"), None) def test_sanitize_titles(self): - popu = PopulateDocument(self.config) - - self.assertEqual(popu.sanitize_languages([]), "") + self.assertEqual(self.popu.sanitize_languages([]), "") self.assertEqual( - popu.sanitize_languages([{"language": "en", "value": "en-title"}]), + self.popu.sanitize_languages([{"language": "en", "value": "en-title"}]), "en-title", ) self.assertEqual( - popu.sanitize_languages( + self.popu.sanitize_languages( [ {"language": "en", "value": "en-title"}, {"language": "fr", "value": "fr-title"}, @@ -342,7 +331,7 @@ def test_sanitize_titles(self): "en-title", ) self.assertEqual( - popu.sanitize_languages( + self.popu.sanitize_languages( [ {"language": "es", "value": "es-title"}, {"language": "fr", "value": "fr-title"}, @@ -351,23 +340,21 @@ def test_sanitize_titles(self): "fr-title", ) self.assertEqual( - popu.sanitize_languages([{"language": "es", "value": "es-title"}]), + self.popu.sanitize_languages([{"language": "es", "value": "es-title"}]), "es-title", ) def test_sanitize_document_type(self): - popu = PopulateDocument(self.config) - self.assertEqual( - popu.sanitize_document_type(None), + self.popu.sanitize_document_type(None), Document.DocumentType.UNKNOWN.value, ) self.assertEqual( - popu.sanitize_document_type("invalid-Document-type"), + self.popu.sanitize_document_type("invalid-Document-type"), Document.DocumentType.UNKNOWN.value, ) self.assertEqual( - popu.sanitize_document_type( + self.popu.sanitize_document_type( Document.DocumentType.AUDIOVISUAL_DOCUMENT.value ), Document.DocumentType.AUDIOVISUAL_DOCUMENT.value, diff --git a/services/crisalid/tests/test_tasks.py b/services/crisalid/tests/test_tasks.py index 94fd8805..a0faf6e3 100644 --- a/services/crisalid/tests/test_tasks.py +++ b/services/crisalid/tests/test_tasks.py @@ -1,12 +1,20 @@ +from unittest.mock import 
patch + from django import test from services.crisalid.factories import ( CrisalidConfigFactory, DocumentFactory, ResearcherFactory, + faker, +) +from services.crisalid.models import Document, Identifier, Researcher +from services.crisalid.tasks import ( + create_document, + create_researcher, + delete_document, + delete_researcher, ) -from services.crisalid.models import Document, Researcher -from services.crisalid.tasks import delete_document, delete_researcher class TestCrisalidTasks(test.TestCase): @@ -87,8 +95,98 @@ def test_delete_research(self): self.assertTrue(Researcher.objects.filter(pk=researcher.pk).exists()) - def test_create_document(self): - pass - def test_create_researcher(self): - pass + # other check/tests in test_views.py + fields = { + "uid": "05-11-1995-uuid", + "names": [ + { + "first_names": [{"value": "marty", "language": "fr"}], + "last_names": [{"value": "mcfly", "language": "fr"}], + } + ], + "identifiers": [ + {"value": "hals-truc", "type": Identifier.Harvester.HAL.value} + ], + } + + create_researcher(self.config.pk, fields) + + # check obj from db + obj = Researcher.objects.first() + + self.assertEqual(obj.given_name, "marty") + self.assertEqual(obj.family_name, "mcfly") + self.assertEqual(obj.identifiers.count(), 1) + iden = obj.identifiers.first() + self.assertEqual(iden.value, "hals-truc") + self.assertEqual(iden.harvester, Identifier.Harvester.HAL.value) + + @patch("services.crisalid.interface.Client") + def test_create_document_empty(self, client_gql): + fields = {"uid": faker.uuid4()} + client_gql().execute.return_value = {"documents": []} + + self.assertEqual(Document.objects.count(), 0) + create_document(self.config.pk, fields) + self.assertEqual(Document.objects.count(), 0) + + @patch("services.crisalid.interface.Client") + def test_create_document(self, client_gql): + fields = {"uid": faker.uuid4()} + data = { + "uid": "05-11-1995-uuid", + "document_type": None, + "titles": [ + {"language": "en", "value": "fiction"}, + ], + 
"abstracts": [ + {"language": "en", "value": "description"}, + ], + "publication_date": "1999", + "has_contributions": [ + { + "roles": ["http://id.loc.gov/vocabulary/relators/aut"], + "contributor": [ + { + "uid": "local-v9034", + "names": [ + { + "first_names": [ + {"value": "Marty", "language": "fr"} + ], + "last_names": [ + {"value": "Mcfly", "language": "fr"} + ], + } + ], + "identifiers": [ + {"type": "eppn", "value": "marty.mcfly@non-de-zeus.fr"}, + {"type": "idref", "value": "4545454545454"}, + {"type": "local", "value": "v55555"}, + ], + } + ], + } + ], + "recorded_by": [ + { + "uid": "hals-truc", + "harvester": Identifier.Harvester.HAL.value, + "value": "", + } + ], + } + + client_gql().execute.return_value = {"documents": [data]} + + create_document(self.config.pk, fields) + # check obj from db + obj = Document.objects.first() + + self.assertEqual(obj.title, "fiction") + self.assertEqual(obj.identifiers.count(), 1) + self.assertEqual(obj.document_type, Document.DocumentType.UNKNOWN.value) + iden = obj.identifiers.first() + self.assertEqual(iden.value, "hals-truc") + self.assertEqual(iden.harvester, Identifier.Harvester.HAL.value) diff --git a/services/crisalid/views.py b/services/crisalid/views.py index d1af91d6..5f450fbf 100644 --- a/services/crisalid/views.py +++ b/services/crisalid/views.py @@ -318,11 +318,12 @@ def search(self, request, *args, **kwargs): """Method to search researchers by harvester type and multiple harvesters value""" qs = self.get_queryset() - harvester_values = request.query_params.get("values").split(",") harvester = request.query_params.get("harvester") - qs = qs.filter( - identifiers__harvester=harvester, identifiers__value__in=harvester_values - ) + harvester_values = request.query_params.get("values").split(",") + identifiers = [ + {"harvester": harvester, "value": value} for value in harvester_values + ] + qs = qs.from_identifiers(identifiers) queryset_page = self.paginate_queryset(qs) data = self.serializer_class( diff 
--git a/services/mistral/tasks.py b/services/mistral/tasks.py index f2eceaf4..60b794c0 100644 --- a/services/mistral/tasks.py +++ b/services/mistral/tasks.py @@ -1,27 +1,34 @@ import logging -from apps.accounts.models import ProjectUser from apps.commons.utils import clear_memory -from apps.projects.models import Project from projects.celery import app -from .models import ProjectEmbedding, UserEmbedding +from .models import DocumentEmbedding, MistralEmbedding, ProjectEmbedding, UserEmbedding logger = logging.getLogger(__name__) @app.task(name="services.mistral.tasks.vectorize_updated_objects") -@clear_memory def vectorize_updated_objects(): _vectorize_updated_objects() -def _vectorize_updated_objects(): - projects = Project.objects.all() - users = ProjectUser.objects.all() - for project in projects: - embedding, _ = ProjectEmbedding.objects.get_or_create(item=project) - embedding.vectorize() - for user in users: - embedding, _ = UserEmbedding.objects.get_or_create(item=user) +@clear_memory +def _vectorize_objects(model_embedding: MistralEmbedding): + related_model = model_embedding.item.field.related_model + related_query_name = model_embedding.item.field.related_query_name() + + for obj in ( + related_model.objects.select_related(related_query_name).all().iterator() + ): + embedding = getattr(obj, related_query_name, None) + # embedding not exists + if embedding is None: + embedding = model_embedding(item=obj) embedding.vectorize() + + +def _vectorize_updated_objects(): + _vectorize_objects(ProjectEmbedding) + _vectorize_objects(UserEmbedding) + _vectorize_objects(DocumentEmbedding) From 6b654ceda338ea261acb2990e06ce907f9ed2266 Mon Sep 17 00:00:00 2001 From: rgermain Date: Tue, 2 Dec 2025 13:01:20 +0100 Subject: [PATCH 13/32] cleanup migrations --- .../migrations/0002_crisalidconfig.py | 66 -------- .../0002_crisalidconfig_and_more.py | 152 ++++++++++++++++++ ...ter_crisalidconfig_apollo_host_and_more.py | 23 --- ...ove_crisalidconfig_apollo_host_and_more.py | 37 
----- ...d_document_unique_crisalid_uid_and_more.py | 53 ------ services/crisalid/models.py | 5 +- 6 files changed, 155 insertions(+), 181 deletions(-) delete mode 100644 services/crisalid/migrations/0002_crisalidconfig.py create mode 100644 services/crisalid/migrations/0002_crisalidconfig_and_more.py delete mode 100644 services/crisalid/migrations/0003_alter_crisalidconfig_apollo_host_and_more.py delete mode 100644 services/crisalid/migrations/0004_remove_crisalidconfig_apollo_host_and_more.py delete mode 100644 services/crisalid/migrations/0005_remove_document_crisalid_document_unique_crisalid_uid_and_more.py diff --git a/services/crisalid/migrations/0002_crisalidconfig.py b/services/crisalid/migrations/0002_crisalidconfig.py deleted file mode 100644 index 69c48b9b..00000000 --- a/services/crisalid/migrations/0002_crisalidconfig.py +++ /dev/null @@ -1,66 +0,0 @@ -# Generated by Django 4.2.25 on 2025-11-27 10:37 - -import apps.commons.mixins -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ("organizations", "0003_initial"), - ("crisalid", "0001_initial"), - ] - - operations = [ - migrations.CreateModel( - name="CrisalidConfig", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "crisalidbus_host", - models.URLField(help_text="crisalidbus/rabimqt host"), - ), - ( - "crisalidbus_username", - models.CharField( - help_text="crisalidbus/rabimqt username", max_length=255 - ), - ), - ( - "crisalidbus_password", - models.CharField( - help_text="crisalidbus/rabimqt password", max_length=255 - ), - ), - ("apollo_host", models.URLField(help_text="apollo/graphql host")), - ( - "apollo_token", - models.CharField(help_text="apollo token", max_length=255), - ), - ( - "active", - models.BooleanField( - default=False, help_text="config is enabled/disabled" - ), - ), - ( - "organization", - 
models.OneToOneField( - on_delete=django.db.models.deletion.CASCADE, - related_name="crisalid", - to="organizations.organization", - ), - ), - ], - bases=(apps.commons.mixins.OrganizationRelated, models.Model), - ), - ] diff --git a/services/crisalid/migrations/0002_crisalidconfig_and_more.py b/services/crisalid/migrations/0002_crisalidconfig_and_more.py new file mode 100644 index 00000000..7b9b343f --- /dev/null +++ b/services/crisalid/migrations/0002_crisalidconfig_and_more.py @@ -0,0 +1,152 @@ +# Generated by Django 4.2.25 on 2025-12-02 11:52 + +import apps.commons.mixins +import django.db.models.deletion +import django.db.models.functions.text +from django.db import migrations, models + + +def prepare_display_name(apps, schema_editor): + """split display name to given_name/familly_name""" + Researcher = apps.get_model("crisalid", "Researcher") + db_alias = schema_editor.connection.alias + + to_update = [] + for research in Researcher.objects.using(db_alias).all(): + given_name, family_name = "", "" + splitter = research.display_name.split(" ", 1) + if len(splitter) >= 1: + given_name = splitter[0] + if len(splitter) >= 2: + family_name = " ".join(splitter[1:]) + + research.given_name = given_name + research.family_name = family_name + to_update.append(research) + + Researcher.objects.using(db_alias).bulk_update( + to_update, fields=("given_name", "family_name") + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("organizations", "0003_initial"), + ("crisalid", "0001_initial"), + ] + + operations = [ + migrations.CreateModel( + name="CrisalidConfig", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "crisalidbus_url", + models.CharField( + help_text="crisalidbus/rabimqt host:port", max_length=255 + ), + ), + ( + "crisalidbus_username", + models.CharField( + help_text="crisalidbus/rabimqt username", max_length=255 + ), + ), + ( + 
"crisalidbus_password", + models.CharField( + help_text="crisalidbus/rabimqt password", max_length=255 + ), + ), + ( + "apollo_url", + models.CharField( + help_text="apollo/graphql host:port/graphql", max_length=255 + ), + ), + ( + "apollo_token", + models.CharField(help_text="apollo token", max_length=255), + ), + ( + "active", + models.BooleanField( + default=False, help_text="config is enabled/disabled" + ), + ), + ], + bases=(apps.commons.mixins.OrganizationRelated, models.Model), + ), + migrations.RemoveConstraint( + model_name="document", + name="crisalid_document_unique_crisalid_uid", + ), + migrations.RemoveConstraint( + model_name="identifier", + name="unique_harvester", + ), + migrations.RemoveConstraint( + model_name="researcher", + name="crisalid_researcher_unique_crisalid_uid", + ), + migrations.RemoveField( + model_name="document", + name="crisalid_uid", + ), + migrations.RemoveField( + model_name="researcher", + name="crisalid_uid", + ), + migrations.AddField( + model_name="document", + name="updated", + field=models.DateTimeField(auto_created=True, auto_now=True), + ), + migrations.AddField( + model_name="researcher", + name="family_name", + field=models.CharField(blank=True, max_length=255), + ), + migrations.AddField( + model_name="researcher", + name="given_name", + field=models.CharField(blank=True, max_length=255), + ), + migrations.RunPython(prepare_display_name), + migrations.RemoveField( + model_name="researcher", + name="display_name", + ), + migrations.AddField( + model_name="researcher", + name="updated", + field=models.DateTimeField(auto_created=True, auto_now=True), + ), + migrations.AddConstraint( + model_name="identifier", + constraint=models.UniqueConstraint( + django.db.models.functions.text.Lower("harvester"), + django.db.models.functions.text.Lower("value"), + condition=models.Q(("harvester", "local"), _negated=True), + name="unique_harvester", + ), + ), + migrations.AddField( + model_name="crisalidconfig", + name="organization", 
+ field=models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + related_name="crisalid", + to="organizations.organization", + ), + ), + ] diff --git a/services/crisalid/migrations/0003_alter_crisalidconfig_apollo_host_and_more.py b/services/crisalid/migrations/0003_alter_crisalidconfig_apollo_host_and_more.py deleted file mode 100644 index 8096bf84..00000000 --- a/services/crisalid/migrations/0003_alter_crisalidconfig_apollo_host_and_more.py +++ /dev/null @@ -1,23 +0,0 @@ -# Generated by Django 4.2.25 on 2025-11-27 12:20 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("crisalid", "0002_crisalidconfig"), - ] - - operations = [ - migrations.AlterField( - model_name="crisalidconfig", - name="apollo_host", - field=models.URLField(help_text="apollo/graphql host:port"), - ), - migrations.AlterField( - model_name="crisalidconfig", - name="crisalidbus_host", - field=models.URLField(help_text="crisalidbus/rabimqt host:port"), - ), - ] diff --git a/services/crisalid/migrations/0004_remove_crisalidconfig_apollo_host_and_more.py b/services/crisalid/migrations/0004_remove_crisalidconfig_apollo_host_and_more.py deleted file mode 100644 index a53d4534..00000000 --- a/services/crisalid/migrations/0004_remove_crisalidconfig_apollo_host_and_more.py +++ /dev/null @@ -1,37 +0,0 @@ -# Generated by Django 4.2.25 on 2025-11-28 08:55 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("crisalid", "0003_alter_crisalidconfig_apollo_host_and_more"), - ] - - operations = [ - migrations.RemoveField( - model_name="crisalidconfig", - name="apollo_host", - ), - migrations.RemoveField( - model_name="crisalidconfig", - name="crisalidbus_host", - ), - migrations.AddField( - model_name="crisalidconfig", - name="apollo_url", - field=models.CharField( - default="", help_text="apollo/graphql host:port", max_length=255 - ), - preserve_default=False, - ), - 
migrations.AddField( - model_name="crisalidconfig", - name="crisalidbus_url", - field=models.CharField( - default="", help_text="crisalidbus/rabimqt host:port", max_length=255 - ), - preserve_default=False, - ), - ] diff --git a/services/crisalid/migrations/0005_remove_document_crisalid_document_unique_crisalid_uid_and_more.py b/services/crisalid/migrations/0005_remove_document_crisalid_document_unique_crisalid_uid_and_more.py deleted file mode 100644 index d7b214bc..00000000 --- a/services/crisalid/migrations/0005_remove_document_crisalid_document_unique_crisalid_uid_and_more.py +++ /dev/null @@ -1,53 +0,0 @@ -# Generated by Django 4.2.25 on 2025-11-28 10:16 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("crisalid", "0004_remove_crisalidconfig_apollo_host_and_more"), - ] - - operations = [ - migrations.RemoveConstraint( - model_name="document", - name="crisalid_document_unique_crisalid_uid", - ), - migrations.RemoveConstraint( - model_name="researcher", - name="crisalid_researcher_unique_crisalid_uid", - ), - migrations.RemoveField( - model_name="document", - name="crisalid_uid", - ), - migrations.RemoveField( - model_name="researcher", - name="crisalid_uid", - ), - migrations.RemoveField( - model_name="researcher", - name="display_name", - ), - migrations.AddField( - model_name="document", - name="updated", - field=models.DateTimeField(auto_created=True, auto_now=True), - ), - migrations.AddField( - model_name="researcher", - name="family_name", - field=models.CharField(blank=True, max_length=255), - ), - migrations.AddField( - model_name="researcher", - name="given_name", - field=models.CharField(blank=True, max_length=255), - ), - migrations.AddField( - model_name="researcher", - name="updated", - field=models.DateTimeField(auto_created=True, auto_now=True), - ), - ] diff --git a/services/crisalid/models.py b/services/crisalid/models.py index f7ce535b..3c9d96ff 100644 --- 
a/services/crisalid/models.py +++ b/services/crisalid/models.py @@ -97,9 +97,10 @@ class Researcher(CrisalidDataModel): def __str__(self): if hasattr(self, "user") and self.user is not None: return self.user.get_full_name() - return self.get_full_name() + return self.display_name - def get_full_name(self): + @property + def display_name(self): return f"{self.given_name.capitalize()} {self.family_name.capitalize()}" From 1abe97c08eb682819e92f61d7e9fe8ba98a85d5f Mon Sep 17 00:00:00 2001 From: rgermain Date: Tue, 2 Dec 2025 13:51:54 +0100 Subject: [PATCH 14/32] tests: fix invalid pk --- services/crisalid/bus/consumer.py | 4 ++-- services/crisalid/tests/test_crisalid_bus.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/services/crisalid/bus/consumer.py b/services/crisalid/bus/consumer.py index c6a79672..da1d9829 100644 --- a/services/crisalid/bus/consumer.py +++ b/services/crisalid/bus/consumer.py @@ -15,9 +15,9 @@ class CrisalidConsumer: def __init__(self): # initial cosumer dict - self.clean() + self.clear() - def clean(self): + def clear(self): """remove all registered callback""" self._consumers: dict[CrisalidTypeEnum, dict[CrisalidEventEnum, Callable]] = ( defaultdict(lambda: defaultdict(lambda: None)) diff --git a/services/crisalid/tests/test_crisalid_bus.py b/services/crisalid/tests/test_crisalid_bus.py index c42383d3..8c7ca6b8 100644 --- a/services/crisalid/tests/test_crisalid_bus.py +++ b/services/crisalid/tests/test_crisalid_bus.py @@ -30,7 +30,7 @@ def setUpClass(cls): def setUp(self): self.client = CrisalidBusClient(self.config) - crisalid_consumer.clean() + crisalid_consumer.clear() def test_dispatch_no_callback(self): # this run withtout called any callback @@ -47,7 +47,7 @@ def test_dispatch_with_callback(self): # normaly is called callback.assert_called_once_with( - self.config.organization.pk, json.loads(self.payload)["fields"] + self.config.pk, json.loads(self.payload)["fields"] ) def test_add_callback(self): From 
9c987802ab5cbcea3fb5853e156388c92d5f0135 Mon Sep 17 00:00:00 2001 From: rgermain Date: Mon, 8 Dec 2025 11:42:13 +0100 Subject: [PATCH 15/32] fix: permissions researcher --- apps/commons/permissions.py | 10 ---------- .../crisalid/management/commands/csv_crisalid.py | 2 +- services/crisalid/tests/test_views.py | 12 ++++++++++-- services/crisalid/views.py | 9 +++------ 4 files changed, 14 insertions(+), 19 deletions(-) diff --git a/apps/commons/permissions.py b/apps/commons/permissions.py index 5ce10fb4..f2414b25 100644 --- a/apps/commons/permissions.py +++ b/apps/commons/permissions.py @@ -89,13 +89,3 @@ def has_object_permission( self, request: Request, view: GenericViewSet, obj ) -> bool: return self.has_permission(request, view) - - -class OrganizationPermission(permissions.BasePermission): - def has_permission(self, request: Request, view: GenericViewSet, obj=None) -> bool: - if request.user.is_superuser: - return True - grp = view.organization.get_users() - return request.user.groups.contains(grp) - - has_object_permission = has_permission diff --git a/services/crisalid/management/commands/csv_crisalid.py b/services/crisalid/management/commands/csv_crisalid.py index d9958e4c..e36901fa 100644 --- a/services/crisalid/management/commands/csv_crisalid.py +++ b/services/crisalid/management/commands/csv_crisalid.py @@ -112,5 +112,5 @@ def handle(self, **options): output = pathlib.Path(options["output"]) - if command in ("researcher", "all"): + if command in ("all", "researcher"): self.csv_researcher(config, output) diff --git a/services/crisalid/tests/test_views.py b/services/crisalid/tests/test_views.py index d44fa058..a0a703b4 100644 --- a/services/crisalid/tests/test_views.py +++ b/services/crisalid/tests/test_views.py @@ -230,7 +230,11 @@ def test_not_same_organization(self): data={"harvester": "idref", "values": "6666666"}, ) - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertEqual(response.status_code, status.HTTP_200_OK) + results 
= response.json()["results"] + expected = {} + # not same orga, return empty user + self.assertEqual(results, expected) def test_search_found(self): identifier = self.researcher.identifiers.first() @@ -255,4 +259,8 @@ def test_get_list_not_connected(self): reverse("Researcher-list", args=(self.organization.code,)) ) - self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + results = response.json()["results"] + # 2 user in same organizations + self.assertEqual(len(results), 2) diff --git a/services/crisalid/views.py b/services/crisalid/views.py index 5f450fbf..95debbed 100644 --- a/services/crisalid/views.py +++ b/services/crisalid/views.py @@ -15,7 +15,6 @@ from rest_framework import viewsets from rest_framework.decorators import action -from apps.commons.permissions import OrganizationPermission from apps.commons.views import NestedOrganizationViewMixins from services.crisalid import relators from services.crisalid.models import ( @@ -90,7 +89,6 @@ class AbstractDocumentViewSet( """Abstract class to get documents info from documents types""" serializer_class = DocumentSerializer - permission_classes = (OrganizationPermission,) def filter_queryset( self, @@ -298,15 +296,14 @@ class ResearcherViewSet(NestedOrganizationViewMixins, viewsets.ReadOnlyModelView serializer_class = ResearcherSerializer filter_backends = (DjangoFilterBackend,) filterset_fields = ("user_id", "id") - permission_classes = (OrganizationPermission,) def get_queryset(self): - return ( + return self.request.user.get_user_related_queryset( Researcher.objects.filter( - user__isnull=False, user__groups__in=(self.organization.get_users(),) + user__isnull=False, user__groups__organizations__in=(self.organization,) ) .prefetch_related("identifiers") - .select_related("user") + .select_related("user"), ) @action( From e3b19d7c8e6e3a84cc7ef036acffa6d5bc72d9a1 Mon Sep 17 00:00:00 2001 From: rgermain Date: Tue, 9 Dec 2025 
15:12:12 +0100 Subject: [PATCH 16/32] feat: add structure --- services/crisalid/admin.py | 111 ++++++++++++++---- services/crisalid/factories.py | 2 + .../management/commands/populate_crisalid.py | 19 ++- ...identifier_harvester_structure_and_more.py | 86 ++++++++++++++ services/crisalid/models.py | 25 ++++ services/crisalid/populates/__init__.py | 4 + services/crisalid/populates/base.py | 8 +- services/crisalid/populates/document.py | 23 ++-- services/crisalid/populates/identifier.py | 41 +++++++ services/crisalid/populates/researcher.py | 49 +++++--- services/crisalid/populates/structure.py | 57 +++++++++ services/crisalid/queries/documents.graphql | 28 ++++- .../crisalid/queries/organisations.graphql | 14 +++ services/crisalid/queries/people.graphql | 27 ++++- services/crisalid/tasks.py | 74 +++++++++++- .../tests/fixtures/structures.graphql.json | 15 +++ services/crisalid/tests/test_populate.py | 83 ++++++++++--- services/crisalid/tests/test_tasks.py | 23 ++-- 18 files changed, 600 insertions(+), 89 deletions(-) create mode 100644 services/crisalid/migrations/0003_alter_identifier_harvester_structure_and_more.py create mode 100644 services/crisalid/populates/identifier.py create mode 100644 services/crisalid/populates/structure.py create mode 100644 services/crisalid/queries/organisations.graphql create mode 100644 services/crisalid/tests/fixtures/structures.graphql.json diff --git a/services/crisalid/admin.py b/services/crisalid/admin.py index 273ccb4b..cd36937c 100644 --- a/services/crisalid/admin.py +++ b/services/crisalid/admin.py @@ -1,9 +1,13 @@ from contextlib import suppress +from typing import Any from django.contrib import admin, messages from django.db.models import Count +from django.db.models.query import QuerySet +from django.http.request import HttpRequest -from apps.accounts.models import ProjectUser +from apps.accounts.models import PeopleGroup, ProjectUser +from services.crisalid.manager import CrisalidQuerySet from services.crisalid.tasks 
import vectorize_documents from .models import ( @@ -12,9 +16,21 @@ DocumentContributor, Identifier, Researcher, + Structure, ) +class IdentifierAminMixins: + @admin.display(description="identifiers count", ordering="identifiers_count") + def get_identifiers(self, instance): + # list all harvester name from this profile + result = [o.harvester for o in instance.identifiers.all()] + if not result: + return None + + return f"{', '.join(result)} ({len(result)})" + + @admin.register(Identifier) class IdentifierAdmin(admin.ModelAdmin): list_display = ("harvester", "value", "get_researcher", "get_documents") @@ -44,7 +60,7 @@ class DocumentContributorAdminInline(admin.StackedInline): @admin.register(Document) -class DocumentAdmin(admin.ModelAdmin): +class DocumentAdmin(IdentifierAminMixins, admin.ModelAdmin): list_display = ( "title", "publication_date", @@ -88,22 +104,16 @@ def get_queryset(self, request): def get_contributors(self, instance): return instance.contributors.count() - @admin.display(description="identifiers count", ordering="identifiers_count") - def get_identifiers(self, instance): - # list all harvester name from this profile - result = [o.harvester for o in instance.identifiers.all()] - if not result: - return None - return f"{', '.join(result)} ({len(result)})" - @admin.register(Researcher) -class ResearcherAdmin(admin.ModelAdmin): +class ResearcherAdmin(IdentifierAminMixins, admin.ModelAdmin): list_display = ( "given_name", "family_name", "user", "get_documents", + "get_memberships", + "get_employments", "get_identifiers", ) search_fields = ( @@ -123,6 +133,8 @@ def get_queryset(self, request): .prefetch_related("identifiers", "documents") .annotate(identifiers_count=Count("identifiers__id")) .annotate(documents_count=Count("documents__id", distinct=True)) + .annotate(memberships_count=Count("memberships__id", distinct=True)) + .annotate(employments_count=Count("employments__id", distinct=True)) ) @admin.action(description="assign researcher on 
projects") @@ -137,17 +149,18 @@ def assign_user(self, request, queryset): continue for identifier in research.identifiers.all(): - if identifier.harvester != Identifier.Harvester.EPPN.value: + if identifier.harvester != Identifier.Harvester.LOCAL.value: continue user = None + email = identifier.value + "@test.fr" with suppress(ProjectUser.DoesNotExist): - user = ProjectUser.objects.get(email=identifier.value) + user = ProjectUser.objects.get(email=email) if not user: created += 1 user = ProjectUser( - email=identifier.value, + email=email, given_name=research.given_name, family_name=research.family_name, ) @@ -176,14 +189,70 @@ def assign_user(self, request, queryset): def get_documents(self, instance): return instance.documents_count - @admin.display(description="identifiers count", ordering="identifiers_count") - def get_identifiers(self, instance): - # list all harvester name from this profile - result = [o.harvester for o in instance.identifiers.all()] - if not result: - return None + @admin.display(description="number of memberships", ordering="-memberships_count") + def get_memberships(self, instance): + return instance.memberships_count - return f"{', '.join(result)} ({len(result)})" + @admin.display(description="number of employments", ordering="-employments_count") + def get_employments(self, instance): + return instance.employments_count + + +@admin.register(Structure) +class StructureAdmin(IdentifierAminMixins, admin.ModelAdmin): + list_display = ( + "acronym", + "name", + "organization", + "get_memberships", + "get_employments", + "get_identifiers", + ) + search_fields = ("acronym", "name", "organization__code") + autocomplete_fields = ("organization",) + actions = ("assign_group",) + + def get_queryset(self, request: HttpRequest) -> QuerySet[Any]: + return ( + super() + .get_queryset(request) + .select_related("organization") + .annotate( + memberships_count=Count("memberships__pk", distinct=True), + employments_count=Count("employments__pk", 
distinct=True), + ) + ) + + @admin.action(description="create/update groups") + def assign_group(self, request, queryset: CrisalidQuerySet): + for structure in queryset: + name = structure.name or structure.acronym + if not name: + continue + + parent = PeopleGroup.update_or_create_root(structure.organization) + group = PeopleGroup.objects.filter( + parent=parent, name=name, organization=structure.organization + ).first() + if not group: + group = PeopleGroup( + name=name, parent=parent, organization=structure.organization + ) + + group.save() + member_group = group.get_members() + for membership in structure.memberships.select_related("user").filter( + user__isnull=False + ): + membership.user.groups.add(member_group) + + @admin.display(description="number of memberships", ordering="-memberships_count") + def get_memberships(self, instance): + return instance.memberships_count + + @admin.display(description="number of employments", ordering="-employments_count") + def get_employments(self, instance): + return instance.employments_count @admin.register(CrisalidConfig) diff --git a/services/crisalid/factories.py b/services/crisalid/factories.py index 44435c20..bba115b8 100644 --- a/services/crisalid/factories.py +++ b/services/crisalid/factories.py @@ -36,6 +36,8 @@ def value(self): Identifier.Harvester.EPPN: faker.unique.email(), Identifier.Harvester.DOI: faker.unique.doi(), Identifier.Harvester.PMID: faker.unique.url(), + Identifier.Harvester.NNS: faker.unique.uuid4(), + Identifier.Harvester.RNSR: faker.unique.uuid4(), }[self.harvester] diff --git a/services/crisalid/management/commands/populate_crisalid.py b/services/crisalid/management/commands/populate_crisalid.py index 8350182d..8770fdb6 100644 --- a/services/crisalid/management/commands/populate_crisalid.py +++ b/services/crisalid/management/commands/populate_crisalid.py @@ -10,7 +10,11 @@ Identifier, Researcher, ) -from services.crisalid.populates import PopulateDocument, PopulateResearcher +from 
services.crisalid.populates import ( + PopulateDocument, + PopulateResearcher, + PopulateStructure, +) from services.crisalid.populates.base import AbstractPopulate from services.crisalid.utils.time import timeit from services.mistral.models import DocumentEmbedding @@ -23,13 +27,13 @@ def add_arguments(self, parser): parser.add_argument( "organization", choices=CrisalidConfig.objects.filter( - organization__code__isnull=False + organization__code__isnull=False, active=True ).values_list("organization__code", flat=True), help="organization code", ) parser.add_argument( "command", - choices=("document", "researcher", "all"), + choices=("document", "researcher", "structure", "all"), help="elements to populate", ) parser.add_argument( @@ -111,3 +115,12 @@ def handle(self, **options): where={"external_EQ": False}, **options, ) + + if command in ("all", "structure"): + populate = PopulateStructure(config) + self.populate_crisalid( + service, + populate, + query="organisations", + **options, + ) diff --git a/services/crisalid/migrations/0003_alter_identifier_harvester_structure_and_more.py b/services/crisalid/migrations/0003_alter_identifier_harvester_structure_and_more.py new file mode 100644 index 00000000..7ac0cee2 --- /dev/null +++ b/services/crisalid/migrations/0003_alter_identifier_harvester_structure_and_more.py @@ -0,0 +1,86 @@ +# Generated by Django 4.2.25 on 2025-12-09 09:41 + +import apps.commons.mixins +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("organizations", "0003_initial"), + ("crisalid", "0002_crisalidconfig_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="identifier", + name="harvester", + field=models.CharField( + choices=[ + ("hal", "Hal"), + ("scanr", "Scanr"), + ("openalex", "Openalex"), + ("idref", "Idref"), + ("scopus", "Scopus"), + ("orcid", "Orcid"), + ("local", "Local"), + ("eppn", "Eppn"), + ("doi", "Doi"), + ("pmid", 
"Pmid"), + ("nns", "Nns"), + ("rnsr", "Rnsr"), + ], + max_length=50, + ), + ), + migrations.CreateModel( + name="Structure", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("updated", models.DateTimeField(auto_created=True, auto_now=True)), + ("acronym", models.TextField(blank=True, null=True)), + ("name", models.TextField()), + ( + "identifiers", + models.ManyToManyField( + related_name="structures", to="crisalid.identifier" + ), + ), + ( + "organization", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="structures", + to="organizations.organization", + ), + ), + ], + options={ + "abstract": False, + }, + bases=(apps.commons.mixins.OrganizationRelated, models.Model), + ), + migrations.AddField( + model_name="researcher", + name="employments", + field=models.ManyToManyField( + related_name="employments", to="crisalid.structure" + ), + ), + migrations.AddField( + model_name="researcher", + name="memberships", + field=models.ManyToManyField( + related_name="memberships", to="crisalid.structure" + ), + ), + ] diff --git a/services/crisalid/models.py b/services/crisalid/models.py index 3c9d96ff..b08fa6d7 100644 --- a/services/crisalid/models.py +++ b/services/crisalid/models.py @@ -57,6 +57,8 @@ class Harvester(models.TextChoices): EPPN = "eppn" DOI = "doi" PMID = "pmid" + NNS = "nns" + RNSR = "rnsr" harvester = models.CharField(max_length=50, choices=Harvester.choices) value = models.CharField(max_length=255) @@ -93,6 +95,12 @@ class Researcher(CrisalidDataModel): ) objects = CrisalidQuerySet.as_manager() + memberships = models.ManyToManyField( + "crisalid.Structure", related_name="memberships" + ) + employments = models.ManyToManyField( + "crisalid.Structure", related_name="employments" + ) def __str__(self): if hasattr(self, "user") and self.user is not None: @@ -272,6 +280,23 @@ def values(cls) -> Generator[tuple[str]]: yield v +class 
Structure(OrganizationRelated, CrisalidDataModel): + acronym = models.TextField(null=True, blank=True) + name = models.TextField() + identifiers = models.ManyToManyField( + "crisalid.Identifier", related_name="structures" + ) + organization = models.ForeignKey( + "organizations.Organization", + on_delete=models.CASCADE, + related_name="structures", + ) + objects = CrisalidQuerySet.as_manager() + + def __str__(self): + return self.name + + class CrisalidConfig(OrganizationRelated, models.Model): """model for crisalid config with host/pass for connected to crisalid, is linked to a one organization diff --git a/services/crisalid/populates/__init__.py b/services/crisalid/populates/__init__.py index b8986cd3..8302bbf8 100644 --- a/services/crisalid/populates/__init__.py +++ b/services/crisalid/populates/__init__.py @@ -1,9 +1,13 @@ from .caches import LiveCache from .document import PopulateDocument +from .identifier import PopulateIdentifier from .researcher import PopulateResearcher +from .structure import PopulateStructure __all__ = ( "PopulateResearcher", "PopulateDocument", + "PopulateStructure", + "PopulateIdentifier", "LiveCache", ) diff --git a/services/crisalid/populates/base.py b/services/crisalid/populates/base.py index 5c597589..cc92ba42 100644 --- a/services/crisalid/populates/base.py +++ b/services/crisalid/populates/base.py @@ -19,6 +19,12 @@ def __init__(self, config: CrisalidConfig, cache: TCACHE = None): self.config = config self.cache = cache or LiveCache() + def sanitize_string(self, value) -> str: + """strip value and convert it to string""" + if not value: + return "" + return str(value).strip() + def sanitize_languages(self, values: list[dict[str, str]]) -> str: """convert languages choices from crisalid fields crisalid return a list of objects with "language" and "value" assosiated from the language @@ -29,7 +35,7 @@ def sanitize_languages(self, values: list[dict[str, str]]) -> str: maps_languages = {} for value in values: - 
maps_languages[value["language"]] = (value["value"] or "").strip() + maps_languages[value["language"]] = self.sanitize_string(value["value"]) return ( maps_languages.get("en") diff --git a/services/crisalid/populates/document.py b/services/crisalid/populates/document.py index ab5025b4..0069ddef 100644 --- a/services/crisalid/populates/document.py +++ b/services/crisalid/populates/document.py @@ -1,10 +1,6 @@ from services.crisalid import relators -from services.crisalid.models import ( - CrisalidConfig, - Document, - DocumentContributor, - Identifier, -) +from services.crisalid.models import CrisalidConfig, Document, DocumentContributor +from services.crisalid.populates.identifier import PopulateIdentifier from .base import AbstractPopulate from .logger import logger @@ -14,7 +10,10 @@ class PopulateDocument(AbstractPopulate): def __init__(self, config: CrisalidConfig, cache=None): super().__init__(config, cache) - self.populate_researcher = PopulateResearcher(self.config, self.cache) + self.populate_identifiers = PopulateIdentifier(self.config, self.cache) + self.populate_researcher = PopulateResearcher( + self.config, self.cache, populate_identifiers=self.populate_identifiers + ) def sanitize_document_type(self, data: str | None): """Check documentType , and return unknow value if is not set in enum""" @@ -37,15 +36,7 @@ def sanitize_roles(self, data: list[str]) -> list[str]: def single(self, data: dict) -> Document | None: """this method create/update only on document from crisalid""" # identifiers (hal, openalex, idref ...ect) - documents_identifiers = [] - for recorded in data["recorded_by"]: - identifier = self.cache.model( - Identifier, - value=recorded["uid"], - harvester=recorded["harvester"].lower(), - ) - self.cache.save(identifier) - documents_identifiers.append(identifier) + documents_identifiers = self.populate_identifiers.multiple(data["recorded_by"]) # no identifiers for this documents, we ignore it if not documents_identifiers: diff --git 
a/services/crisalid/populates/identifier.py b/services/crisalid/populates/identifier.py new file mode 100644 index 00000000..97fbf84b --- /dev/null +++ b/services/crisalid/populates/identifier.py @@ -0,0 +1,41 @@ +from services.crisalid.models import Identifier + +from .base import AbstractPopulate + + +class PopulateIdentifier(AbstractPopulate): + """Populate class for identifiers element + + ex: + { + "type": "RNSR", + "value": "200612823S" + } + """ + + def sanitize_harvester(self, harvester: str) -> str: + # harvester can be "orcid_id" or "orcid" + if harvester == "orcid_id": + return Identifier.Harvester.ORCID + + if harvester not in Identifier.Harvester: + return None + + return harvester + + def single(self, data: dict) -> Identifier | None: + harvester = self.sanitize_harvester( + self.sanitize_string(data["harvester"]).lower() + ) + value = self.sanitize_string(data["value"]) + + if not all((harvester, value)): + return None + + identifier = self.cache.model( + Identifier, + value=value, + harvester=harvester, + ) + self.cache.save(identifier) + return identifier diff --git a/services/crisalid/populates/researcher.py b/services/crisalid/populates/researcher.py index a6245a80..1f90f5a4 100644 --- a/services/crisalid/populates/researcher.py +++ b/services/crisalid/populates/researcher.py @@ -1,10 +1,21 @@ from apps.accounts.models import PrivacySettings, ProjectUser from services.crisalid.models import Identifier, Researcher +from services.crisalid.populates.identifier import PopulateIdentifier +from services.crisalid.populates.structure import PopulateStructure from .base import AbstractPopulate class PopulateResearcher(AbstractPopulate): + def __init__(self, *ar, populate_identifiers=None, populate_structures=None, **kw): + super().__init__(*ar, **kw) + self.populate_identifiers = populate_identifiers or PopulateIdentifier( + self.config, self.cache + ) + self.populate_structures = populate_structures or PopulateStructure( + self.config, self.cache, 
populate_identifiers=self.populate_identifiers + ) + def get_names(self, data): given_name = family_name = "" @@ -45,29 +56,26 @@ def update_user(self, user: ProjectUser) -> ProjectUser: return user def check_mapping_user( - self, researcher: Researcher, data: dict + self, + researcher: Researcher, + identifiers: list[Identifier], + given_name: str, + family_name: str, ) -> ProjectUser | None: """match user from researcher (need eppn)""" if researcher.user: return self.update_user(researcher.user) - for iden in data["identifiers"]: - if iden["type"].lower() != Identifier.Harvester.EPPN.value: + for iden in identifiers: + if iden.harvester != Identifier.Harvester.EPPN: continue - given_name, family_name = self.get_names(data) - return self.create_user(iden["value"], given_name, family_name) + return self.create_user(iden.value, given_name, family_name) return None def single(self, data: dict) -> Researcher | None: - researcher_identifiers = [] - for iden in data["identifiers"]: - identifier = self.cache.model( - Identifier, value=iden["value"], harvester=iden["type"].lower() - ) - self.cache.save(identifier) - researcher_identifiers.append(identifier) + researcher_identifiers = self.populate_identifiers.multiple(data["identifiers"]) # researcher withtout any identifiers no neeeeeeed to be created if not researcher_identifiers: @@ -85,11 +93,24 @@ def single(self, data: dict) -> Researcher | None: ) given_name, family_name = self.get_names(data) - user = self.check_mapping_user(researcher, data) + user = self.check_mapping_user( + researcher, researcher_identifiers, given_name, family_name + ) self.cache.save( researcher, given_name=given_name, family_name=family_name, user=user ) - self.cache.save_m2m(researcher, identifiers=researcher_identifiers) + + m2m = {"identifiers": researcher_identifiers} + + memberships = data.get("memberships") + if memberships: + m2m["memberships"] = self.populate_structures.multiple(memberships) + + employments = data.get("employments") 
+ if employments: + m2m["employments"] = self.populate_structures.multiple(employments) + + self.cache.save_m2m(researcher, **m2m) return researcher diff --git a/services/crisalid/populates/structure.py b/services/crisalid/populates/structure.py new file mode 100644 index 00000000..8681535c --- /dev/null +++ b/services/crisalid/populates/structure.py @@ -0,0 +1,57 @@ +from services.crisalid.models import Structure +from services.crisalid.populates.identifier import PopulateIdentifier + +from .base import AbstractPopulate + + +class PopulateStructure(AbstractPopulate): + """Populate class for structure element + + ex: + { + "acronym": "CES", + "types": [ + "Organisation", + "ResearchStructure" + ], + "names": [ + { + "language": "fr", + "value": "UMR 8174 - CES" + } + ], + "identifiers": [ + { + "type": "RNSR", + "value": "200612823S" + }, + { + "type": "local", + "value": "U02C" + } + ] + } + """ + + def __init__(self, *ar, populate_identifiers=None, **kw): + super().__init__(*ar, **kw) + self.populate_identifiers = populate_identifiers or PopulateIdentifier( + self.config, self.cache + ) + + def single(self, data: dict) -> Structure | None: + acronym = self.sanitize_string(data["acronym"]) + name = self.sanitize_languages(data["names"]) + identifiers = self.populate_identifiers.multiple(data["identifiers"]) + + # no create structure if no identifiers are set + if not identifiers: + return None + + structure = self.cache.from_identifiers(Structure, identifiers) + self.cache.save( + structure, acronym=acronym, name=name, organization=self.config.organization + ) + self.cache.save_m2m(structure, identifiers=identifiers) + + return structure diff --git a/services/crisalid/queries/documents.graphql b/services/crisalid/queries/documents.graphql index 8d587082..e4a936fd 100644 --- a/services/crisalid/queries/documents.graphql +++ b/services/crisalid/queries/documents.graphql @@ -1,6 +1,5 @@ query PopulateFromCrisalid($limit: Int, $offset: Int, $where: DocumentWhere) { 
documents(limit: $limit, offset: $offset, where: $where) { - uid, publication_date, document_type, @@ -17,7 +16,6 @@ query PopulateFromCrisalid($limit: Int, $offset: Int, $where: DocumentWhere) { has_contributions { roles, contributor { - uid display_name, names { first_names { @@ -30,15 +28,37 @@ query PopulateFromCrisalid($limit: Int, $offset: Int, $where: DocumentWhere) { } } identifiers { - type + harvester: type value } + employments { + acronym + names { + language + value + } + identifiers { + harvester: type + value + } + } + memberships { + acronym + names { + language + value + } + identifiers { + harvester: type + value + } + } } } recorded_by { harvester - uid, + value: uid, } } } \ No newline at end of file diff --git a/services/crisalid/queries/organisations.graphql b/services/crisalid/queries/organisations.graphql new file mode 100644 index 00000000..85d2fc54 --- /dev/null +++ b/services/crisalid/queries/organisations.graphql @@ -0,0 +1,14 @@ +# this query for organisations ( structure / labo ) +query PopulateFromCrisalid($limit: Int, $offset: Int, $where: OrganisationWhere) { + organisations(limit: $limit, offset: $offset, where: $where) { + acronym + names { + language + value + } + identifiers { + harvester: type + value + } + } +} diff --git a/services/crisalid/queries/people.graphql b/services/crisalid/queries/people.graphql index 2f17f160..ff304917 100644 --- a/services/crisalid/queries/people.graphql +++ b/services/crisalid/queries/people.graphql @@ -1,6 +1,5 @@ query PopulateFromCrisalid($limit: Int, $offset: Int, $where: PersonWhere) { people(limit: $limit, offset: $offset, where: $where) { - uid display_name names { first_names { @@ -13,8 +12,32 @@ query PopulateFromCrisalid($limit: Int, $offset: Int, $where: PersonWhere) { } } identifiers { - type + harvester: type value } + + employments { + acronym + names { + language + value + } + identifiers { + harvester: type + value + } + } + + memberships { + acronym + names { + language + value + 
} + identifiers { + harvester: type + value + } + } } } diff --git a/services/crisalid/tasks.py b/services/crisalid/tasks.py index b3d7226a..8bc2045e 100644 --- a/services/crisalid/tasks.py +++ b/services/crisalid/tasks.py @@ -4,8 +4,15 @@ from services.crisalid.bus.constant import CrisalidEventEnum, CrisalidTypeEnum from services.crisalid.bus.consumer import on_event from services.crisalid.interface import CrisalidService -from services.crisalid.models import CrisalidConfig, Document, Identifier, Researcher +from services.crisalid.models import ( + CrisalidConfig, + Document, + Identifier, + Researcher, + Structure, +) from services.crisalid.populates import PopulateDocument, PopulateResearcher +from services.crisalid.populates.structure import PopulateStructure logger = logging.getLogger(__name__) @@ -16,6 +23,8 @@ def get_crisalid_config(crisalid_config_id: int) -> CrisalidConfig: ) +# TODO(remi): convert fields to graphql request + # https://github.com/CRISalid-esr/crisalid-ikg/blob/dev-main/app/amqp/amqp_person_event_message_factory.py#L28 # https://github.com/CRISalid-esr/crisalid-ikg/blob/dev-main/app/amqp/amqp_document_event_message_factory.py#L37 @@ -27,8 +36,18 @@ def create_researcher(crisalid_config_id: int, fields: dict): config = get_crisalid_config(crisalid_config_id) logger.error("receive %s for organization %s", fields, config.organization) + service = CrisalidService(config) + + # fetch data from apollo + data = service.query("people", offset=0, limit=1, where={"uid_EQ": fields["uid"]})[ + "people" + ] + if not data: + logger.warning("no result fetching crisalid_uid=%s", fields["uid"]) + return + populate = PopulateResearcher(config) - populate.single(fields) + populate.single(data[0]) @on_event(CrisalidTypeEnum.PERSON, CrisalidEventEnum.DELETED) @@ -51,6 +70,54 @@ def delete_researcher(crisalid_config_id: int, fields: dict): logger.info("deleted = %s", deleted) +# ---- +# Structures task (organisations/labs ....)
+# ---- +@on_event(CrisalidTypeEnum.STRUCTURE, CrisalidEventEnum.CREATED) +@on_event(CrisalidTypeEnum.STRUCTURE, CrisalidEventEnum.UPDATED) +@app.task(name=f"{__name__}.create_structure") +def create_structure(crisalid_config_id: int, fields: dict): + config = get_crisalid_config(crisalid_config_id) + logger.error("receive %s for organization %s", fields, config.organization) + + service = CrisalidService(config) + + # fetch data from apollo + data = service.query( + "organisations", offset=0, limit=1, where={"uid_EQ": fields["uid"]} + )["organisations"] + if not data: + logger.warning("no result fetching crisalid_uid=%s", fields["uid"]) + return + + populate = PopulateStructure(config) + populate.single(data[0]) + + +@on_event(CrisalidTypeEnum.STRUCTURE, CrisalidEventEnum.DELETED) +@app.task(name=f"{__name__}.delete_structure") +def delete_structure(crisalid_config_id: int, fields: dict): + config = get_crisalid_config(crisalid_config_id) + logger.error("receive %s for organization %s", fields, config.organization) + + identifiers = [ + {"harvester": iden["type"].lower(), "value": iden["value"]} + for iden in fields["identifiers"] + if iden["type"].lower() + not in (Identifier.Harvester.LOCAL, Identifier.Harvester.EPPN) + ] + + qs = Structure.objects.from_identifiers(identifiers, distinct=False).filter( + organization=config.organization + ) + deleted, _ = qs.delete() + + logger.info("deleted = %s", deleted) + + +# ---- +# Documents task (publications/conference ....) 
+# ---- @on_event(CrisalidTypeEnum.DOCUMENT, CrisalidEventEnum.CREATED) @on_event(CrisalidTypeEnum.DOCUMENT, CrisalidEventEnum.UPDATED) @app.task(name=f"{__name__}.create_document") @@ -88,6 +155,9 @@ def delete_document(crisalid_config_id: int, fields: dict): logger.info("deleted = %s", deleted) +# ---- +# Vectorize documents for similarity +# ---- @app.task(name="Vectorize documents") def vectorize_documents(documents_pks: list[int]): for obj in Document.objects.filter(pk__in=documents_pks): diff --git a/services/crisalid/tests/fixtures/structures.graphql.json b/services/crisalid/tests/fixtures/structures.graphql.json new file mode 100644 index 00000000..ce40495a --- /dev/null +++ b/services/crisalid/tests/fixtures/structures.graphql.json @@ -0,0 +1,15 @@ +{ + "acronym": "LabEx CAP", + "names": [ + { + "language": "fr", + "value": "CAP" + } + ], + "identifiers": [ + { + "harvester": "local", + "value": "DGI01" + } + ] +} \ No newline at end of file diff --git a/services/crisalid/tests/test_populate.py b/services/crisalid/tests/test_populate.py index 408b3853..1d08e5fb 100644 --- a/services/crisalid/tests/test_populate.py +++ b/services/crisalid/tests/test_populate.py @@ -5,8 +5,9 @@ from apps.accounts.factories import UserFactory from apps.accounts.models import PrivacySettings, ProjectUser from services.crisalid.factories import CrisalidConfigFactory -from services.crisalid.models import Document, Identifier, Researcher +from services.crisalid.models import Document, Identifier, Researcher, Structure from services.crisalid.populates import PopulateDocument, PopulateResearcher +from services.crisalid.populates.structure import PopulateStructure class TestPopulateResearcher(test.TestCase): @@ -26,7 +27,7 @@ def test_create_researcher(self): } ], "identifiers": [ - {"value": "hals-truc", "type": Identifier.Harvester.HAL.value} + {"value": "hals-truc", "harvester": Identifier.Harvester.HAL.value} ], } @@ -70,7 +71,7 @@ def test_no_change_researcher(self): } ], 
"identifiers": [ - {"value": "hals-truc", "type": Identifier.Harvester.HAL.value} + {"value": "hals-truc", "harvester": Identifier.Harvester.HAL.value} ], } # create same object in db @@ -104,7 +105,7 @@ def test_update_identifiers(self): } ], "identifiers": [ - {"value": "hals-truc", "type": Identifier.Harvester.HAL.value} + {"value": "hals-truc", "harvester": Identifier.Harvester.HAL.value} ], } # create same object in db @@ -115,7 +116,7 @@ def test_update_identifiers(self): researcher.identifiers.add(iden) data["identifiers"].append( - {"value": "000-666-999", "type": Identifier.Harvester.ORCID.value} + {"value": "000-666-999", "harvester": Identifier.Harvester.ORCID.value} ) self.popu.single(data) @@ -139,8 +140,8 @@ def test_create_user_researcher(self): } ], "identifiers": [ - {"value": "hals-truc", "type": Identifier.Harvester.HAL.value}, - {"value": "eppn@lpi.com", "type": Identifier.Harvester.EPPN.value}, + {"value": "hals-truc", "harvester": Identifier.Harvester.HAL.value}, + {"value": "eppn@lpi.com", "harvester": Identifier.Harvester.EPPN.value}, ], } self.popu.single(data) @@ -165,8 +166,8 @@ def test_match_user_researcher(self): } ], "identifiers": [ - {"value": "hals-truc", "type": Identifier.Harvester.HAL.value}, - {"value": "eppn@lpi.com", "type": Identifier.Harvester.EPPN.value}, + {"value": "hals-truc", "harvester": Identifier.Harvester.HAL.value}, + {"value": "eppn@lpi.com", "harvester": Identifier.Harvester.EPPN.value}, ], } # a project user already exists with same eepn @@ -226,9 +227,12 @@ def test_create_publication(self): } ], "identifiers": [ - {"type": "eppn", "value": "marty.mcfly@non-de-zeus.fr"}, - {"type": "idref", "value": "4545454545454"}, - {"type": "local", "value": "v55555"}, + { + "harvester": "eppn", + "value": "marty.mcfly@non-de-zeus.fr", + }, + {"harvester": "idref", "value": "4545454545454"}, + {"harvester": "local", "value": "v55555"}, ], } ], @@ -236,9 +240,8 @@ def test_create_publication(self): ], "recorded_by": [ { - 
"uid": "hals-truc", "harvester": Identifier.Harvester.HAL.value, - "value": "", + "value": "hals-truc", } ], } @@ -284,9 +287,12 @@ def test_create_document_whitout_identifiers(self): } ], "identifiers": [ - {"type": "eppn", "value": "marty.mcfly@non-de-zeus.fr"}, - {"type": "idref", "value": "4545454545454"}, - {"type": "local", "value": "v55555"}, + { + "harvester": "eppn", + "value": "marty.mcfly@non-de-zeus.fr", + }, + {"harvester": "idref", "value": "4545454545454"}, + {"harvester": "local", "value": "v55555"}, ], } ], @@ -359,3 +365,46 @@ def test_sanitize_document_type(self): ), Document.DocumentType.AUDIOVISUAL_DOCUMENT.value, ) + + +class TestPopulateStructure(test.TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.config = CrisalidConfigFactory() + cls.popu = PopulateStructure(cls.config) + + def test_create_structure(self): + data = { + "acronym": "LabEx CAP", + "names": [{"language": "fr", "value": "CAP"}], + "identifiers": [{"harvester": "local", "value": "DGI01"}], + } + + new_obj = self.popu.single(data) + + # check obj from db + obj = Structure.objects.first() + self.assertEqual(obj, new_obj) + + self.assertEqual(obj.acronym, "LabEx CAP") + self.assertEqual(obj.name, "CAP") + self.assertEqual(obj.organization, self.config.organization) + self.assertEqual(obj.identifiers.count(), 1) + iden = obj.identifiers.first() + self.assertEqual(iden.value, "DGI01") + self.assertEqual(iden.harvester, "local") + + def test_create_structure_whitout_identifiers(self): + data = { + "acronym": "LabEx CAP", + "names": [{"language": "fr", "value": "CAP"}], + "identifiers": [], + } + + new_obj = self.popu.single(data) + + # check obj from db + obj = Structure.objects.first() + self.assertIsNone(obj) + self.assertIsNone(new_obj) diff --git a/services/crisalid/tests/test_tasks.py b/services/crisalid/tests/test_tasks.py index a0faf6e3..f7d2ed66 100644 --- a/services/crisalid/tests/test_tasks.py +++ b/services/crisalid/tests/test_tasks.py @@ 
-95,10 +95,11 @@ def test_delete_research(self): self.assertTrue(Researcher.objects.filter(pk=researcher.pk).exists()) - def test_create_researcher(self): + @patch("services.crisalid.interface.Client") + def test_create_researcher(self, client_gql): # other check/tests in test_views.py - fields = { - "uid": "05-11-1995-uuid", + fields = {"uid": "05-11-1995-uuid"} + data = { "names": [ { "first_names": [{"value": "marty", "language": "fr"}], @@ -106,10 +107,12 @@ def test_create_researcher(self): } ], "identifiers": [ - {"value": "hals-truc", "type": Identifier.Harvester.HAL.value} + {"value": "hals-truc", "harvester": Identifier.Harvester.HAL.value} ], } + client_gql().execute.return_value = {"people": [data]} + create_researcher(self.config.pk, fields) # check obj from db @@ -161,9 +164,12 @@ def test_create_document(self, client_gql): } ], "identifiers": [ - {"type": "eppn", "value": "marty.mcfly@non-de-zeus.fr"}, - {"type": "idref", "value": "4545454545454"}, - {"type": "local", "value": "v55555"}, + { + "harvester": "eppn", + "value": "marty.mcfly@non-de-zeus.fr", + }, + {"harvester": "idref", "value": "4545454545454"}, + {"harvester": "local", "value": "v55555"}, ], } ], @@ -171,9 +177,8 @@ def test_create_document(self, client_gql): ], "recorded_by": [ { - "uid": "hals-truc", "harvester": Identifier.Harvester.HAL.value, - "value": "", + "value": "hals-truc", } ], } From 8e14c01ea851e56e10d9b9fdf1fbb50991af1aa9 Mon Sep 17 00:00:00 2001 From: rgermain Date: Fri, 9 Jan 2026 16:36:25 +0100 Subject: [PATCH 17/32] test: fix merge harvester --- services/crisalid/populates/identifier.py | 4 +--- services/crisalid/tests/test_populate.py | 26 +++++++++++------------ services/crisalid/tests/test_tasks.py | 4 ++-- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/services/crisalid/populates/identifier.py b/services/crisalid/populates/identifier.py index 97fbf84b..e18dbe2a 100644 --- a/services/crisalid/populates/identifier.py +++ 
b/services/crisalid/populates/identifier.py @@ -24,9 +24,7 @@ def sanitize_harvester(self, harvester: str) -> str: return harvester def single(self, data: dict) -> Identifier | None: - harvester = self.sanitize_harvester( - self.sanitize_string(data["harvester"]).lower() - ) + harvester = self.sanitize_harvester(self.sanitize_string(data["type"]).lower()) value = self.sanitize_string(data["value"]) if not all((harvester, value)): diff --git a/services/crisalid/tests/test_populate.py b/services/crisalid/tests/test_populate.py index e2d0da04..bcbc7ba8 100644 --- a/services/crisalid/tests/test_populate.py +++ b/services/crisalid/tests/test_populate.py @@ -27,7 +27,7 @@ def test_create_researcher(self): } ], "identifiers": [ - {"value": "hals-truc", "harvester": Identifier.Harvester.HAL.value} + {"value": "hals-truc", "type": Identifier.Harvester.HAL.value} ], } @@ -71,7 +71,7 @@ def test_no_change_researcher(self): } ], "identifiers": [ - {"value": "hals-truc", "harvester": Identifier.Harvester.HAL.value} + {"value": "hals-truc", "type": Identifier.Harvester.HAL.value} ], } # create same object in db @@ -105,7 +105,7 @@ def test_update_identifiers(self): } ], "identifiers": [ - {"value": "hals-truc", "harvester": Identifier.Harvester.HAL.value} + {"value": "hals-truc", "type": Identifier.Harvester.HAL.value} ], } # create same object in db @@ -116,7 +116,7 @@ def test_update_identifiers(self): researcher.identifiers.add(iden) data["identifiers"].append( - {"value": "000-666-999", "harvester": Identifier.Harvester.ORCID.value} + {"value": "000-666-999", "type": Identifier.Harvester.ORCID.value} ) self.popu.single(data) @@ -140,8 +140,8 @@ def test_create_user_researcher(self): } ], "identifiers": [ - {"value": "hals-truc", "harvester": Identifier.Harvester.HAL.value}, - {"value": "eppn@lpi.com", "harvester": Identifier.Harvester.EPPN.value}, + {"value": "hals-truc", "type": Identifier.Harvester.HAL.value}, + {"value": "eppn@lpi.com", "type": 
Identifier.Harvester.EPPN.value}, ], } self.popu.single(data) @@ -166,8 +166,8 @@ def test_match_user_researcher(self): } ], "identifiers": [ - {"value": "hals-truc", "harvester": Identifier.Harvester.HAL.value}, - {"value": "eppn@lpi.com", "harvester": Identifier.Harvester.EPPN.value}, + {"value": "hals-truc", "type": Identifier.Harvester.HAL.value}, + {"value": "eppn@lpi.com", "type": Identifier.Harvester.EPPN.value}, ], } # a project user already exists with same eepn @@ -228,11 +228,11 @@ def test_create_publication(self): ], "identifiers": [ { - "harvester": "eppn", + "type": "eppn", "value": "marty.mcfly@non-de-zeus.fr", }, - {"harvester": "idref", "value": "4545454545454"}, - {"harvester": "local", "value": "v55555"}, + {"type": "idref", "value": "4545454545454"}, + {"type": "local", "value": "v55555"}, ], } ], @@ -240,7 +240,7 @@ def test_create_publication(self): ], "recorded_by": [ { - "harvester": Identifier.Harvester.HAL.value, + "type": Identifier.Harvester.HAL.value, "value": "hals-truc", } ], @@ -375,7 +375,7 @@ def test_create_structure(self): data = { "acronym": "LabEx CAP", "names": [{"language": "fr", "value": "CAP"}], - "identifiers": [{"harvester": "local", "value": "DGI01"}], + "identifiers": [{"type": "local", "value": "DGI01"}], } new_obj = self.popu.single(data) diff --git a/services/crisalid/tests/test_tasks.py b/services/crisalid/tests/test_tasks.py index 34004d70..c30dbb2b 100644 --- a/services/crisalid/tests/test_tasks.py +++ b/services/crisalid/tests/test_tasks.py @@ -174,8 +174,8 @@ def test_create_document(self, client_gql): ], "recorded_by": [ { - "harvester": Identifier.Harvester.HAL.value, - "value": "", + "type": Identifier.Harvester.HAL.value, + "value": "hals-truc", } ], } From e8a449c644231d542687ef9baee8ab451acc4fe2 Mon Sep 17 00:00:00 2001 From: rgermain Date: Tue, 13 Jan 2026 17:37:52 +0100 Subject: [PATCH 18/32] feat: add modules groups --- apps/accounts/models.py | 2 ++ apps/accounts/serializers.py | 17 +++++++++-- 
apps/accounts/views.py | 50 +++++-------------------------- apps/commons/mixins.py | 9 ++++++ apps/modules/__init__.py | 3 ++ apps/modules/base.py | 49 +++++++++++++++++++++++++++++++ apps/modules/group.py | 57 ++++++++++++++++++++++++++++++++++++ 7 files changed, 143 insertions(+), 44 deletions(-) create mode 100644 apps/modules/__init__.py create mode 100644 apps/modules/base.py create mode 100644 apps/modules/group.py diff --git a/apps/accounts/models.py b/apps/accounts/models.py index 0fbb1fd4..0fba032d 100644 --- a/apps/accounts/models.py +++ b/apps/accounts/models.py @@ -25,6 +25,7 @@ ) from apps.commons.enums import SDG, Language from apps.commons.mixins import ( + HasModulesRelated, HasMultipleIDs, HasOwner, HasPermissionsSetup, @@ -41,6 +42,7 @@ class PeopleGroup( + HasModulesRelated, HasAutoTranslatedFields, HasMultipleIDs, HasPermissionsSetup, diff --git a/apps/accounts/serializers.py b/apps/accounts/serializers.py index 41e79e68..c4abccb4 100644 --- a/apps/accounts/serializers.py +++ b/apps/accounts/serializers.py @@ -237,6 +237,16 @@ class Meta: fields = read_only_fields +class ModulesSerializers(serializers.ModelSerializer): + modules = serializers.SerializerMethodField() + + def get_modules(self, instance): + request = self.context.get("request") + + cls = instance.get_related_module() + return cls(instance, user=request.user).count() + + class PeopleGroupLightSerializer( AutoTranslatedModelSerializer, serializers.ModelSerializer ): @@ -402,7 +412,10 @@ def create(self, validated_data): class PeopleGroupSerializer( - StringsImagesSerializer, AutoTranslatedModelSerializer, serializers.ModelSerializer + ModulesSerializers, + StringsImagesSerializer, + AutoTranslatedModelSerializer, + serializers.ModelSerializer, ): string_images_forbid_fields: List[str] = [ @@ -527,7 +540,7 @@ def save(self, **kwargs): class Meta: model = PeopleGroup - read_only_fields = ["is_root", "slug"] + read_only_fields = ["is_root", "slug", "modules"] fields = read_only_fields + 
[ "id", "name", diff --git a/apps/accounts/views.py b/apps/accounts/views.py index f537d027..0e89a99c 100644 --- a/apps/accounts/views.py +++ b/apps/accounts/views.py @@ -40,7 +40,6 @@ from apps.files.views import ImageStorageView from apps.organizations.models import Organization from apps.organizations.permissions import HasOrganizationPermission -from apps.projects.models import Project from apps.projects.serializers import ProjectLightSerializer from apps.skills.models import Skill from services.google.models import GoogleAccount, GoogleGroup @@ -682,27 +681,10 @@ def remove_member(self, request, *args, **kwargs): ) def member(self, request, *args, **kwargs): group = self.get_object() - managers_ids = group.managers.all().values_list("id", flat=True) - leaders_ids = group.leaders.all().values_list("id", flat=True) - skills_prefetch = Prefetch( - "skills", queryset=Skill.objects.select_related("tag") - ) - queryset = ( - group.get_all_members() - .distinct() - .annotate( - is_leader=Case( - When(id__in=leaders_ids, then=True), default=Value(False) - ) - ) - .annotate( - is_manager=Case( - When(id__in=managers_ids, then=True), default=Value(False) - ) - ) - .order_by("-is_leader", "-is_manager") - .prefetch_related(skills_prefetch, "groups") - ) + + cls = group.get_related_module() + module = cls(group, request.user) + queryset = module.members() page = self.paginate_queryset(queryset) if page is not None: @@ -790,26 +772,10 @@ def remove_featured_project(self, request, *args, **kwargs): ) def project(self, request, *args, **kwargs): group = self.get_object() - group_projects_ids = ( - Project.objects.filter(groups__people_groups=group) - .distinct() - .values_list("id", flat=True) - ) - queryset = ( - self.request.user.get_project_queryset() - .filter(Q(groups__people_groups=group) | Q(people_groups=group)) - .annotate( - is_group_project=Case( - When(id__in=group_projects_ids, then=True), default=Value(False) - ), - is_featured=Case( - When(people_groups=group, 
then=True), default=Value(False) - ), - ) - .distinct() - .order_by("-is_featured", "-is_group_project") - .prefetch_related("categories") - ) + cls = group.get_related_module() + module = cls(group, request.user) + queryset = module.featured_projects() + page = self.paginate_queryset(queryset) if page is not None: project_serializer = ProjectLightSerializer( diff --git a/apps/commons/mixins.py b/apps/commons/mixins.py index 84ef04d5..c31f93bc 100644 --- a/apps/commons/mixins.py +++ b/apps/commons/mixins.py @@ -408,3 +408,12 @@ def get_slug(self) -> str: if self.get_id_field_name(slug) != "slug": slug = f"{self.slug_prefix}-{slug}" return slug + + +class HasModulesRelated: + """Mixins for related modules class""" + + def get_related_module(self): + from apps.modules.base import get_module + + return get_module(type(self)) diff --git a/apps/modules/__init__.py b/apps/modules/__init__.py new file mode 100644 index 00000000..98a20100 --- /dev/null +++ b/apps/modules/__init__.py @@ -0,0 +1,3 @@ +from .group import PeopleGroupModules + +__all__ = ["PeopleGroupModules"] diff --git a/apps/modules/base.py b/apps/modules/base.py new file mode 100644 index 00000000..679efe76 --- /dev/null +++ b/apps/modules/base.py @@ -0,0 +1,49 @@ +import inspect + +from django.db import models + + +class AbstractModules: + """abstract class for modules/queryset declarations""" + + def __init__(self, instance, /, user, **kw): + self.instance = instance + self.user = user + + def count(self): + members = inspect.getmembers( + self, + predicate=inspect.ismethod, + ) + + modules = {} + for name, func in members: + # ignore private_method and "count" method (this method :D) + if name.startswith("_") or name in ("count",): + continue + + # func returns a queryset + modules[name] = func().count() + + return modules + + +_modules: dict[models.Model] = {} + + +def register_module(model: models.Model): + """decorator to register the modules class associated with a model + + :param model: model class 
used as the registry key for the decorated class + """ + + def
_wrap(cls): + _modules[model] = cls + return cls + + return _wrap + + +def get_module(model: models.Model): + """get registered module""" + return _modules[model] diff --git a/apps/modules/group.py b/apps/modules/group.py new file mode 100644 index 00000000..7ad52d6c --- /dev/null +++ b/apps/modules/group.py @@ -0,0 +1,57 @@ +from django.db.models import Case, Prefetch, Q, QuerySet, Value, When + +from apps.accounts.models import PeopleGroup, ProjectUser +from apps.modules.base import AbstractModules, register_module +from apps.projects.models import Project +from apps.skills.models import Skill + + +@register_module(PeopleGroup) +class PeopleGroupModules(AbstractModules): + def members(self) -> QuerySet[ProjectUser]: + managers_ids = self.instance.managers.all().values_list("id", flat=True) + leaders_ids = self.instance.leaders.all().values_list("id", flat=True) + skills_prefetch = Prefetch( + "skills", queryset=Skill.objects.select_related("tag") + ) + return ( + self.instance.get_all_members() + .distinct() + .annotate( + is_leader=Case( + When(id__in=leaders_ids, then=True), default=Value(False) + ) + ) + .annotate( + is_manager=Case( + When(id__in=managers_ids, then=True), default=Value(False) + ) + ) + .order_by("-is_leader", "-is_manager") + .prefetch_related(skills_prefetch, "groups") + ) + + def featured_projects(self) -> QuerySet[Project]: + group_projects_ids = ( + Project.objects.filter(groups__people_groups=self.instance) + .distinct() + .values_list("id", flat=True) + ) + + return ( + self.user.get_project_queryset() + .filter( + Q(groups__people_groups=self.instance) | Q(people_groups=self.instance) + ) + .annotate( + is_group_project=Case( + When(id__in=group_projects_ids, then=True), default=Value(False) + ), + is_featured=Case( + When(people_groups=self.instance, then=True), default=Value(False) + ), + ) + .distinct() + .order_by("-is_featured", "-is_group_project") + .prefetch_related("categories") + ) From 
f9c489f11e36ecdb3b658023a6a9d84ab27bd6bd
Mon Sep 17 00:00:00 2001 From: rgermain Date: Tue, 13 Jan 2026 18:03:28 +0100 Subject: [PATCH 19/32] fix: i18n messages --- locale/ca/LC_MESSAGES/django.po | 4 ++-- locale/de/LC_MESSAGES/django.po | 4 ++-- locale/en/LC_MESSAGES/django.po | 4 ++-- locale/es/LC_MESSAGES/django.po | 4 ++-- locale/et/LC_MESSAGES/django.po | 4 ++-- locale/fr/LC_MESSAGES/django.po | 4 ++-- locale/nl/LC_MESSAGES/django.po | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/locale/ca/LC_MESSAGES/django.po b/locale/ca/LC_MESSAGES/django.po index 2daf0109..0963381d 100644 --- a/locale/ca/LC_MESSAGES/django.po +++ b/locale/ca/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-12-17 16:58+0100\n" +"POT-Creation-Date: 2026-01-13 18:03+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -110,7 +110,7 @@ msgstr "No pots assignar aquest rol a un usuari" msgid "You cannot assign this role to a user : {role}" msgstr "No pots assignar aquest rol a un usuari: {role}" -#: apps/accounts/models.py:140 apps/projects/models.py:161 +#: apps/accounts/models.py:142 apps/projects/models.py:161 msgid "visibility" msgstr "visibilitat" diff --git a/locale/de/LC_MESSAGES/django.po b/locale/de/LC_MESSAGES/django.po index da18dcdb..726ab799 100644 --- a/locale/de/LC_MESSAGES/django.po +++ b/locale/de/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-12-17 16:58+0100\n" +"POT-Creation-Date: 2026-01-13 18:03+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -112,7 +112,7 @@ msgstr "Sie können diese Rolle keinem Benutzer zuweisen" msgid "You cannot assign this role to a user : {role}" msgstr "Sie können diese Rolle keinem Benutzer zuweisen: {role}" -#: 
apps/accounts/models.py:140 apps/projects/models.py:161 +#: apps/accounts/models.py:142 apps/projects/models.py:161 msgid "visibility" msgstr "Sichtbarkeit" diff --git a/locale/en/LC_MESSAGES/django.po b/locale/en/LC_MESSAGES/django.po index dc59aeda..bc643b3d 100644 --- a/locale/en/LC_MESSAGES/django.po +++ b/locale/en/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-12-17 16:58+0100\n" +"POT-Creation-Date: 2026-01-13 18:03+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -108,7 +108,7 @@ msgstr "" msgid "You cannot assign this role to a user : {role}" msgstr "" -#: apps/accounts/models.py:140 apps/projects/models.py:161 +#: apps/accounts/models.py:142 apps/projects/models.py:161 msgid "visibility" msgstr "" diff --git a/locale/es/LC_MESSAGES/django.po b/locale/es/LC_MESSAGES/django.po index ddb05f29..e1390992 100644 --- a/locale/es/LC_MESSAGES/django.po +++ b/locale/es/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-12-17 16:58+0100\n" +"POT-Creation-Date: 2026-01-13 18:03+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -110,7 +110,7 @@ msgstr "No puedes asignar este rol a un usuario" msgid "You cannot assign this role to a user : {role}" msgstr "No puedes asignar este rol a un usuario: {role}" -#: apps/accounts/models.py:140 apps/projects/models.py:161 +#: apps/accounts/models.py:142 apps/projects/models.py:161 msgid "visibility" msgstr "visibilidad" diff --git a/locale/et/LC_MESSAGES/django.po b/locale/et/LC_MESSAGES/django.po index 29bd2ce7..f5924a85 100644 --- a/locale/et/LC_MESSAGES/django.po +++ b/locale/et/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: 
\n" -"POT-Creation-Date: 2025-12-17 16:58+0100\n" +"POT-Creation-Date: 2026-01-13 18:03+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -110,7 +110,7 @@ msgstr "Sa ei saa seda rolli kasutajale määrata" msgid "You cannot assign this role to a user : {role}" msgstr "Sa ei saa seda rolli kasutajale määrata: {role}" -#: apps/accounts/models.py:140 apps/projects/models.py:161 +#: apps/accounts/models.py:142 apps/projects/models.py:161 msgid "visibility" msgstr "nähtavus" diff --git a/locale/fr/LC_MESSAGES/django.po b/locale/fr/LC_MESSAGES/django.po index cd3ed07a..a39d7ed2 100644 --- a/locale/fr/LC_MESSAGES/django.po +++ b/locale/fr/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-12-17 16:58+0100\n" +"POT-Creation-Date: 2026-01-13 18:03+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -112,7 +112,7 @@ msgstr "Vous ne pouvez pas assigner ce rôle à un·e utilisateur·ice" msgid "You cannot assign this role to a user : {role}" msgstr "Vous ne pouvez pas assigner ce rôle à un·e utilisateur·ice : {role}" -#: apps/accounts/models.py:140 apps/projects/models.py:161 +#: apps/accounts/models.py:142 apps/projects/models.py:161 msgid "visibility" msgstr "visibilité" diff --git a/locale/nl/LC_MESSAGES/django.po b/locale/nl/LC_MESSAGES/django.po index 886b70c0..5287acd7 100644 --- a/locale/nl/LC_MESSAGES/django.po +++ b/locale/nl/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-12-17 16:58+0100\n" +"POT-Creation-Date: 2026-01-13 18:03+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -112,7 +112,7 @@ msgstr "Je kunt deze rol niet toewijzen aan een gebruiker" msgid "You cannot assign this 
role to a user : {role}" msgstr "Je kunt deze rol niet toewijzen aan een gebruiker: {role}" -#: apps/accounts/models.py:140 apps/projects/models.py:161 +#: apps/accounts/models.py:142 apps/projects/models.py:161 msgid "visibility" msgstr "zichtbaarheid" From be727aaa8d8cd8dd94928cb86e63aafaa5dbae9d Mon Sep 17 00:00:00 2001 From: rgermain Date: Thu, 15 Jan 2026 09:34:08 +0100 Subject: [PATCH 20/32] fix: serializers import --- apps/accounts/serializers.py | 13 ++----------- apps/accounts/views.py | 12 ++++++------ apps/commons/serializers.py | 12 ++++++++++++ 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/apps/accounts/serializers.py b/apps/accounts/serializers.py index c4abccb4..6962a24f 100644 --- a/apps/accounts/serializers.py +++ b/apps/accounts/serializers.py @@ -16,7 +16,7 @@ ) from apps.commons.mixins import HasPermissionsSetup from apps.commons.models import GroupData -from apps.commons.serializers import StringsImagesSerializer +from apps.commons.serializers import ModulesSerializers, StringsImagesSerializer from apps.files.models import Image from apps.files.serializers import ImageSerializer from apps.notifications.models import Notification @@ -237,16 +237,6 @@ class Meta: fields = read_only_fields -class ModulesSerializers(serializers.ModelSerializer): - modules = serializers.SerializerMethodField() - - def get_modules(self, instance): - request = self.context.get("request") - - cls = instance.get_related_module() - return cls(instance, user=request.user).count() - - class PeopleGroupLightSerializer( AutoTranslatedModelSerializer, serializers.ModelSerializer ): @@ -260,6 +250,7 @@ class PeopleGroupLightSerializer( ) organization = serializers.SlugRelatedField(read_only=True, slug_field="code") + # TODO(remi): replace this by modules def get_members_count(self, group: PeopleGroup) -> int: return group.get_all_members().count() diff --git a/apps/accounts/views.py b/apps/accounts/views.py index 0e89a99c..e31b2e22 100644 --- 
a/apps/accounts/views.py +++ b/apps/accounts/views.py @@ -682,9 +682,9 @@ def remove_member(self, request, *args, **kwargs): def member(self, request, *args, **kwargs): group = self.get_object() - cls = group.get_related_module() - module = cls(group, request.user) - queryset = module.members() + modules_manager = group.get_related_module() + modules = modules_manager(group, request.user) + queryset = modules.members() page = self.paginate_queryset(queryset) if page is not None: @@ -772,9 +772,9 @@ def remove_featured_project(self, request, *args, **kwargs): ) def project(self, request, *args, **kwargs): group = self.get_object() - cls = group.get_related_module() - module = cls(group, request.user) - queryset = module.featured_projects() + modules_manager = group.get_related_module() + modules = modules_manager(group, request.user) + queryset = modules.featured_projects() page = self.paginate_queryset(queryset) if page is not None: diff --git a/apps/commons/serializers.py b/apps/commons/serializers.py index c6679a7a..048fb80a 100644 --- a/apps/commons/serializers.py +++ b/apps/commons/serializers.py @@ -221,3 +221,15 @@ def save(self, **kwargs): return self.instance instance = super().save(**kwargs) return self.add_string_images_to_instance(instance, images) + + +class ModulesSerializers(serializers.ModelSerializer): + """Modules serializers to return how many elements is linked to objects""" + + modules = serializers.SerializerMethodField() + + def get_modules(self, instance): + request = self.context.get("request") + + modules_manager = instance.get_related_module() + return modules_manager(instance, user=request.user).count() From bfc5498190a476fa9fedc5eb7b53a9f83c65e89f Mon Sep 17 00:00:00 2001 From: rgermain Date: Mon, 19 Jan 2026 17:27:16 +0100 Subject: [PATCH 21/32] feat: add publications/conference groups --- apps/commons/views.py | 8 +++ apps/modules/group.py | 28 ++++++++ services/crisalid/models.py | 3 + services/crisalid/serializers.py | 9 ++- 
services/crisalid/urls.py | 18 +++++ services/crisalid/views.py | 113 ++++++++++++++++++++++++------- 6 files changed, 151 insertions(+), 28 deletions(-) diff --git a/apps/commons/views.py b/apps/commons/views.py index 647ecb42..b463299a 100644 --- a/apps/commons/views.py +++ b/apps/commons/views.py @@ -3,6 +3,7 @@ from rest_framework.response import Response from rest_framework.settings import api_settings +from apps.accounts.models import PeopleGroup from apps.organizations.models import Organization from .mixins import HasMultipleIDs @@ -150,3 +151,10 @@ def initial(self, request, *args, **kwargs): ) super().initial(request, *args, **kwargs) + + +class NestedPeopleGroupViewMixins: + def initial(self, request, *args, **kwargs): + self.people_group = get_object_or_404(PeopleGroup, id=kwargs["people_group_id"]) + + super().initial(request, *args, **kwargs) diff --git a/apps/modules/group.py b/apps/modules/group.py index 7ad52d6c..eb99a5fb 100644 --- a/apps/modules/group.py +++ b/apps/modules/group.py @@ -1,9 +1,12 @@ +from functools import cached_property + from django.db.models import Case, Prefetch, Q, QuerySet, Value, When from apps.accounts.models import PeopleGroup, ProjectUser from apps.modules.base import AbstractModules, register_module from apps.projects.models import Project from apps.skills.models import Skill +from services.crisalid.models import Document, DocumentTypeCentralized @register_module(PeopleGroup) @@ -55,3 +58,28 @@ def featured_projects(self) -> QuerySet[Project]: .order_by("-is_featured", "-is_group_project") .prefetch_related("categories") ) + + @cached_property + def _is_structure(self): + try: + return self.instance.structure + # TODO + except Exception: + pass + + def _documents(self, documents_type: DocumentTypeCentralized) -> QuerySet[Document]: + # structure = self._is_structure + # if not structure: + # return Document.objects.none() + + members_qs = self.members() + return Document.objects.filter( + 
document_type__in=documents_type, + contributors__user__in=members_qs, + ).distinct() + + def publications(self) -> QuerySet[Document]: + return self._documents(DocumentTypeCentralized.publications) + + def conferences(self) -> QuerySet[Document]: + return self._documents(DocumentTypeCentralized.conferences) diff --git a/services/crisalid/models.py b/services/crisalid/models.py index acc52570..5a829e95 100644 --- a/services/crisalid/models.py +++ b/services/crisalid/models.py @@ -305,6 +305,9 @@ class Structure(OrganizationRelated, CrisalidDataModel): related_name="structures", ) objects = CrisalidQuerySet.as_manager() + group = models.OneToOneField( + "accounts.PeopleGroup", on_delete=models.SET_NULL, related_name="structure" + ) def __str__(self): return self.name diff --git a/services/crisalid/serializers.py b/services/crisalid/serializers.py index 5fe5ba1b..ff555fea 100644 --- a/services/crisalid/serializers.py +++ b/services/crisalid/serializers.py @@ -40,7 +40,12 @@ class ResearcherSerializer(serializers.ModelSerializer): class Meta: model = Researcher - exclude = ("updated",) + fields = ( + "id", + "user", + "identifiers", + "display_name", + ) def get_display_name(self, instance): return str(instance) @@ -53,9 +58,9 @@ class ResearcherDocumentsSerializer(ResearcherSerializer): class Meta: model = Researcher - read_only_fields = ("display_name",) fields = ( "identifiers", + "display_name", "user", "id", ) diff --git a/services/crisalid/urls.py b/services/crisalid/urls.py index 8a2b612e..5e4b8e59 100644 --- a/services/crisalid/urls.py +++ b/services/crisalid/urls.py @@ -2,11 +2,14 @@ from rest_framework.routers import DefaultRouter from apps.commons.urls import ( + organization_people_group_router_register, organization_researcher_router_register, organization_router_register, ) from services.crisalid.views import ( ConferenceViewSet, + GroupConferenceViewSet, + GroupPublicationViewSet, PublicationViewSet, ResearcherViewSet, ) @@ -31,6 +34,21 @@ 
basename="ResearcherConferences", ) +# -- group +organization_people_group_router_register( + researcher_router, + r"publications", + GroupPublicationViewSet, + basename="GroupResearcherPublications", +) + +organization_people_group_router_register( + researcher_router, + r"conferences", + GroupConferenceViewSet, + basename="GroupResearcherConferences", +) + urlpatterns = [ path("", include(researcher_router.urls)), ] diff --git a/services/crisalid/views.py b/services/crisalid/views.py index 95debbed..10403406 100644 --- a/services/crisalid/views.py +++ b/services/crisalid/views.py @@ -2,6 +2,7 @@ from http import HTTPMethod from itertools import chain +from annotated_types import doc from django.db.models import Count, QuerySet from django.db.models.functions import ExtractYear from django.http import JsonResponse @@ -15,7 +16,7 @@ from rest_framework import viewsets from rest_framework.decorators import action -from apps.commons.views import NestedOrganizationViewMixins +from apps.commons.views import NestedOrganizationViewMixins, NestedPeopleGroupViewMixins from services.crisalid import relators from services.crisalid.models import ( Document, @@ -83,13 +84,25 @@ ) class AbstractDocumentViewSet( NestedOrganizationViewMixins, - NestedResearcherViewMixins, viewsets.ReadOnlyModelViewSet, ): """Abstract class to get documents info from documents types""" serializer_class = DocumentSerializer + def filter_roles(self, queryset, roles_enabled=True): + # filter only by roles (author, co-authors ...ect) + roles = [ + r.strip() + for r in self.request.query_params.get("roles", "").split(",") + if r.strip() + ] + if roles and roles_enabled: + queryset = queryset.filter( + documentcontributor__roles__contains=roles, + ) + return queryset + def filter_queryset( self, queryset, @@ -102,17 +115,7 @@ def filter_queryset( if year and year_enabled: qs = qs.filter(publication_date__year=year) - # filter only by roles (author, co-authors ...ect) - roles = [ - r.strip() - for r in 
self.request.query_params.get("roles", "").split(",") - if r.strip() - ] - if roles and roles_enabled: - qs = qs.filter( - documentcontributor__roles__contains=roles, - documentcontributor__researcher=self.researcher, - ) + qs = self.filter_roles(qs, roles_enabled) # filter by pblication_type if "document_type" in self.request.query_params and document_type_enabled: @@ -123,7 +126,6 @@ def filter_queryset( def get_queryset(self) -> QuerySet[Document]: return ( Document.objects.filter( - contributors=self.researcher, document_type__in=self.document_types, ) .prefetch_related("identifiers", "contributors__user") @@ -146,15 +148,7 @@ def similars(self, request, *args, **kwargs): ) return self.get_paginated_response(data.data) - @action( - detail=False, - methods=[HTTPMethod.GET], - url_path="analytics", - serializer_class=DocumentAnalyticsSerializer, - ) - def analytics(self, request, *args, **kwargs): - """methods to return analytics (how many documents/by year / by document_type) from researcher""" - + def get_analytics(self): qs = self.get_queryset() # get counted all document_types types @@ -184,11 +178,23 @@ def analytics(self, request, *args, **kwargs): chain( *DocumentContributor.objects.filter( document__in=self.filter_queryset(qs, roles_enabled=False), - researcher=self.researcher, ).values_list("roles", flat=True) ) ) + return document_types, years, roles + + @action( + detail=False, + methods=[HTTPMethod.GET], + url_path="analytics", + serializer_class=DocumentAnalyticsSerializer, + ) + def analytics(self, request, *args, **kwargs): + """methods to return analytics (how many documents/by year / by document_type) from researcher""" + + document_types, years, roles = self.get_analytics() + return JsonResponse( self.serializer_class( { @@ -200,11 +206,66 @@ def analytics(self, request, *args, **kwargs): ) -class PublicationViewSet(AbstractDocumentViewSet): +class AbstractGroupDocumentViewSet( + NestedPeopleGroupViewMixins, AbstractDocumentViewSet +): + def 
get_queryset(self): + modules_manager = self.people_group.get_related_module() + modules = modules_manager(self.people_group, self.request.user) + return getattr(modules, self.document_name)() + + +class AbstractResearcherDocumentViewSet( + NestedResearcherViewMixins, AbstractDocumentViewSet +): + + def filter_roles(self, queryset, roles_enabled=True): + # filter only by roles (author, co-authors ...ect) + roles = [ + r.strip() + for r in self.request.query_params.get("roles", "").split(",") + if r.strip() + ] + if roles and roles_enabled: + queryset = queryset.filter( + documentcontributor__roles__contains=roles, + documentcontributor__research=self.researcher, + ) + return queryset + + def get_analytics(self): + document_types, years, _ = super().get_analytics() + qs = self.get_queryset() + roles = Counter( + chain( + *DocumentContributor.objects.filter( + document__in=self.filter_queryset(qs, roles_enabled=False), + researcher=self.researcher, + ).values_list("roles", flat=True) + ) + ) + + return (document_types, years, roles) + + def get_queryset(self) -> QuerySet[Document]: + return super().get_queryset().filter(contributors=self.researcher) + + +class GroupPublicationViewSet(AbstractGroupDocumentViewSet): + document_name = "publications" + document_types = DocumentTypeCentralized.publications + + +class GroupConferenceViewSet(AbstractGroupDocumentViewSet): + document_name = "conferences" + document_types = DocumentTypeCentralized.conferences + + +class PublicationViewSet(AbstractResearcherDocumentViewSet): document_types = DocumentTypeCentralized.publications -class ConferenceViewSet(AbstractDocumentViewSet): +class ConferenceViewSet(AbstractResearcherDocumentViewSet): document_types = DocumentTypeCentralized.conferences From 7d18ca506e62d6d94266c565a24bdd7ffc0af366 Mon Sep 17 00:00:00 2001 From: rgermain Date: Tue, 20 Jan 2026 14:34:17 +0100 Subject: [PATCH 22/32] fix: url/serializers --- apps/projects/serializers.py | 87 
+++++++++++++++++++----------------- services/crisalid/urls.py | 8 ++++ services/crisalid/views.py | 15 ++++++- 3 files changed, 66 insertions(+), 44 deletions(-) diff --git a/apps/projects/serializers.py b/apps/projects/serializers.py index 8aa1032f..25f963f7 100644 --- a/apps/projects/serializers.py +++ b/apps/projects/serializers.py @@ -38,7 +38,7 @@ ProjectTemplateSerializer, ) from apps.skills.models import Tag -from apps.skills.serializers import TagRelatedField +from apps.skills.serializers import TagRelatedField, TagSerializer from services.translator.serializers import AutoTranslatedModelSerializer from .exceptions import ( @@ -73,8 +73,8 @@ class BlogEntrySerializer( ProjectRelatedSerializer, serializers.ModelSerializer, ): - string_images_fields: List[str] = ["content"] - string_images_forbid_fields: List[str] = ["title"] + string_images_fields: list[str] = ["content"] + string_images_forbid_fields: list[str] = ["title"] string_images_upload_to: str = "blog_entry/images/" string_images_view: str = "BlogEntry-images-detail" string_images_process_template: bool = True @@ -115,13 +115,13 @@ def update(self, instance, validated_data): instance.refresh_from_db() return super(BlogEntrySerializer, self).update(instance, validated_data) - def get_related_organizations(self) -> List[Organization]: + def get_related_organizations(self) -> list[Organization]: """Retrieve the related organizations""" if "project" in self.validated_data: return self.validated_data["project"].get_related_organizations() return [] - def get_related_project(self) -> Optional[Project]: + def get_related_project(self) -> Project | None: """Retrieve the related projects""" if "project" in self.validated_data: return self.validated_data["project"] @@ -129,7 +129,7 @@ def get_related_project(self) -> Optional[Project]: def get_string_images_kwargs( self, instance: BlogEntry, field_name: str, *args: Any, **kwargs: Any - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """Get additional kwargs for 
image processing based on the instance.""" return {"project_id": instance.project.id} @@ -141,7 +141,7 @@ class GoalSerializer( ProjectRelatedSerializer, serializers.ModelSerializer, ): - string_images_forbid_fields: List[str] = ["title", "description"] + string_images_forbid_fields: list[str] = ["title", "description"] project_id = serializers.PrimaryKeyRelatedField( many=False, write_only=True, queryset=Project.objects.all(), source="project" @@ -158,13 +158,13 @@ class Meta: "project_id", ] - def get_related_organizations(self) -> List[Organization]: + def get_related_organizations(self) -> list[Organization]: """Retrieve the related organizations""" if "project" in self.validated_data: return self.validated_data["project"].get_related_organizations() return [] - def get_related_project(self) -> Optional[Project]: + def get_related_project(self) -> Project | None: """Retrieve the related projects""" if "project" in self.validated_data: return self.validated_data["project"] @@ -188,7 +188,7 @@ class LocationSerializer( ProjectRelatedSerializer, serializers.ModelSerializer, ): - string_images_forbid_fields: List[str] = ["title", "description"] + string_images_forbid_fields: list[str] = ["title", "description"] project = LocationProjectSerializer(read_only=True) project_id = serializers.PrimaryKeyRelatedField( @@ -209,13 +209,13 @@ class Meta: "project_id", ] - def get_related_organizations(self) -> List[Organization]: + def get_related_organizations(self) -> list[Organization]: """Retrieve the related organizations""" if "project" in self.validated_data: return self.validated_data["project"].get_related_organizations() return [] - def get_related_project(self) -> Optional[Project]: + def get_related_project(self) -> Project | None: """Retrieve the related projects""" if "project" in self.validated_data: return self.validated_data["project"] @@ -238,6 +238,7 @@ class ProjectLightSerializer( is_followed = serializers.SerializerMethodField(read_only=True) is_featured 
= serializers.BooleanField(read_only=True, required=False) is_group_project = serializers.BooleanField(read_only=True, required=False) + tags = TagSerializer(many=True, read_only=True) class Meta: model = Project @@ -256,9 +257,11 @@ class Meta: "is_followed", "is_featured", "is_group_project", + "updated_at", + "tags", ] - def get_is_followed(self, project: Project) -> Dict[str, Any]: + def get_is_followed(self, project: Project) -> dict[str, Any]: if "request" in self.context: user = self.context["request"].user if not user.is_anonymous: @@ -376,7 +379,7 @@ class ProjectAddTeamMembersSerializer(serializers.Serializer): def add_user( self, user: ProjectUser, project: Project, group: Group, role: str - ) -> Dict[str, Any]: + ) -> dict[str, Any]: created = not project.groups.filter(users=user).exists() if ( group.name == project.get_reviewers().name @@ -401,7 +404,7 @@ def add_user( def add_people_group( self, people_group: PeopleGroup, project: Project, group: Group, role: str - ) -> Dict[str, Any]: + ) -> dict[str, Any]: created = not project.groups.filter(people_groups=people_group).exists() people_group.groups.remove(*project.groups.filter(people_groups=people_group)) people_group.groups.add(group) @@ -456,15 +459,15 @@ class ProjectRemoveTeamMembersSerializer(serializers.Serializer): many=True, write_only=True, required=False, queryset=PeopleGroup.objects.all() ) - def validate_users(self, users: List[ProjectUser]) -> List[ProjectUser]: + def validate_users(self, users: list[ProjectUser]) -> list[ProjectUser]: project = get_object_or_404(Project, pk=self.initial_data["project"]) if all(owner in users for owner in project.get_owners().users.all()): raise RemoveLastProjectOwnerError return list(filter(lambda x: x.groups.filter(projects=project).exists(), users)) def validate_people_groups( - self, people_groups: List[PeopleGroup] - ) -> List[PeopleGroup]: + self, people_groups: list[PeopleGroup] + ) -> list[PeopleGroup]: project = get_object_or_404(Project, 
pk=self.initial_data["project"]) return list( filter(lambda x: x.groups.filter(projects=project).exists(), people_groups) @@ -501,8 +504,8 @@ class ProjectSerializer( OrganizationRelatedSerializer, serializers.ModelSerializer, ): - string_images_fields: List[str] = ["description"] - string_images_forbid_fields: List[str] = ["title", "purpose"] + string_images_fields: list[str] = ["description"] + string_images_forbid_fields: list[str] = ["title", "purpose"] string_images_upload_to: str = "project/images/" string_images_view: str = "Project-images-detail" string_images_process_template: bool = True @@ -611,19 +614,19 @@ class Meta: ] @staticmethod - def get_last_comment(project: Project) -> Optional[Dict]: + def get_last_comment(project: Project) -> dict | None: last_comment = ( project.comments.filter(reply_on=None).order_by("-created_at").first() ) return CommentSerializer(last_comment).data if last_comment else None - def get_linked_projects(self, project: Project) -> Dict[str, Any]: + def get_linked_projects(self, project: Project) -> dict[str, Any]: queryset = LinkedProject.objects.filter(target=project) user = getattr(self.context.get("request"), "user", AnonymousUser()) queryset = user.get_project_related_queryset(queryset) return LinkedProjectSerializer(queryset, many=True).data - def get_is_followed(self, project: Project) -> Dict[str, Any]: + def get_is_followed(self, project: Project) -> dict[str, Any]: if "request" in self.context: user = self.context["request"].user if not user.is_anonymous: @@ -635,10 +638,10 @@ def get_is_followed(self, project: Project) -> Dict[str, Any]: def get_string_images_kwargs( self, instance: Project, field_name: str, *args: Any, **kwargs: Any - ) -> Dict[str, Any]: + ) -> dict[str, Any]: return {"project_id": instance.id} - def get_related_organizations(self) -> List[Organization]: + def get_related_organizations(self) -> list[Organization]: """Retrieve the related organizations""" if "organizations" in self.validated_data: 
return self.validated_data["organizations"] @@ -659,7 +662,7 @@ def update(self, instance, validated_data): ) return super(ProjectSerializer, self).update(instance, validated_data) - def validate_organizations_codes(self, value: List[Organization]): + def validate_organizations_codes(self, value: list[Organization]): if len(value) < 1: raise ProjectWithNoOrganizationError request = self.context.get("request") @@ -710,7 +713,7 @@ def validate_description(self, value: str): raise EmptyProjectDescriptionError return value - def validate_categories(self, value: List[ProjectCategory]): + def validate_categories(self, value: list[ProjectCategory]): organizations_codes = self.initial_data.get("organizations_codes", []) if self.instance and not organizations_codes: organizations_codes = self.instance.organizations.all().values_list( @@ -744,7 +747,7 @@ def get_project_id(version) -> str: return version.id @staticmethod - def get_delta(version) -> Dict[str, str]: + def get_delta(version) -> dict[str, str]: previous = version.prev_record while previous: previous_reason = previous.history_change_reason @@ -765,7 +768,7 @@ def get_delta(version) -> Dict[str, str]: return {} @staticmethod - def get_categories(version) -> List[str]: + def get_categories(version) -> list[str]: categories_ids = version.categories.all().values_list( "projectcategory_id", flat=True ) @@ -774,24 +777,24 @@ def get_categories(version) -> List[str]: ) @staticmethod - def get_tags(version) -> List[str]: + def get_tags(version) -> list[str]: tags_ids = version.tags.all().values_list("tag_id", flat=True) return Tag.objects.filter(id__in=tags_ids).values_list("title", flat=True) @staticmethod - def get_members(version) -> List[str]: + def get_members(version) -> list[str]: members = Project.objects.get(id=version.id).get_all_members() return [m.get_full_name() for m in members] @staticmethod - def get_comments(version) -> Dict[str, Any]: + def get_comments(version) -> dict[str, Any]: comments = 
Comment.history.as_of(version.history_date).filter( project__id=version.id, deleted_at=None ) return CommentSerializer(comments, many=True).data @staticmethod - def get_linked_projects(version) -> Dict[str, Any]: + def get_linked_projects(version) -> dict[str, Any]: linked_projects = LinkedProject.history.as_of(version.history_date).filter( target__id=version.id ) @@ -830,7 +833,7 @@ def get_project_id(version) -> str: return version.id @staticmethod - def get_updated_fields(version) -> List[str]: + def get_updated_fields(version) -> list[str]: previous = version.prev_record while previous: previous_reason = previous.history_change_reason @@ -854,7 +857,7 @@ class Meta: class ProjectMessageSerializer( StringsImagesSerializer, AutoTranslatedModelSerializer, serializers.ModelSerializer ): - string_images_fields: List[str] = ["content"] + string_images_fields: list[str] = ["content"] string_images_upload_to: str = "project_messages/images/" string_images_view: str = "ProjectMessage-images-detail" @@ -897,7 +900,7 @@ def validate_reply_on(self, reply_on: ProjectMessage): def get_string_images_kwargs( self, instance: ProjectMessage, field_name: str, *args: Any, **kwargs: Any - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """Get additional kwargs for image processing based on the instance.""" return {"project_id": instance.project.id} @@ -905,8 +908,8 @@ def get_string_images_kwargs( class ProjectTabSerializer( StringsImagesSerializer, AutoTranslatedModelSerializer, serializers.ModelSerializer ): - string_images_fields: List[str] = ["description"] - string_images_forbid_fields: List[str] = ["title"] + string_images_fields: list[str] = ["description"] + string_images_forbid_fields: list[str] = ["title"] string_images_upload_to: str = "project_tabs/images/" string_images_view: str = "ProjectTab-images-detail" @@ -932,7 +935,7 @@ def validate_type(self, value: str): def get_string_images_kwargs( self, instance: ProjectTab, field_name: str, *args: Any, **kwargs: Any - ) -> 
Dict[str, Any]: + ) -> dict[str, Any]: """Get additional kwargs for image processing based on the instance.""" return {"project_id": instance.project.id} @@ -940,8 +943,8 @@ def get_string_images_kwargs( class ProjectTabItemSerializer( StringsImagesSerializer, AutoTranslatedModelSerializer, serializers.ModelSerializer ): - string_images_fields: List[str] = ["content"] - string_images_forbid_fields: List[str] = ["title"] + string_images_fields: list[str] = ["content"] + string_images_forbid_fields: list[str] = ["title"] string_images_upload_to: str = "project_tab_items/images/" string_images_view: str = "ProjectTabItem-images-detail" @@ -964,7 +967,7 @@ class Meta: def get_string_images_kwargs( self, instance: ProjectTabItem, field_name: str, *args: Any, **kwargs: Any - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """Get additional kwargs for image processing based on the instance.""" return { "project_id": instance.tab.project.id, diff --git a/services/crisalid/urls.py b/services/crisalid/urls.py index 5e4b8e59..5c869060 100644 --- a/services/crisalid/urls.py +++ b/services/crisalid/urls.py @@ -8,6 +8,7 @@ ) from services.crisalid.views import ( ConferenceViewSet, + DocumentViewSet, GroupConferenceViewSet, GroupPublicationViewSet, PublicationViewSet, @@ -20,6 +21,13 @@ researcher_router, r"researcher", ResearcherViewSet, basename="Researcher" ) +organization_router_register( + researcher_router, + r"document", + DocumentViewSet, + basename="CrisalidDocument", +) + organization_researcher_router_register( researcher_router, r"publications", diff --git a/services/crisalid/views.py b/services/crisalid/views.py index 10403406..7b9fa855 100644 --- a/services/crisalid/views.py +++ b/services/crisalid/views.py @@ -17,6 +17,7 @@ from rest_framework.decorators import action from apps.commons.views import NestedOrganizationViewMixins, NestedPeopleGroupViewMixins +from apps.organizations.models import Organization from services.crisalid import relators from 
services.crisalid.models import ( Document, @@ -83,7 +84,6 @@ ), ) class AbstractDocumentViewSet( - NestedOrganizationViewMixins, viewsets.ReadOnlyModelViewSet, ): """Abstract class to get documents info from documents types""" @@ -206,6 +206,17 @@ def analytics(self, request, *args, **kwargs): ) +class DocumentViewSet(NestedOrganizationViewMixins, AbstractDocumentViewSet): + """general viewset documents""" + + def get_queryset(self) -> QuerySet[Document]: + return ( + Document.objects.all() + .prefetch_related("identifiers", "contributors__user") + .order_by("-publication_date") + ) + + class AbstractGroupDocumentViewSet( NestedPeopleGroupViewMixins, AbstractDocumentViewSet ): @@ -216,7 +227,7 @@ def get_queryset(self): class AbstractResearcherDocumentViewSet( - NestedResearcherViewMixins, AbstractDocumentViewSet + NestedOrganizationViewMixins, NestedResearcherViewMixins, AbstractDocumentViewSet ): def filter_roles(self, queryset, roles_enabled=True): From ccdbf9113c37754cf23230b3abd4b288120e70a7 Mon Sep 17 00:00:00 2001 From: rgermain Date: Wed, 21 Jan 2026 11:30:01 +0100 Subject: [PATCH 23/32] feat: add group embedding --- .../migrations/0003_peoplegroup_tags.py | 19 ++++++++ apps/accounts/models.py | 44 ++++++++++-------- apps/accounts/serializers.py | 43 ++++++++--------- apps/accounts/views.py | 16 +++++++ apps/commons/mixins.py | 39 ++++++++++++---- apps/modules/group.py | 3 ++ apps/search/filters.py | 4 +- .../migrations/0004_structure_group.py | 25 ++++++++++ services/crisalid/models.py | 32 ++++--------- .../mistral/migrations/0005_groupembedding.py | 46 +++++++++++++++++++ services/mistral/models.py | 27 +++++++++++ services/mistral/tasks.py | 9 +++- 12 files changed, 232 insertions(+), 75 deletions(-) create mode 100644 apps/accounts/migrations/0003_peoplegroup_tags.py create mode 100644 services/crisalid/migrations/0004_structure_group.py create mode 100644 services/mistral/migrations/0005_groupembedding.py diff --git 
a/apps/accounts/migrations/0003_peoplegroup_tags.py b/apps/accounts/migrations/0003_peoplegroup_tags.py new file mode 100644 index 00000000..93d1b017 --- /dev/null +++ b/apps/accounts/migrations/0003_peoplegroup_tags.py @@ -0,0 +1,19 @@ +# Generated by Django 5.2.10 on 2026-01-21 06:35 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("accounts", "0002_initial"), + ("skills", "0001_initial"), + ] + + operations = [ + migrations.AddField( + model_name="peoplegroup", + name="tags", + field=models.ManyToManyField(related_name="people_groups", to="skills.tag"), + ), + ] diff --git a/apps/accounts/models.py b/apps/accounts/models.py index 0fba032d..643ee3f3 100644 --- a/apps/accounts/models.py +++ b/apps/accounts/models.py @@ -25,6 +25,7 @@ ) from apps.commons.enums import SDG, Language from apps.commons.mixins import ( + HasEmbending, HasModulesRelated, HasMultipleIDs, HasOwner, @@ -42,6 +43,7 @@ class PeopleGroup( + HasEmbending, HasModulesRelated, HasAutoTranslatedFields, HasMultipleIDs, @@ -83,12 +85,12 @@ class PeopleGroup( The visibility setting of the group. 
""" - _auto_translated_fields: List[str] = [ + _auto_translated_fields: list[str] = [ "name", "html:description", "short_description", ] - slugified_fields: List[str] = ["name"] + slugified_fields: list[str] = ["name"] slug_prefix: str = "group" class PublicationStatus(models.TextChoices): @@ -146,6 +148,10 @@ class PublicationStatus(models.TextChoices): updated_at = models.DateTimeField(auto_now=True) permissions_up_to_date = models.BooleanField(default=False) + tags = models.ManyToManyField("skills.Tag", related_name="people_groups") + # address + # links + def __str__(self) -> str: return str(self.name) @@ -158,7 +164,7 @@ def get_id_field_name(cls, object_id: Any) -> str: except ValueError: return "slug" - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" return [self.organization] if self.organization else [] @@ -267,23 +273,23 @@ class ProjectUser( """ organization_query_string: str = "groups__organizations" - _auto_translated_fields: List[str] = [ + _auto_translated_fields: list[str] = [ "html:description", "short_description", "job", ] - slugified_fields: List[str] = ["given_name", "family_name"] + slugified_fields: list[str] = ["given_name", "family_name"] slug_prefix: str = "user" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self._project_queryset: Optional[QuerySet["Project"]] = None - self._user_queryset: Optional[QuerySet["ProjectUser"]] = None - self._people_group_queryset: Optional[QuerySet["PeopleGroup"]] = None - self._news_queryset: Optional[QuerySet["News"]] = None - self._event_queryset: Optional[QuerySet["Event"]] = None - self._instruction_queryset: Optional[QuerySet["Instruction"]] = None - self._related_organizations: list["Organization"] = None + self._project_queryset: QuerySet[Project] | None = None + self._user_queryset: QuerySet[ProjectUser] | None = None + 
self._people_group_queryset: QuerySet[PeopleGroup] | None = None + self._news_queryset: QuerySet[News] | None = None + self._event_queryset: QuerySet[Event] | None = None + self._instruction_queryset: QuerySet[Instruction] | None = None + self._related_organizations: list[Organization] = None # AbstractUser unused fields username_validator = None @@ -360,7 +366,7 @@ class Meta: permissions = (("get_user_by_email", "Can retrieve a user by email"),) @property - def keycloak_id(self) -> Optional[uuid.UUID]: + def keycloak_id(self) -> uuid.UUID | None: if hasattr(self, "keycloak_account"): return str(self.keycloak_account.keycloak_id) return None @@ -386,7 +392,7 @@ def is_staff(self) -> bool: ) @classmethod - def get_id_field_name(cls, object_id: Union[uuid.UUID, int, str]) -> str: + def get_id_field_name(cls, object_id: uuid.UUID | int | str) -> str: """Get the name of the field which contains the given ID.""" try: uuid.UUID(object_id) @@ -400,7 +406,7 @@ def get_id_field_name(cls, object_id: Union[uuid.UUID, int, str]) -> str: @classmethod def get_main_id( - cls, object_id: Union[uuid.UUID, int, str], returned_field: str = "id" + cls, object_id: uuid.UUID | int | str, returned_field: str = "id" ) -> Any: try: return super().get_main_id(object_id, returned_field) @@ -472,7 +478,7 @@ def get_owner(self) -> "ProjectUser": """Get the owner of the object.""" return self - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" if self._related_organizations is None: self._related_organizations = list( @@ -680,7 +686,7 @@ def can_see_project(self, project: "Project") -> bool: """Whether the user can see the project.""" return self.get_project_queryset().contains(project) - def get_permissions_representations(self) -> List[str]: + def get_permissions_representations(self) -> list[str]: """Return a list of the permissions representations.""" 
groups_permissions = [ get_group_permissions(group) @@ -695,7 +701,7 @@ def get_permissions_representations(self) -> List[str]: ] return list(set(groups_permissions)) - def get_instance_permissions_representations(self) -> List[str]: + def get_instance_permissions_representations(self) -> list[str]: """Return a list of the instance permissions representations.""" groups = self.groups.exclude( projects=None, people_groups=None, organizations=None @@ -969,7 +975,7 @@ def get_permissions_representations(self): """Return a list of the permissions representations.""" return [] - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" return [] diff --git a/apps/accounts/serializers.py b/apps/accounts/serializers.py index 6962a24f..b30cea8b 100644 --- a/apps/accounts/serializers.py +++ b/apps/accounts/serializers.py @@ -23,7 +23,7 @@ from apps.organizations.models import Organization from apps.projects.models import Project from apps.skills.models import Skill -from apps.skills.serializers import SkillLightSerializer +from apps.skills.serializers import SkillLightSerializer, TagSerializer from services.crisalid.serializers import ResearcherSerializerLight from services.translator.serializers import AutoTranslatedModelSerializer @@ -121,11 +121,11 @@ class Meta: ] fields = read_only_fields - def get_profile_picture(self, instance: ProjectUser) -> Optional[Dict[str, Any]]: + def get_profile_picture(self, instance: ProjectUser) -> dict[str, Any] | None: image = instance.profile_picture return ImageSerializer(image).data if image else None - def to_representation(self, instance: ProjectUser) -> Dict[str, Any]: + def to_representation(self, instance: ProjectUser) -> dict[str, Any]: request = self.context.get("request") force_display = self.context.get("force_display", False) if force_display or ( @@ -190,7 +190,7 @@ def to_representation(self, instance): 
"current_org_role": None, } - def get_profile_picture(self, user: ProjectUser) -> Union[Dict, str]: + def get_profile_picture(self, user: ProjectUser) -> dict | str: if user.profile_picture is None: return None return ImageSerializer(user.profile_picture).data @@ -210,16 +210,16 @@ def get_people_groups(self, user: ProjectUser) -> list: queryset, many=True, context=self.context ).data - def get_skills(self, user: ProjectUser) -> List[Dict]: + def get_skills(self, user: ProjectUser) -> list[dict]: return SkillLightSerializer(user.skills.all(), many=True).data - def get_needs_mentor_on(self, user: ProjectUser) -> List[Dict]: + def get_needs_mentor_on(self, user: ProjectUser) -> list[dict]: if getattr(user, "needs_mentor_on", None): skills = Skill.objects.filter(id__in=user.needs_mentor_on) return SkillLightSerializer(skills, many=True).data return [] - def get_can_mentor_on(self, user: ProjectUser) -> List[Dict]: + def get_can_mentor_on(self, user: ProjectUser) -> list[dict]: if getattr(user, "can_mentor_on", None): skills = Skill.objects.filter(id__in=user.can_mentor_on) return SkillLightSerializer(skills, many=True).data @@ -296,9 +296,7 @@ class Meta: ] fields = read_only_fields - def get_children( - self, people_group: PeopleGroup - ) -> List[Dict[str, Union[str, int]]]: + def get_children(self, people_group: PeopleGroup) -> list[dict[str, str | int]]: context = self.context request = context.get("request") mapping = context.get("mapping") @@ -374,7 +372,7 @@ class PeopleGroupAddFeaturedProjectsSerializer(serializers.Serializer): many=True, write_only=True, required=False, queryset=Project.objects.all() ) - def validate_featured_projects(self, projects: List[Project]) -> List[Project]: + def validate_featured_projects(self, projects: list[Project]) -> list[Project]: request = self.context.get("request") if not all(request.user.can_see_project(project) for project in projects): raise FeaturedProjectPermissionDeniedError @@ -409,7 +407,7 @@ class 
PeopleGroupSerializer( serializers.ModelSerializer, ): - string_images_forbid_fields: List[str] = [ + string_images_forbid_fields: list[str] = [ "name", "description", "short_description", @@ -438,8 +436,9 @@ class PeopleGroupSerializer( featured_projects = serializers.PrimaryKeyRelatedField( many=True, write_only=True, required=False, queryset=Project.objects.all() ) + tags = TagSerializer(many=True) - def get_hierarchy(self, obj: PeopleGroup) -> List[Dict[str, Union[str, int]]]: + def get_hierarchy(self, obj: PeopleGroup) -> list[dict[str, str | int]]: request = self.context.get("request") queryset = request.user.get_people_group_queryset() hierarchy = [] @@ -451,7 +450,7 @@ def get_hierarchy(self, obj: PeopleGroup) -> List[Dict[str, Union[str, int]]]: ) return [{"order": i, **h} for i, h in enumerate(hierarchy[::-1])] - def get_children(self, obj: PeopleGroup) -> List[Dict[str, Union[str, int]]]: + def get_children(self, obj: PeopleGroup) -> list[dict[str, str | int]]: request = self.context.get("request") queryset = ( request.user.get_people_group_queryset() @@ -464,7 +463,7 @@ def get_children(self, obj: PeopleGroup) -> List[Dict[str, Union[str, int]]]: queryset, many=True, context=self.context ).data - def validate_featured_projects(self, projects: List[Project]) -> List[Project]: + def validate_featured_projects(self, projects: list[Project]) -> list[Project]: request = self.context.get("request") if not all(request.user.can_see_project(project) for project in projects): raise FeaturedProjectPermissionDeniedError @@ -545,6 +544,8 @@ class Meta: "header_image", "logo_image", "roles", + "sdgs", + "tags", "publication_status", "team", "featured_projects", @@ -555,7 +556,7 @@ class Meta: class UserSerializer( StringsImagesSerializer, AutoTranslatedModelSerializer, serializers.ModelSerializer ): - string_images_forbid_fields: List[str] = [ + string_images_forbid_fields: list[str] = [ "description", "short_description", "job", @@ -722,7 +723,7 @@ def 
_validate_role( self, group: Group, request_user: ProjectUser, - instance: Optional[HasPermissionsSetup] = None, + instance: HasPermissionsSetup | None = None, ): instance = instance or get_instance_from_group(group) if not instance or ( @@ -749,7 +750,7 @@ def _validate_role( ): raise UserRolePermissionDeniedError(group.name) - def validate_roles(self, groups: List[Group]) -> List[Group]: + def validate_roles(self, groups: list[Group]) -> list[Group]: request = self.context.get("request") user = request.user groups_to_add = ( @@ -795,13 +796,13 @@ def validate_roles(self, groups: List[Group]) -> List[Group]: ) ) - def get_permissions(self, user: ProjectUser) -> List[str]: + def get_permissions(self, user: ProjectUser) -> list[str]: return user.get_instance_permissions_representations() - def get_skills(self, user: ProjectUser) -> List[Dict]: + def get_skills(self, user: ProjectUser) -> list[dict]: return SkillLightSerializer(user.skills.all(), many=True).data - def get_profile_picture(self, user: ProjectUser) -> Optional[Dict]: + def get_profile_picture(self, user: ProjectUser) -> dict | None: if user.profile_picture is None: return None return ImageSerializer(user.profile_picture).data diff --git a/apps/accounts/views.py b/apps/accounts/views.py index e31b2e22..40c85b4c 100644 --- a/apps/accounts/views.py +++ b/apps/accounts/views.py @@ -803,6 +803,22 @@ def hierarchy(self, request, *args, **kwargs): status=status.HTTP_200_OK, ) + @action( + detail=True, + methods=["GET"], + url_path="similars", + permission_classes=[ReadOnly], + ) + def similars(self, request, *args, **kwargs): + obj: PeopleGroup = self.get_object() + queryset = obj.similars() + + queryset_page = self.paginate_queryset(queryset) + data = self.serializer_class( + queryset_page, many=True, context={"request": request} + ) + return self.get_paginated_response(data.data) + @extend_schema( parameters=[OpenApiParameter("people_group_id", str, OpenApiParameter.PATH)] diff --git a/apps/commons/mixins.py 
b/apps/commons/mixins.py index c31f93bc..177a0f25 100644 --- a/apps/commons/mixins.py +++ b/apps/commons/mixins.py @@ -1,4 +1,5 @@ -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple +from collections.abc import Iterable +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Self, Tuple from django.contrib.auth.models import Group, Permission from django.contrib.contenttypes.models import ContentType @@ -35,7 +36,7 @@ def organization_query(cls, key: str, value: Any) -> Q: return Q(**{cls.organization_query_string: value}) return Q(**{key: value}) - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" raise NotImplementedError() @@ -91,7 +92,7 @@ def get_related_project(self) -> Optional["Project"]: """Return the projects related to this model.""" raise NotImplementedError() - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" raise NotImplementedError() @@ -184,7 +185,7 @@ def setup_permissions(self, user: Optional["ProjectUser"] = None): @classmethod def batch_reassign_permissions( - cls, roles_permissions: Tuple[str, Iterable[Permission]] + cls, roles_permissions: tuple[str, Iterable[Permission]] ): """ Reassign permissions for all instances of the model. @@ -320,9 +321,9 @@ def get_id_field_name(cls, object_id: Any) -> str: The outdated slugs of the object. They are kept for url retro-compatibility. 
""" - _original_slug_fields_value: Dict[str, str] = {} - slugified_fields: List[str] = [] - reserved_slugs: List[str] = [] + _original_slug_fields_value: dict[str, str] = {} + slugified_fields: list[str] = [] + reserved_slugs: list[str] = [] slug_prefix: str = "" def __init__(self, *args, **kwargs): @@ -371,8 +372,8 @@ def get_main_id(cls, object_id: Any, returned_field: str = "id") -> Any: @classmethod def get_main_ids( - cls, objects_ids: List[Any], returned_field: str = "id" - ) -> List[Any]: + cls, objects_ids: list[Any], returned_field: str = "id" + ) -> list[Any]: """Get the main IDs from a list of secondary IDs.""" return [cls.get_main_id(object_id, returned_field) for object_id in objects_ids] @@ -417,3 +418,23 @@ def get_related_module(self): from apps.modules.base import get_module return get_module(type(self)) + + +class HasEmbending: + def vectorize(self): + if not getattr(self, "embedding", None): + model_embending = type(self).embedding.related.model + self.embedding = model_embending(item=self) + self.embedding.save() + self.embedding.vectorize() + + def similars(self, threshold: float = 0.15) -> QuerySet[Self]: + """return similars documents""" + if getattr(self, "embedding", None): + vector = self.embedding.embedding + model_embending = type(self).embedding.related.model + queryset = type(self).objects.all() + return model_embending.vector_search(vector, queryset, threshold).exclude( + pk=self.pk + ) + return type(self).objects.all() diff --git a/apps/modules/group.py b/apps/modules/group.py index eb99a5fb..701e5d25 100644 --- a/apps/modules/group.py +++ b/apps/modules/group.py @@ -59,6 +59,9 @@ def featured_projects(self) -> QuerySet[Project]: .prefetch_related("categories") ) + def similars(self) -> QuerySet[PeopleGroup]: + return self.instance.similars() + @cached_property def _is_structure(self): try: diff --git a/apps/search/filters.py b/apps/search/filters.py index 5728c89c..38f33786 100644 --- a/apps/search/filters.py +++ 
b/apps/search/filters.py @@ -15,8 +15,8 @@ def MultiMatchSearchFieldsFilter( # noqa: N802 index: str, - fields: Optional[List[str]], - highlight: Optional[List[str]] = None, + fields: list[str] | None, + highlight: list[str] | None = None, highlight_size: int = 150, ): class _MultiMatchSearchFieldsFilter(SearchFilter): diff --git a/services/crisalid/migrations/0004_structure_group.py b/services/crisalid/migrations/0004_structure_group.py new file mode 100644 index 00000000..8be7d9c4 --- /dev/null +++ b/services/crisalid/migrations/0004_structure_group.py @@ -0,0 +1,25 @@ +# Generated by Django 5.2.10 on 2026-01-21 06:35 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("accounts", "0003_peoplegroup_tags"), + ("crisalid", "0003_alter_identifier_harvester_structure_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="structure", + name="group", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="structure", + to="accounts.peoplegroup", + ), + ), + ] diff --git a/services/crisalid/models.py b/services/crisalid/models.py index 5a829e95..681ff17b 100644 --- a/services/crisalid/models.py +++ b/services/crisalid/models.py @@ -5,10 +5,9 @@ from django.db import models from django.db.models.functions import Lower -from apps.commons.mixins import OrganizationRelated +from apps.commons.mixins import HasEmbending, OrganizationRelated from apps.organizations.models import Organization from services.crisalid import relators -from services.mistral.models import DocumentEmbedding from services.translator.mixins import HasAutoTranslatedFields from .manager import CrisalidQuerySet, DocumentQuerySet @@ -129,7 +128,9 @@ class Meta: ] -class Document(OrganizationRelated, HasAutoTranslatedFields, CrisalidDataModel): +class Document( + HasEmbending, OrganizationRelated, HasAutoTranslatedFields, CrisalidDataModel +): """ 
Represents a research publicaiton (or 'document') in the Crisalid system. """ @@ -225,24 +226,6 @@ def document_type_centralized(self) -> list[str]: return vals return [self.document_type] - def vectorize(self): - if not getattr(self, "embedding", None): - self.embedding = DocumentEmbedding(item=self) - self.embedding.save() - self.embedding.vectorize() - - def similars(self, threshold: float = 0.15) -> DocumentQuerySet: - """return similars documents""" - if getattr(self, "embedding", None): - vector = self.embedding.embedding - queryset = Document.objects.all() - return ( - DocumentEmbedding.vector_search(vector, queryset, threshold) - .filter(document_type__in=self.document_type_centralized) - .exclude(pk=self.pk) - ) - return Document.objects.none() - def save(self, *ar, **kw): md = super().save(*ar, **kw) # when we update models , re-calculate vectorize @@ -305,8 +288,11 @@ class Structure(OrganizationRelated, CrisalidDataModel): related_name="structures", ) objects = CrisalidQuerySet.as_manager() - group = models.OneToOneField( - "accounts.PeopleGroup", on_delete=models.SET_NULL, related_name="structure" + group = models.ForeignKey( + "accounts.PeopleGroup", + on_delete=models.SET_NULL, + null=True, + related_name="structure", ) def __str__(self): diff --git a/services/mistral/migrations/0005_groupembedding.py b/services/mistral/migrations/0005_groupembedding.py new file mode 100644 index 00000000..305a655a --- /dev/null +++ b/services/mistral/migrations/0005_groupembedding.py @@ -0,0 +1,46 @@ +# Generated by Django 5.2.10 on 2026-01-21 08:13 + +import django.db.models.deletion +import pgvector.django +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("accounts", "0003_peoplegroup_tags"), + ("mistral", "0004_documentembedding"), + ] + + operations = [ + migrations.CreateModel( + name="GroupEmbedding", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + 
serialize=False, + verbose_name="ID", + ), + ), + ("last_update", models.DateTimeField(auto_now=True)), + ("embedding", pgvector.django.VectorField(dimensions=1024, null=True)), + ("is_visible", models.BooleanField(default=False)), + ("summary", models.TextField(blank=True)), + ("prompt_hashcode", models.CharField(default="", max_length=64)), + ( + "item", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + related_name="embedding", + to="accounts.peoplegroup", + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/services/mistral/models.py b/services/mistral/models.py index c7b67223..27097e00 100644 --- a/services/mistral/models.py +++ b/services/mistral/models.py @@ -8,6 +8,7 @@ from django.utils.html import strip_tags from pgvector.django import CosineDistance, VectorField +from apps.accounts.models import PeopleGroup from apps.commons.models import GroupData from apps.projects.models import Project @@ -458,3 +459,29 @@ def set_embedding(self, *args, **kwargs) -> "DocumentEmbedding": self.prompt_hashcode = prompt_hashcode self.save() return self + + +class GroupEmbedding(MistralEmbedding): + item = models.OneToOneField( + "accounts.PeopleGroup", on_delete=models.CASCADE, related_name="embedding" + ) + + def get_fields(self) -> list[str]: + # TODO(remi): add more fields + return ( + self.item.name, + self.item.description, + ) + + def get_is_visible(self) -> bool: + return any(self.get_fields()) + + def set_embedding(self, *args, **kwargs) -> "DocumentEmbedding": + prompt = self.get_fields() + prompt_hashcode = self.hash_prompt(prompt) + if self.prompt_hashcode != prompt_hashcode: + prompt = "\n\n".join(prompt) + self.embedding = MistralService.get_embedding(prompt) + self.prompt_hashcode = prompt_hashcode + self.save() + return self diff --git a/services/mistral/tasks.py b/services/mistral/tasks.py index 60b794c0..929479ea 100644 --- a/services/mistral/tasks.py +++ b/services/mistral/tasks.py @@ -3,7 +3,13 @@ from 
apps.commons.utils import clear_memory from projects.celery import app -from .models import DocumentEmbedding, MistralEmbedding, ProjectEmbedding, UserEmbedding +from .models import ( + DocumentEmbedding, + GroupEmbedding, + MistralEmbedding, + ProjectEmbedding, + UserEmbedding, +) logger = logging.getLogger(__name__) @@ -32,3 +38,4 @@ def _vectorize_updated_objects(): _vectorize_objects(ProjectEmbedding) _vectorize_objects(UserEmbedding) _vectorize_objects(DocumentEmbedding) + _vectorize_objects(GroupEmbedding) From fb4034b9321fd4721c62d37edc94c4ce40589a23 Mon Sep 17 00:00:00 2001 From: rgermain Date: Wed, 21 Jan 2026 20:06:16 +0100 Subject: [PATCH 24/32] gorup embendings --- apps/accounts/admin.py | 8 +- ...eoplegrouplocation_peoplegroup_location.py | 88 +++++++++++++++++++ apps/accounts/models.py | 16 +++- apps/accounts/serializers.py | 23 ++++- apps/commons/mixins.py | 10 +-- apps/commons/serializers.py | 43 +++++++-- .../migrations/0002_alter_location_type.py | 26 ++++++ apps/projects/models.py | 85 +++++++++++------- apps/projects/serializers.py | 14 +-- apps/search/filters.py | 2 - services/crisalid/views.py | 2 - services/mistral/models.py | 1 - 12 files changed, 249 insertions(+), 69 deletions(-) create mode 100644 apps/accounts/migrations/0004_peoplegrouplocation_peoplegroup_location.py create mode 100644 apps/projects/migrations/0002_alter_location_type.py diff --git a/apps/accounts/admin.py b/apps/accounts/admin.py index e429c7eb..375e3d44 100644 --- a/apps/accounts/admin.py +++ b/apps/accounts/admin.py @@ -16,7 +16,7 @@ from services.keycloak.interface import KeycloakService from .exports import UserResource -from .models import PeopleGroup, ProjectUser +from .models import PeopleGroup, PeopleGroupLocation, ProjectUser from .utils import get_group_permissions @@ -163,6 +163,12 @@ class PeopleGroupAdmin(TranslateObjectAdminMixin, admin.ModelAdmin): list_filter = ("organization",) +@admin.register(PeopleGroupLocation) +class 
PeopleGroupLocationAdmin(admin.ModelAdmin): + list_display = ("title", "description", "type") + search_fields = ("title", "description", "type") + + class PermissionAdmin(admin.ModelAdmin): list_display = ("name", "codename", "content_type") search_fields = ("name", "codename", "content_type__model") diff --git a/apps/accounts/migrations/0004_peoplegrouplocation_peoplegroup_location.py b/apps/accounts/migrations/0004_peoplegrouplocation_peoplegroup_location.py new file mode 100644 index 00000000..f47dc9f1 --- /dev/null +++ b/apps/accounts/migrations/0004_peoplegrouplocation_peoplegroup_location.py @@ -0,0 +1,88 @@ +# Generated by Django 5.2.10 on 2026-01-21 10:52 + +import apps.commons.mixins +import django.db.models.deletion +import services.translator.mixins +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("accounts", "0003_peoplegroup_tags"), + ] + + operations = [ + migrations.CreateModel( + name="PeopleGroupLocation", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("title", models.CharField(blank=True, max_length=255)), + ("description", models.TextField(blank=True)), + ("lat", models.FloatField()), + ("lng", models.FloatField()), + ( + "type", + models.CharField( + choices=[ + ("team", "Team"), + ("impact", "Impact"), + ("address", "Address"), + ], + default="team", + max_length=10, + ), + ), + ( + "title_detected_language", + models.CharField(blank=True, max_length=10, null=True), + ), + ("title_en", models.CharField(blank=True, max_length=1020, null=True)), + ("title_fr", models.CharField(blank=True, max_length=1020, null=True)), + ("title_de", models.CharField(blank=True, max_length=1020, null=True)), + ("title_nl", models.CharField(blank=True, max_length=1020, null=True)), + ("title_et", models.CharField(blank=True, max_length=1020, null=True)), + ("title_ca", models.CharField(blank=True, max_length=1020, 
null=True)), + ("title_es", models.CharField(blank=True, max_length=1020, null=True)), + ( + "description_detected_language", + models.CharField(blank=True, max_length=10, null=True), + ), + ("description_en", models.TextField(blank=True, null=True)), + ("description_fr", models.TextField(blank=True, null=True)), + ("description_de", models.TextField(blank=True, null=True)), + ("description_nl", models.TextField(blank=True, null=True)), + ("description_et", models.TextField(blank=True, null=True)), + ("description_ca", models.TextField(blank=True, null=True)), + ("description_es", models.TextField(blank=True, null=True)), + ], + options={ + "abstract": False, + }, + bases=( + services.translator.mixins.HasAutoTranslatedFields, + apps.commons.mixins.ProjectRelated, + apps.commons.mixins.DuplicableModel, + models.Model, + ), + ), + migrations.AddField( + model_name="peoplegroup", + name="location", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="people_groups", + to="accounts.peoplegrouplocation", + ), + ), + ] diff --git a/apps/accounts/models.py b/apps/accounts/models.py index 643ee3f3..a5d7ebeb 100644 --- a/apps/accounts/models.py +++ b/apps/accounts/models.py @@ -2,7 +2,7 @@ import uuid from datetime import date from functools import cached_property -from typing import Any, List, Optional, Union +from typing import Any, Optional from django.contrib.auth.models import AbstractUser, Group, Permission from django.contrib.contenttypes.models import ContentType @@ -35,13 +35,17 @@ from apps.commons.models import GroupData from apps.newsfeed.models import Event, Instruction, News from apps.organizations.models import Organization -from apps.projects.models import Project +from apps.projects.models import AbstractLocation, Project from services.keycloak.exceptions import RemoteKeycloakAccountNotFound from services.keycloak.interface import KeycloakService from services.keycloak.models import 
KeycloakAccount from services.translator.mixins import HasAutoTranslatedFields +class PeopleGroupLocation(AbstractLocation): + """base location for group""" + + class PeopleGroup( HasEmbending, HasModulesRelated, @@ -149,7 +153,13 @@ class PublicationStatus(models.TextChoices): permissions_up_to_date = models.BooleanField(default=False) tags = models.ManyToManyField("skills.Tag", related_name="people_groups") - # address + location = models.ForeignKey( + PeopleGroupLocation, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="people_groups", + ) # links def __str__(self) -> str: diff --git a/apps/accounts/serializers.py b/apps/accounts/serializers.py index b30cea8b..d2c4bb80 100644 --- a/apps/accounts/serializers.py +++ b/apps/accounts/serializers.py @@ -1,5 +1,5 @@ import uuid -from typing import Any, Dict, List, Optional, Union +from typing import Any from django.contrib.auth.models import Group from django.contrib.contenttypes.models import ContentType @@ -16,7 +16,11 @@ ) from apps.commons.mixins import HasPermissionsSetup from apps.commons.models import GroupData -from apps.commons.serializers import ModulesSerializers, StringsImagesSerializer +from apps.commons.serializers import ( + BaseLocationSerializer, + ModulesSerializers, + StringsImagesSerializer, +) from apps.files.models import Image from apps.files.serializers import ImageSerializer from apps.notifications.models import Notification @@ -37,7 +41,13 @@ UserRoleAssignmentError, UserRolePermissionDeniedError, ) -from .models import AnonymousUser, PeopleGroup, PrivacySettings, ProjectUser +from .models import ( + AnonymousUser, + PeopleGroup, + PeopleGroupLocation, + PrivacySettings, + ProjectUser, +) from .utils import get_default_group, get_instance_from_group @@ -226,6 +236,11 @@ def get_can_mentor_on(self, user: ProjectUser) -> list[dict]: return [] +class PeopleGroupLocationSerializer(BaseLocationSerializer): + class Meta(BaseLocationSerializer.Meta): + model = 
PeopleGroupLocation + + class PeopleGroupSuperLightSerializer( AutoTranslatedModelSerializer, serializers.ModelSerializer ): @@ -437,6 +452,7 @@ class PeopleGroupSerializer( many=True, write_only=True, required=False, queryset=Project.objects.all() ) tags = TagSerializer(many=True) + location = PeopleGroupLocationSerializer() def get_hierarchy(self, obj: PeopleGroup) -> list[dict[str, str | int]]: request = self.context.get("request") @@ -546,6 +562,7 @@ class Meta: "roles", "sdgs", "tags", + "location", "publication_status", "team", "featured_projects", diff --git a/apps/commons/mixins.py b/apps/commons/mixins.py index 177a0f25..f1b2e035 100644 --- a/apps/commons/mixins.py +++ b/apps/commons/mixins.py @@ -1,5 +1,5 @@ from collections.abc import Iterable -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Self, Tuple +from typing import TYPE_CHECKING, Any, Optional, Self from django.contrib.auth.models import Group, Permission from django.contrib.contenttypes.models import ContentType @@ -423,8 +423,8 @@ def get_related_module(self): class HasEmbending: def vectorize(self): if not getattr(self, "embedding", None): - model_embending = type(self).embedding.related.model - self.embedding = model_embending(item=self) + model_embedding = type(self.embedding) + self.embedding = model_embedding(item=self) self.embedding.save() self.embedding.vectorize() @@ -432,9 +432,9 @@ def similars(self, threshold: float = 0.15) -> QuerySet[Self]: """return similars documents""" if getattr(self, "embedding", None): vector = self.embedding.embedding - model_embending = type(self).embedding.related.model + model_embedding = type(self.embedding) queryset = type(self).objects.all() - return model_embending.vector_search(vector, queryset, threshold).exclude( + return model_embedding.vector_search(vector, queryset, threshold).exclude( pk=self.pk ) return type(self).objects.all() diff --git a/apps/commons/serializers.py b/apps/commons/serializers.py index 048fb80a..b1f13b7b 100644 
--- a/apps/commons/serializers.py +++ b/apps/commons/serializers.py @@ -1,4 +1,5 @@ -from typing import Any, Collection, Dict, List, Optional +from collections.abc import Collection +from typing import Any from django.conf import settings from django.db.models import Model, Q @@ -11,12 +12,13 @@ from apps.files.models import Image from apps.organizations.models import Organization from apps.projects.models import Project +from services.translator.serializers import AutoTranslatedModelSerializer class ProjectRelatedSerializer(serializers.ModelSerializer): """Base serializer for serializers related to projects.""" - def get_related_project(self) -> Optional[Project]: + def get_related_project(self) -> Project | None: """Retrieve the related projects""" raise NotImplementedError() @@ -24,7 +26,7 @@ def get_related_project(self) -> Optional[Project]: class OrganizationRelatedSerializer(serializers.ModelSerializer): """Base serializer for serializers related to organizations.""" - def get_related_organizations(self) -> List[Organization]: + def get_related_organizations(self) -> list[Organization]: """Retrieve the related organizations""" raise NotImplementedError() @@ -155,8 +157,8 @@ class StringsImagesSerializer(serializers.ModelSerializer): It replaces base64 images with uploaded image references during serialization. 
""" - string_images_fields: List[str] = [] - string_images_forbid_fields: List[str] = [] + string_images_fields: list[str] = [] + string_images_forbid_fields: list[str] = [] string_images_upload_to: str = "" string_images_view: str = "" string_images_process_template: bool = False @@ -164,19 +166,19 @@ class StringsImagesSerializer(serializers.ModelSerializer): def get_string_images_kwargs( self, instance: Model, field_name: str, *args: Any, **kwargs: Any - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """Get additional kwargs for image processing based on the instance.""" return {} def get_string_images_owner( self, instance: Model, field_name: str, *args: Any, **kwargs: Any - ) -> Optional[ProjectUser]: + ) -> ProjectUser | None: """Get the owner for image processing based on the instance.""" request = self.context.get("request") return request.user if request else None def add_string_images_to_instance( - self, instance: Model, images: List["Image"] + self, instance: Model, images: list["Image"] ) -> None: """Add images to the instance's images field.""" if self.instance and images: @@ -233,3 +235,28 @@ def get_modules(self, instance): modules_manager = instance.get_related_module() return modules_manager(instance, user=request.user).count() + + +class BaseLocationSerializer( + StringsImagesSerializer, + AutoTranslatedModelSerializer, + OrganizationRelatedSerializer, + serializers.ModelSerializer, +): + string_images_forbid_fields: list[str] = ["title", "description"] + + class Meta: + fields = [ + "id", + "title", + "description", + "lat", + "lng", + "type", + ] + + def get_related_organizations(self) -> list[Organization]: + """Retrieve the related organizations""" + if "project" in self.validated_data: + return self.validated_data["project"].get_related_organizations() + return [] diff --git a/apps/projects/migrations/0002_alter_location_type.py b/apps/projects/migrations/0002_alter_location_type.py new file mode 100644 index 00000000..80b54438 --- /dev/null 
+++ b/apps/projects/migrations/0002_alter_location_type.py @@ -0,0 +1,26 @@ +# Generated by Django 5.2.10 on 2026-01-21 10:52 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("projects", "0001_initial"), + ] + + operations = [ + migrations.AlterField( + model_name="location", + name="type", + field=models.CharField( + choices=[ + ("team", "Team"), + ("impact", "Impact"), + ("address", "Address"), + ], + default="team", + max_length=10, + ), + ), + ] diff --git a/apps/projects/models.py b/apps/projects/models.py index 6cf6ee28..57a1a7f6 100644 --- a/apps/projects/models.py +++ b/apps/projects/models.py @@ -1,8 +1,9 @@ import logging import math import os +from copy import deepcopy from functools import reduce -from typing import TYPE_CHECKING, Any, List, Optional +from typing import TYPE_CHECKING, Any, Optional, Self import shortuuid as shortuuid from django.conf import settings @@ -119,9 +120,9 @@ class Project( project_query_string: str = "" organization_query_string: str = "organizations" - slugified_fields: List[str] = ["title"] + slugified_fields: list[str] = ["title"] slug_prefix: str = "project" - _auto_translated_fields: List[str] = ["title", "html:description", "purpose"] + _auto_translated_fields: list[str] = ["title", "html:description", "purpose"] class PublicationStatus(models.TextChoices): """Visibility setting of a project.""" @@ -351,7 +352,7 @@ def get_views(self) -> int: return self.get_cached_views().get("_total", 0) return self.mixpanel_events.count() - def get_views_organizations(self, organizations: List["Organization"]) -> int: + def get_views_organizations(self, organizations: list["Organization"]) -> int: """Return the project's views inside the given organization. 
If you plan on using this method multiple time, prefetch `organizations` @@ -371,7 +372,7 @@ def get_related_project(self) -> Optional["Project"]: """Return the project related to this model.""" return self - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" if self._related_organizations is None: self._related_organizations = list(self.organizations.all()) @@ -610,7 +611,7 @@ class ProjectScore(models.Model, ProjectRelated): def get_related_project(self) -> Project: return self.project - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: return self.project.get_related_organizations() def get_completeness(self) -> float: @@ -696,7 +697,7 @@ def get_related_project(self) -> Optional["Project"]: """Return the projects related to this model.""" return self.target - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" return self.target.get_related_organizations() @@ -725,7 +726,7 @@ class BlogEntry( Date of the last change made to the blog entry. """ - _auto_translated_fields: List[str] = ["title", "html:content"] + _auto_translated_fields: list[str] = ["title", "html:content"] project = models.ForeignKey( Project, on_delete=models.CASCADE, related_name="blog_entries" @@ -758,7 +759,7 @@ def get_related_project(self) -> Optional["Project"]: """Return the projects related to this model.""" return self.project - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" return self.project.get_related_organizations() @@ -811,7 +812,7 @@ class Goal( Status of the Goal. 
""" - _auto_translated_fields: List[str] = ["title", "html:description"] + _auto_translated_fields: list[str] = ["title", "html:description"] class GoalStatus(models.TextChoices): NONE = "na" @@ -843,7 +844,7 @@ def delete(self, using=None, keep_parents=False): if hasattr(project, "stat"): project.stat.update_goals() - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" return self.project.get_related_organizations() @@ -861,7 +862,7 @@ def duplicate(self, project: "Project") -> "Goal": ) -class Location( +class AbstractLocation( HasAutoTranslatedFields, ProjectRelated, DuplicableModel, @@ -887,23 +888,24 @@ class Location( Type of the location (team or impact). """ - _auto_translated_fields: List[str] = ["title", "description"] + _auto_translated_fields: list[str] = ["title", "description"] class LocationType(models.TextChoices): """Type of a location.""" TEAM = "team" IMPACT = "impact" + ADDRESS = "address" + + class Meta: + abstract = True - project = models.ForeignKey( - Project, on_delete=models.CASCADE, related_name="locations" - ) title = models.CharField(max_length=255, blank=True) description = models.TextField(blank=True) lat = models.FloatField() lng = models.FloatField() type = models.CharField( - max_length=6, + max_length=10, choices=LocationType.choices, default=LocationType.TEAM, ) @@ -912,19 +914,36 @@ def get_related_project(self) -> Optional["Project"]: """Return the projects related to this model.""" return self.project - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" return self.project.get_related_organizations() - def duplicate(self, project: "Project") -> "Location": - return Location.objects.create( - project=project, - title=self.title, - description=self.description, - lat=self.lat, - 
lng=self.lng, - type=self.type, - ) + def duplicate(self) -> Self: + copy = deepcopy(self) + copy.pk = None + return copy + + +class Location(AbstractLocation): + """A project location on Earth. + + Attributes + ---------- + id: Charfield + UUID4 used as the model's PK. + project: ForeignKey + Project at this location. + """ + + project = models.ForeignKey( + Project, on_delete=models.CASCADE, related_name="locations" + ) + + def duplicate(self, project: Project) -> "Location": + copy = super().duplicate() + copy.project = project + copy.save() + return copy class ProjectMessage( @@ -956,7 +975,7 @@ class ProjectMessage( Images used by the message. """ - _auto_translated_fields: List[str] = ["html:content"] + _auto_translated_fields: list[str] = ["html:content"] project = models.ForeignKey( "projects.Project", @@ -988,7 +1007,7 @@ def get_related_project(self) -> "Project": """Return the projects related to this model.""" return self.project - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" return self.project.get_related_organizations() @@ -1028,7 +1047,7 @@ class ProjectTab( Description of the tab. 
""" - _auto_translated_fields: List[str] = ["title", "html:description"] + _auto_translated_fields: list[str] = ["title", "html:description"] class TabType(models.TextChoices): """Type of a tab.""" @@ -1051,7 +1070,7 @@ def get_related_project(self) -> Project: """Return the projects related to this model.""" return self.project - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" return self.project.get_related_organizations() @@ -1076,7 +1095,7 @@ class ProjectTabItem( project_query_string: str = "tab__project" organization_query_string: str = "tab__project__organizations" - _auto_translated_fields: List[str] = ["title", "html:content"] + _auto_translated_fields: list[str] = ["title", "html:content"] tab = models.ForeignKey( "projects.ProjectTab", on_delete=models.CASCADE, related_name="items" @@ -1094,6 +1113,6 @@ def get_related_project(self) -> Project: """Return the projects related to this model.""" return self.tab.project - def get_related_organizations(self) -> List["Organization"]: + def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" return self.tab.project.get_related_organizations() diff --git a/apps/projects/serializers.py b/apps/projects/serializers.py index 25f963f7..2ab65ff0 100644 --- a/apps/projects/serializers.py +++ b/apps/projects/serializers.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional +from typing import Any from django.apps import apps from django.conf import settings @@ -18,6 +18,7 @@ ) from apps.commons.models import GroupData from apps.commons.serializers import ( + BaseLocationSerializer, OrganizationRelatedSerializer, ProjectRelatedSerializer, StringsImagesSerializer, @@ -182,11 +183,8 @@ class Meta: class LocationSerializer( - StringsImagesSerializer, - AutoTranslatedModelSerializer, - OrganizationRelatedSerializer, 
ProjectRelatedSerializer, - serializers.ModelSerializer, + BaseLocationSerializer, ): string_images_forbid_fields: list[str] = ["title", "description"] @@ -209,12 +207,6 @@ class Meta: "project_id", ] - def get_related_organizations(self) -> list[Organization]: - """Retrieve the related organizations""" - if "project" in self.validated_data: - return self.validated_data["project"].get_related_organizations() - return [] - def get_related_project(self) -> Project | None: """Retrieve the related projects""" if "project" in self.validated_data: diff --git a/apps/search/filters.py b/apps/search/filters.py index 38f33786..ddcae30e 100644 --- a/apps/search/filters.py +++ b/apps/search/filters.py @@ -1,5 +1,3 @@ -from typing import List, Optional - from django.db.models import BigIntegerField, Case, F, JSONField, Q, Value, When from django_filters import rest_framework as filters from rest_framework.filters import SearchFilter diff --git a/services/crisalid/views.py b/services/crisalid/views.py index 7b9fa855..de0d0c41 100644 --- a/services/crisalid/views.py +++ b/services/crisalid/views.py @@ -2,7 +2,6 @@ from http import HTTPMethod from itertools import chain -from annotated_types import doc from django.db.models import Count, QuerySet from django.db.models.functions import ExtractYear from django.http import JsonResponse @@ -17,7 +16,6 @@ from rest_framework.decorators import action from apps.commons.views import NestedOrganizationViewMixins, NestedPeopleGroupViewMixins -from apps.organizations.models import Organization from services.crisalid import relators from services.crisalid.models import ( Document, diff --git a/services/mistral/models.py b/services/mistral/models.py index 27097e00..c2f09c1d 100644 --- a/services/mistral/models.py +++ b/services/mistral/models.py @@ -8,7 +8,6 @@ from django.utils.html import strip_tags from pgvector.django import CosineDistance, VectorField -from apps.accounts.models import PeopleGroup from apps.commons.models import GroupData 
from apps.projects.models import Project From bdb5fdeca08c6b0ff61e9195544e48072294dd7d Mon Sep 17 00:00:00 2001 From: rgermain Date: Thu, 22 Jan 2026 19:51:04 +0100 Subject: [PATCH 25/32] feat: subgroups in modules --- apps/accounts/serializers.py | 20 +++----------------- apps/accounts/views.py | 24 ++++++++++++++++++++++-- apps/modules/group.py | 5 +++++ 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/apps/accounts/serializers.py b/apps/accounts/serializers.py index d2c4bb80..9d1aedd1 100644 --- a/apps/accounts/serializers.py +++ b/apps/accounts/serializers.py @@ -6,6 +6,8 @@ from django.shortcuts import get_object_or_404 from drf_spectacular.utils import extend_schema_serializer from rest_framework import serializers +from services.crisalid.serializers import ResearcherSerializerLight +from services.translator.serializers import AutoTranslatedModelSerializer from apps.commons.fields import ( HiddenPrimaryKeyRelatedField, @@ -28,8 +30,6 @@ from apps.projects.models import Project from apps.skills.models import Skill from apps.skills.serializers import SkillLightSerializer, TagSerializer -from services.crisalid.serializers import ResearcherSerializerLight -from services.translator.serializers import AutoTranslatedModelSerializer from .exceptions import ( FeaturedProjectPermissionDeniedError, @@ -432,7 +432,7 @@ class PeopleGroupSerializer( slug_field="code", queryset=Organization.objects.all() ) hierarchy = serializers.SerializerMethodField() - children = serializers.SerializerMethodField() + # children = serializers.SerializerMethodField() parent = serializers.PrimaryKeyRelatedField( queryset=PeopleGroup.objects.all(), required=False, @@ -466,19 +466,6 @@ def get_hierarchy(self, obj: PeopleGroup) -> list[dict[str, str | int]]: ) return [{"order": i, **h} for i, h in enumerate(hierarchy[::-1])] - def get_children(self, obj: PeopleGroup) -> list[dict[str, str | int]]: - request = self.context.get("request") - queryset = ( - 
request.user.get_people_group_queryset() - .select_related("organization") - .filter(parent=obj) - .order_by("name") - .distinct() - ) - return PeopleGroupSuperLightSerializer( - queryset, many=True, context=self.context - ).data - def validate_featured_projects(self, projects: list[Project]) -> list[Project]: request = self.context.get("request") if not all(request.user.can_see_project(project) for project in projects): @@ -556,7 +543,6 @@ class Meta: "parent", "organization", "hierarchy", - "children", "header_image", "logo_image", "roles", diff --git a/apps/accounts/views.py b/apps/accounts/views.py index 40c85b4c..b768c8f0 100644 --- a/apps/accounts/views.py +++ b/apps/accounts/views.py @@ -803,6 +803,24 @@ def hierarchy(self, request, *args, **kwargs): status=status.HTTP_200_OK, ) + @action( + detail=True, + methods=["GET"], + url_path="subgroups", + permission_classes=[ReadOnly], + ) + def subgroups(self, request, *args, **kwargs): + group = self.get_object() + modules_manager = group.get_related_module() + modules = modules_manager(group, request.user) + queryset = modules.subgroups() + + queryset_page = self.paginate_queryset(queryset) + data = self.serializer_class( + queryset_page, many=True, context={"request": request} + ) + return self.get_paginated_response(data.data) + @action( detail=True, methods=["GET"], @@ -810,8 +828,10 @@ def hierarchy(self, request, *args, **kwargs): permission_classes=[ReadOnly], ) def similars(self, request, *args, **kwargs): - obj: PeopleGroup = self.get_object() - queryset = obj.similars() + group = self.get_object() + modules_manager = group.get_related_module() + modules = modules_manager(group, request.user) + queryset = modules.similars() queryset_page = self.paginate_queryset(queryset) data = self.serializer_class( diff --git a/apps/modules/group.py b/apps/modules/group.py index 701e5d25..4faa1ff4 100644 --- a/apps/modules/group.py +++ b/apps/modules/group.py @@ -11,6 +11,8 @@ @register_module(PeopleGroup) class 
PeopleGroupModules(AbstractModules): + instance: PeopleGroup + def members(self) -> QuerySet[ProjectUser]: managers_ids = self.instance.managers.all().values_list("id", flat=True) leaders_ids = self.instance.leaders.all().values_list("id", flat=True) @@ -62,6 +64,9 @@ def featured_projects(self) -> QuerySet[Project]: def similars(self) -> QuerySet[PeopleGroup]: return self.instance.similars() + def subgroups(self) -> QuerySet[PeopleGroup]: + return self.instance.children.all() + @cached_property def _is_structure(self): try: From 45d51fb754b8604a9bbf35df3a63ffd8aabbf732 Mon Sep 17 00:00:00 2001 From: rgermain Date: Mon, 26 Jan 2026 18:09:35 +0100 Subject: [PATCH 26/32] feat; beeter subgroups --- apps/accounts/serializers.py | 41 +++++++++++++++++++++++++++--------- apps/accounts/views.py | 2 +- apps/modules/base.py | 9 +++++--- 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/apps/accounts/serializers.py b/apps/accounts/serializers.py index 9d1aedd1..cf45e397 100644 --- a/apps/accounts/serializers.py +++ b/apps/accounts/serializers.py @@ -6,8 +6,6 @@ from django.shortcuts import get_object_or_404 from drf_spectacular.utils import extend_schema_serializer from rest_framework import serializers -from services.crisalid.serializers import ResearcherSerializerLight -from services.translator.serializers import AutoTranslatedModelSerializer from apps.commons.fields import ( HiddenPrimaryKeyRelatedField, @@ -30,6 +28,8 @@ from apps.projects.models import Project from apps.skills.models import Skill from apps.skills.serializers import SkillLightSerializer, TagSerializer +from services.crisalid.serializers import ResearcherSerializerLight +from services.translator.serializers import AutoTranslatedModelSerializer from .exceptions import ( FeaturedProjectPermissionDeniedError, @@ -253,10 +253,9 @@ class Meta: class PeopleGroupLightSerializer( - AutoTranslatedModelSerializer, serializers.ModelSerializer + ModulesSerializers, AutoTranslatedModelSerializer, 
serializers.ModelSerializer ): header_image = ImageSerializer(read_only=True) - members_count = serializers.SerializerMethodField() roles = serializers.SlugRelatedField( many=True, slug_field="name", @@ -265,10 +264,6 @@ class PeopleGroupLightSerializer( ) organization = serializers.SlugRelatedField(read_only=True, slug_field="code") - # TODO(remi): replace this by modules - def get_members_count(self, group: PeopleGroup) -> int: - return group.get_all_members().count() - class Meta: model = PeopleGroup read_only_fields = ["organization", "is_root", "publication_status"] @@ -280,12 +275,25 @@ class Meta: "short_description", "email", "header_image", - "members_count", "roles", + "modules", ] + def get_modules(self, people_group: PeopleGroup): + context = self.context + request = context.get("request") + + modules_manager = people_group.get_related_module() + modules = modules_manager(people_group, request.user) + + return { + "members": modules.members().count(), + "subgroups": modules.subgroups().count(), + } + class PeopleGroupHierarchySerializer( + ModulesSerializers, AutoTranslatedModelSerializer, serializers.ModelSerializer, ): @@ -308,13 +316,27 @@ class Meta: "header_image", "children", "roles", + "modules", ] fields = read_only_fields + def get_modules(self, people_group: PeopleGroup): + context = self.context + request = context.get("request") + + modules_manager = people_group.get_related_module() + modules = modules_manager(people_group, request.user) + + return { + "members": modules.members().count(), + "subgroups": modules.subgroups().count(), + } + def get_children(self, people_group: PeopleGroup) -> list[dict[str, str | int]]: context = self.context request = context.get("request") mapping = context.get("mapping") + if not mapping: base_queryset = request.user.get_people_group_queryset().filter( organization=people_group.organization @@ -432,7 +454,6 @@ class PeopleGroupSerializer( slug_field="code", queryset=Organization.objects.all() ) hierarchy = 
serializers.SerializerMethodField() - # children = serializers.SerializerMethodField() parent = serializers.PrimaryKeyRelatedField( queryset=PeopleGroup.objects.all(), required=False, diff --git a/apps/accounts/views.py b/apps/accounts/views.py index b768c8f0..02a83813 100644 --- a/apps/accounts/views.py +++ b/apps/accounts/views.py @@ -834,7 +834,7 @@ def similars(self, request, *args, **kwargs): queryset = modules.similars() queryset_page = self.paginate_queryset(queryset) - data = self.serializer_class( + data = PeopleGroupLightSerializer( queryset_page, many=True, context={"request": request} ) return self.get_paginated_response(data.data) diff --git a/apps/modules/base.py b/apps/modules/base.py index 679efe76..16e72037 100644 --- a/apps/modules/base.py +++ b/apps/modules/base.py @@ -10,21 +10,24 @@ def __init__(self, instance, /, user, **kw): self.instance = instance self.user = user - def count(self): + def _items(self): members = inspect.getmembers( self, predicate=inspect.ismethod, ) - modules = {} for name, func in members: # ignore private_method and "count" method (this method :D) if name.startswith("_") or name in ("count",): continue + yield name, func + + def count(self): + modules = {} + for name, func in self._items(): # func return queryset modules[name] = func().count() - return modules From aa445136f7aece3e7ddc2089fd45d10a49fa9d27 Mon Sep 17 00:00:00 2001 From: rgermain Date: Wed, 28 Jan 2026 16:14:07 +0100 Subject: [PATCH 27/32] add fields groups --- apps/accounts/serializers.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/apps/accounts/serializers.py b/apps/accounts/serializers.py index cf45e397..04312b67 100644 --- a/apps/accounts/serializers.py +++ b/apps/accounts/serializers.py @@ -27,7 +27,7 @@ from apps.organizations.models import Organization from apps.projects.models import Project from apps.skills.models import Skill -from apps.skills.serializers import SkillLightSerializer, TagSerializer +from 
apps.skills.serializers import SkillLightSerializer, TagRelatedField, TagSerializer from services.crisalid.serializers import ResearcherSerializerLight from services.translator.serializers import AutoTranslatedModelSerializer @@ -472,7 +472,12 @@ class PeopleGroupSerializer( featured_projects = serializers.PrimaryKeyRelatedField( many=True, write_only=True, required=False, queryset=Project.objects.all() ) - tags = TagSerializer(many=True) + tags = TagRelatedField(many=True, required=False) + + sdgs = serializers.ListField( + child=serializers.IntegerField(min_value=1, max_value=17), + required=False, + ) location = PeopleGroupLocationSerializer() def get_hierarchy(self, obj: PeopleGroup) -> list[dict[str, str | int]]: @@ -535,6 +540,8 @@ def validate_parent(self, value): def create(self, validated_data): team = validated_data.pop("team", {}) featured_projects = validated_data.pop("featured_projects", []) + tags = validated_data.pop("tags", []) + people_group = super(PeopleGroupSerializer, self).create(validated_data) PeopleGroupAddTeamMembersSerializer().create( {"people_group": people_group, **team} @@ -542,12 +549,19 @@ def create(self, validated_data): PeopleGroupAddFeaturedProjectsSerializer().create( {"people_group": people_group, "featured_projects": featured_projects} ) + + people_group.tags.set(tags) return people_group def update(self, instance, validated_data): validated_data.pop("team", {}) validated_data.pop("featured_projects", []) - return super(PeopleGroupSerializer, self).update(instance, validated_data) + tags = validated_data.pop("tags", []) + people_group = super(PeopleGroupSerializer, self).update( + instance, validated_data + ) + people_group.tags.set(tags) + return people_group def save(self, **kwargs): return super().save(**kwargs) From 524f530b90da807db398efc79a7e04ff106350bc Mon Sep 17 00:00:00 2001 From: rgermain Date: Fri, 30 Jan 2026 17:10:13 +0100 Subject: [PATCH 28/32] add locations --- apps/accounts/serializers.py | 6 ++--- 
apps/accounts/views.py | 49 ++++++++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/apps/accounts/serializers.py b/apps/accounts/serializers.py index 04312b67..3888221d 100644 --- a/apps/accounts/serializers.py +++ b/apps/accounts/serializers.py @@ -6,6 +6,8 @@ from django.shortcuts import get_object_or_404 from drf_spectacular.utils import extend_schema_serializer from rest_framework import serializers +from services.crisalid.serializers import ResearcherSerializerLight +from services.translator.serializers import AutoTranslatedModelSerializer from apps.commons.fields import ( HiddenPrimaryKeyRelatedField, @@ -27,9 +29,7 @@ from apps.organizations.models import Organization from apps.projects.models import Project from apps.skills.models import Skill -from apps.skills.serializers import SkillLightSerializer, TagRelatedField, TagSerializer -from services.crisalid.serializers import ResearcherSerializerLight -from services.translator.serializers import AutoTranslatedModelSerializer +from apps.skills.serializers import SkillLightSerializer, TagRelatedField from .exceptions import ( FeaturedProjectPermissionDeniedError, diff --git a/apps/accounts/views.py b/apps/accounts/views.py index 02a83813..0a9bc5be 100644 --- a/apps/accounts/views.py +++ b/apps/accounts/views.py @@ -25,33 +25,39 @@ from rest_framework.decorators import action from rest_framework.filters import OrderingFilter from rest_framework.parsers import JSONParser -from rest_framework.permissions import IsAuthenticated, IsAuthenticatedOrReadOnly +from rest_framework.permissions import ( + IsAuthenticated, + IsAuthenticatedOrReadOnly, +) from rest_framework.response import Response from rest_framework.serializers import BooleanField from rest_framework.views import APIView +from services.google.models import GoogleAccount, GoogleGroup +from services.google.tasks import ( + create_google_account, + create_google_group, + suspend_google_account, + 
update_google_account, + update_google_group, +) +from services.keycloak.exceptions import KeycloakAccountNotFound +from services.keycloak.interface import KeycloakService from apps.commons.filters import UnaccentSearchFilter from apps.commons.models import GroupData from apps.commons.permissions import IsOwner, ReadOnly, WillBeOwner -from apps.commons.serializers import EmailAddressSerializer, RetrieveUpdateModelViewSet +from apps.commons.serializers import ( + EmailAddressSerializer, + RetrieveUpdateModelViewSet, +) from apps.commons.utils import map_action_to_permission from apps.commons.views import DetailOnlyViewsetMixin, MultipleIDViewsetMixin from apps.files.models import Image from apps.files.views import ImageStorageView from apps.organizations.models import Organization from apps.organizations.permissions import HasOrganizationPermission -from apps.projects.serializers import ProjectLightSerializer +from apps.projects.serializers import LocationSerializer, ProjectLightSerializer from apps.skills.models import Skill -from services.google.models import GoogleAccount, GoogleGroup -from services.google.tasks import ( - create_google_account, - create_google_group, - suspend_google_account, - update_google_account, - update_google_group, -) -from services.keycloak.exceptions import KeycloakAccountNotFound -from services.keycloak.interface import KeycloakService from .exceptions import EmailTypeMissingError, PermissionNotFoundError from .filters import PeopleGroupFilter, UserFilter @@ -839,6 +845,23 @@ def similars(self, request, *args, **kwargs): ) return self.get_paginated_response(data.data) + @action( + detail=True, + methods=["GET"], + url_path="locations", + permission_classes=[ReadOnly], + ) + def locations(self, request, *args, **kwargs): + group = self.get_object() + modules_manager = group.get_related_module() + modules = modules_manager(group, request.user) + queryset = modules.locations() + + return Response( + LocationSerializer(queryset, 
many=True, context={"request": request}).data, + status=status.HTTP_200_OK, + ) + @extend_schema( parameters=[OpenApiParameter("people_group_id", str, OpenApiParameter.PATH)] From 12d8deb717a7b73027ac15d509762c846a0dbb6f Mon Sep 17 00:00:00 2001 From: rgermain Date: Mon, 2 Feb 2026 18:42:44 +0100 Subject: [PATCH 29/32] fix:groups maps --- .../0005_alter_peoplegroup_location.py | 25 ++++++++++++++++ apps/accounts/models.py | 4 +-- apps/accounts/serializers.py | 16 ++++++++-- apps/accounts/views.py | 30 ++++++++----------- apps/modules/group.py | 5 +++- apps/projects/urls.py | 9 ++++-- apps/projects/views.py | 28 +++++++++++++---- 7 files changed, 84 insertions(+), 33 deletions(-) create mode 100644 apps/accounts/migrations/0005_alter_peoplegroup_location.py diff --git a/apps/accounts/migrations/0005_alter_peoplegroup_location.py b/apps/accounts/migrations/0005_alter_peoplegroup_location.py new file mode 100644 index 00000000..29fb9b0e --- /dev/null +++ b/apps/accounts/migrations/0005_alter_peoplegroup_location.py @@ -0,0 +1,25 @@ +# Generated by Django 5.2.10 on 2026-02-02 16:10 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("accounts", "0004_peoplegrouplocation_peoplegroup_location"), + ] + + operations = [ + migrations.AlterField( + model_name="peoplegroup", + name="location", + field=models.OneToOneField( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="people_group", + to="accounts.peoplegrouplocation", + ), + ), + ] diff --git a/apps/accounts/models.py b/apps/accounts/models.py index a5d7ebeb..19208853 100644 --- a/apps/accounts/models.py +++ b/apps/accounts/models.py @@ -153,12 +153,12 @@ class PublicationStatus(models.TextChoices): permissions_up_to_date = models.BooleanField(default=False) tags = models.ManyToManyField("skills.Tag", related_name="people_groups") - location = models.ForeignKey( + location = 
models.OneToOneField( PeopleGroupLocation, on_delete=models.SET_NULL, null=True, blank=True, - related_name="people_groups", + related_name="people_group", ) # links diff --git a/apps/accounts/serializers.py b/apps/accounts/serializers.py index 3888221d..cf47ba55 100644 --- a/apps/accounts/serializers.py +++ b/apps/accounts/serializers.py @@ -6,8 +6,6 @@ from django.shortcuts import get_object_or_404 from drf_spectacular.utils import extend_schema_serializer from rest_framework import serializers -from services.crisalid.serializers import ResearcherSerializerLight -from services.translator.serializers import AutoTranslatedModelSerializer from apps.commons.fields import ( HiddenPrimaryKeyRelatedField, @@ -30,6 +28,8 @@ from apps.projects.models import Project from apps.skills.models import Skill from apps.skills.serializers import SkillLightSerializer, TagRelatedField +from services.crisalid.serializers import ResearcherSerializerLight +from services.translator.serializers import AutoTranslatedModelSerializer from .exceptions import ( FeaturedProjectPermissionDeniedError, @@ -248,7 +248,7 @@ class PeopleGroupSuperLightSerializer( class Meta: model = PeopleGroup - read_only_fields = ["id", "slug", "name", "organization"] + read_only_fields = ["id", "slug", "name", "short_description", "organization"] fields = read_only_fields @@ -590,6 +590,16 @@ class Meta: ] +class LocationPeopleGroupSerializer( + AutoTranslatedModelSerializer, serializers.ModelSerializer +): + group = PeopleGroupSuperLightSerializer(source="people_group", read_only=True) + + class Meta: + model = PeopleGroupLocation + fields = "__all__" + + @extend_schema_serializer(exclude_fields=("roles",)) class UserSerializer( StringsImagesSerializer, AutoTranslatedModelSerializer, serializers.ModelSerializer diff --git a/apps/accounts/views.py b/apps/accounts/views.py index 0a9bc5be..9823d954 100644 --- a/apps/accounts/views.py +++ b/apps/accounts/views.py @@ -25,31 +25,15 @@ from rest_framework.decorators 
import action from rest_framework.filters import OrderingFilter from rest_framework.parsers import JSONParser -from rest_framework.permissions import ( - IsAuthenticated, - IsAuthenticatedOrReadOnly, -) +from rest_framework.permissions import IsAuthenticated, IsAuthenticatedOrReadOnly from rest_framework.response import Response from rest_framework.serializers import BooleanField from rest_framework.views import APIView -from services.google.models import GoogleAccount, GoogleGroup -from services.google.tasks import ( - create_google_account, - create_google_group, - suspend_google_account, - update_google_account, - update_google_group, -) -from services.keycloak.exceptions import KeycloakAccountNotFound -from services.keycloak.interface import KeycloakService from apps.commons.filters import UnaccentSearchFilter from apps.commons.models import GroupData from apps.commons.permissions import IsOwner, ReadOnly, WillBeOwner -from apps.commons.serializers import ( - EmailAddressSerializer, - RetrieveUpdateModelViewSet, -) +from apps.commons.serializers import EmailAddressSerializer, RetrieveUpdateModelViewSet from apps.commons.utils import map_action_to_permission from apps.commons.views import DetailOnlyViewsetMixin, MultipleIDViewsetMixin from apps.files.models import Image @@ -58,6 +42,16 @@ from apps.organizations.permissions import HasOrganizationPermission from apps.projects.serializers import LocationSerializer, ProjectLightSerializer from apps.skills.models import Skill +from services.google.models import GoogleAccount, GoogleGroup +from services.google.tasks import ( + create_google_account, + create_google_group, + suspend_google_account, + update_google_account, + update_google_group, +) +from services.keycloak.exceptions import KeycloakAccountNotFound +from services.keycloak.interface import KeycloakService from .exceptions import EmailTypeMissingError, PermissionNotFoundError from .filters import PeopleGroupFilter, UserFilter diff --git 
a/apps/modules/group.py b/apps/modules/group.py index 4faa1ff4..bcac76f0 100644 --- a/apps/modules/group.py +++ b/apps/modules/group.py @@ -4,7 +4,7 @@ from apps.accounts.models import PeopleGroup, ProjectUser from apps.modules.base import AbstractModules, register_module -from apps.projects.models import Project +from apps.projects.models import Location, Project from apps.skills.models import Skill from services.crisalid.models import Document, DocumentTypeCentralized @@ -67,6 +67,9 @@ def similars(self) -> QuerySet[PeopleGroup]: def subgroups(self) -> QuerySet[PeopleGroup]: return self.instance.children.all() + def locations(self) -> QuerySet[Location]: + return Location.objects.filter(project__in=self.featured_projects()) + @cached_property def _is_structure(self): try: diff --git a/apps/projects/urls.py b/apps/projects/urls.py index 889fcc31..7852f827 100644 --- a/apps/projects/urls.py +++ b/apps/projects/urls.py @@ -1,7 +1,7 @@ from rest_framework.routers import DefaultRouter from apps.announcements.views import AnnouncementViewSet -from apps.commons.urls import project_router_register +from apps.commons.urls import organization_router_register, project_router_register from apps.feedbacks.views import ( CommentImagesView, CommentViewSet, @@ -12,6 +12,7 @@ from .views import ( BlogEntryImagesView, BlogEntryViewSet, + GeneralLocationView, GoalViewSet, HistoricalProjectViewSet, LinkedProjectViewSet, @@ -25,11 +26,13 @@ ProjectTabItemViewset, ProjectTabViewset, ProjectViewSet, - ReadLocationViewSet, ) router = DefaultRouter() -router.register(r"location", ReadLocationViewSet, basename="Read-location") + +organization_router_register( + router, r"location", GeneralLocationView, basename="General-location" +) router.register(r"project", ProjectViewSet, basename="Project") project_router_register( diff --git a/apps/projects/views.py b/apps/projects/views.py index 5e41ada8..71cf7472 100644 --- a/apps/projects/views.py +++ b/apps/projects/views.py @@ -17,7 +17,9 @@ 
from rest_framework.response import Response from simple_history.utils import update_change_reason +from apps.accounts.models import PeopleGroupLocation from apps.accounts.permissions import HasBasePermission +from apps.accounts.serializers import LocationPeopleGroupSerializer from apps.analytics.models import Stat from apps.commons.cache import clear_cache_with_key, redis_cache_view from apps.commons.permissions import IsOwner, ReadOnly @@ -43,7 +45,7 @@ ) from services.mistral.models import ProjectEmbedding -from .filters import LocationFilter, ProjectFilter +from .filters import ProjectFilter from .models import ( BlogEntry, Goal, @@ -612,11 +614,6 @@ def dispatch(self, request, *args, **kwargs): return super(LocationViewSet, self).dispatch(request, *args, **kwargs) -class ReadLocationViewSet(LocationViewSet): - http_method_names = ["get", "list"] - filterset_class = LocationFilter - - class HistoricalProjectViewSet(MultipleIDViewsetMixin, viewsets.ReadOnlyModelViewSet): lookup_field = "pk" permission_classes = [ReadOnly] @@ -1004,3 +1001,22 @@ def add_image_to_model(self, image, *args, **kwargs): tab_item.save() return f"/v1/project/{self.kwargs['project_id']}/tab/{self.kwargs['tab_id']}/item-image/{image.id}" return None + + +class GeneralLocationView(viewsets.GenericViewSet): + http_method_names = ["get", "list"] + + def list(self, request, *args, **kwargs): + qs_project = self.request.user.get_project_related_queryset( + Location.objects + ).select_related("project") + + qs_group = self.request.user.get_people_group_related_queryset( + PeopleGroupLocation.objects + ).select_related("people_group") + + data = { + "groups": LocationPeopleGroupSerializer(qs_group, many=True).data, + "projects": LocationSerializer(qs_project, many=True).data, + } + return Response(data, status=status.HTTP_200_OK) From c97fa2aad7264a1097199fd40f1177d99f5731ff Mon Sep 17 00:00:00 2001 From: rgermain Date: Tue, 3 Feb 2026 11:47:16 +0100 Subject: [PATCH 30/32] fix locations --- 
apps/accounts/serializers.py | 38 +++++++++++++++++++++++++++--------- apps/projects/models.py | 11 +++++------ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/apps/accounts/serializers.py b/apps/accounts/serializers.py index cf47ba55..d2d34c27 100644 --- a/apps/accounts/serializers.py +++ b/apps/accounts/serializers.py @@ -237,10 +237,24 @@ def get_can_mentor_on(self, user: ProjectUser) -> list[dict]: class PeopleGroupLocationSerializer(BaseLocationSerializer): + class Meta(BaseLocationSerializer.Meta): model = PeopleGroupLocation +class PeopleGroupLocationRelated(serializers.RelatedField): + def get_queryset(self): + return PeopleGroupLocation.objects.all() + + def to_representation(self, instance: PeopleGroupLocation) -> dict: + return PeopleGroupLocationSerializer(instance=instance).data + + def to_internal_value(self, element: dict) -> PeopleGroupLocation: + if element.get("pk"): + return PeopleGroupLocation.objects.get(pk=element["pk"]) + return PeopleGroupLocation(**element) + + class PeopleGroupSuperLightSerializer( AutoTranslatedModelSerializer, serializers.ModelSerializer ): @@ -478,7 +492,7 @@ class PeopleGroupSerializer( child=serializers.IntegerField(min_value=1, max_value=17), required=False, ) - location = PeopleGroupLocationSerializer() + location = PeopleGroupLocationRelated(required=False, allow_null=True) def get_hierarchy(self, obj: PeopleGroup) -> list[dict[str, str | int]]: request = self.context.get("request") @@ -540,7 +554,11 @@ def validate_parent(self, value): def create(self, validated_data): team = validated_data.pop("team", {}) featured_projects = validated_data.pop("featured_projects", []) - tags = validated_data.pop("tags", []) + location = validated_data.pop("location", {}) + + if location: + location.save() + validated_data["id"] = location people_group = super(PeopleGroupSerializer, self).create(validated_data) PeopleGroupAddTeamMembersSerializer().create( @@ -549,23 +567,25 @@ def create(self, validated_data): 
PeopleGroupAddFeaturedProjectsSerializer().create( {"people_group": people_group, "featured_projects": featured_projects} ) - - people_group.tags.set(tags) return people_group def update(self, instance, validated_data): validated_data.pop("team", {}) validated_data.pop("featured_projects", []) - tags = validated_data.pop("tags", []) + location = validated_data.pop("location") + + if not location and getattr(instance, "location", None): + instance.location.delete() + validated_data["location"] = None + elif location: + location.save() + validated_data["location"] = location + people_group = super(PeopleGroupSerializer, self).update( instance, validated_data ) - people_group.tags.set(tags) return people_group - def save(self, **kwargs): - return super().save(**kwargs) - class Meta: model = PeopleGroup read_only_fields = ["is_root", "slug", "modules"] diff --git a/apps/projects/models.py b/apps/projects/models.py index 57a1a7f6..97805ab4 100644 --- a/apps/projects/models.py +++ b/apps/projects/models.py @@ -864,7 +864,6 @@ def duplicate(self, project: "Project") -> "Goal": class AbstractLocation( HasAutoTranslatedFields, - ProjectRelated, DuplicableModel, models.Model, ): @@ -910,10 +909,6 @@ class Meta: default=LocationType.TEAM, ) - def get_related_project(self) -> Optional["Project"]: - """Return the projects related to this model.""" - return self.project - def get_related_organizations(self) -> list["Organization"]: """Return the organizations related to this model.""" return self.project.get_related_organizations() @@ -924,7 +919,7 @@ def duplicate(self) -> Self: return copy -class Location(AbstractLocation): +class Location(ProjectRelated, AbstractLocation): """A project location on Earth. 
Attributes @@ -939,6 +934,10 @@ class Location(AbstractLocation): Project, on_delete=models.CASCADE, related_name="locations" ) + def get_related_project(self) -> Optional["Project"]: + """Return the projects related to this model.""" + return self.project + def duplicate(self, project: Project) -> "Location": copy = super().duplicate() copy.project = project From aa76158e9194001b6ca88642d7f1fc5b6d7870d5 Mon Sep 17 00:00:00 2001 From: rgermain Date: Tue, 3 Feb 2026 12:20:09 +0100 Subject: [PATCH 31/32] fix documents filters --- services/crisalid/views.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/services/crisalid/views.py b/services/crisalid/views.py index de0d0c41..63a60c5c 100644 --- a/services/crisalid/views.py +++ b/services/crisalid/views.py @@ -152,8 +152,11 @@ def get_analytics(self): # get counted all document_types types # use only here the filter_queryset, # the next years values need to have all document_types (non filtered) + document_types = Counter( - self.filter_queryset(qs, document_type_enabled=False) + Document.objects.filter( + id__in=self.filter_queryset(qs, document_type_enabled=False) + ) .order_by("document_type") .values_list("document_type", flat=True) ) From fb75930e555723d16ebd3afa711ce16d7c19068b Mon Sep 17 00:00:00 2001 From: rgermain Date: Wed, 4 Feb 2026 15:40:53 +0100 Subject: [PATCH 32/32] add model image people gorup --- ...05_alter_image_options_peoplegroupimage.py | 77 +++++++++++++++ apps/files/models.py | 99 +++++++++++++------ apps/files/serializers.py | 7 ++ apps/files/urls.py | 6 ++ apps/files/views.py | 19 +++- apps/modules/group.py | 3 + services/crisalid/models.py | 4 + 7 files changed, 183 insertions(+), 32 deletions(-) create mode 100644 apps/files/migrations/0005_alter_image_options_peoplegroupimage.py diff --git a/apps/files/migrations/0005_alter_image_options_peoplegroupimage.py b/apps/files/migrations/0005_alter_image_options_peoplegroupimage.py new file mode 100644 index 
00000000..12ae550c --- /dev/null +++ b/apps/files/migrations/0005_alter_image_options_peoplegroupimage.py @@ -0,0 +1,77 @@ +# Generated by Django 5.2.10 on 2026-02-04 12:18 + +import apps.commons.mixins +import apps.files.models +import django.db.models.deletion +import stdimage.models +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("accounts", "0005_alter_peoplegroup_location"), + ("files", "0004_projectuserattachmentlink_projectuserattachmentfile_and_more"), + ] + + operations = [ + migrations.AlterModelOptions( + name="image", + options={"ordering": ("-created_at",)}, + ), + migrations.CreateModel( + name="PeopleGroupImage", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("name", models.CharField(blank=True, max_length=255, null=True)), + ( + "file", + stdimage.models.StdImageField( + force_min_size=False, + height_field="height", + upload_to=apps.files.models.dynamic_upload_to, + variations={ + "full": (1920, 10000), + "large": (1024, 10000), + "medium": (768, 10000), + "small": (500, 10000), + }, + width_field="width", + ), + ), + ("height", models.IntegerField(blank=True, null=True)), + ("width", models.IntegerField(blank=True, null=True)), + ("natural_ratio", models.FloatField(blank=True, null=True)), + ("scale_x", models.FloatField(blank=True, null=True)), + ("scale_y", models.FloatField(blank=True, null=True)), + ("left", models.FloatField(blank=True, null=True)), + ("top", models.FloatField(blank=True, null=True)), + ("created_at", models.DateTimeField(auto_now_add=True)), + ( + "people_group", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="images", + to="accounts.peoplegroup", + ), + ), + ], + options={ + "ordering": ("-created_at",), + "abstract": False, + }, + bases=( + models.Model, + apps.commons.mixins.DuplicableModel, + apps.commons.mixins.HasOwners, + ), + ), 
+ ] diff --git a/apps/files/models.py b/apps/files/models.py index 42fa76f9..65c41d68 100644 --- a/apps/files/models.py +++ b/apps/files/models.py @@ -1,6 +1,7 @@ import datetime import uuid from contextlib import suppress +from copy import deepcopy from typing import TYPE_CHECKING, Any, Optional from azure.core.exceptions import ResourceNotFoundError @@ -16,6 +17,7 @@ from apps.commons.mixins import ( DuplicableModel, HasOwner, + HasOwners, OrganizationRelated, ProjectRelated, ) @@ -38,7 +40,7 @@ def dynamic_upload_to(instance: Model, filename: str): "argument should have a dynamic attribute `_upload_to` set before " "saving it for the first time." % instance.__class__.__name__ ) - upload_to = instance.__dict__.pop("_upload_to") + upload_to = instance.__dict__.pop("_upload_to", instance._upload_to) return upload_to(instance, filename) @@ -73,6 +75,11 @@ def user_attachment_directory_path( return f"users/attachments/{instance.owner.pk}/{instance.attachment_type}/{date_part}-{filename}" +def people_group_images_directory_path(instance: "PeopleGroupImage", filename: str): + date_part = f"{datetime.datetime.today():%Y-%m-%d}" + return f"peoplegroup/images/{instance.pk}/{date_part}-{filename}" + + class AttachmentLink( HasAutoTranslatedFields, DuplicableModel, @@ -238,10 +245,8 @@ def duplicate(self, project: "Project") -> Optional["AttachmentFile"]: return None -class Image( - models.Model, HasOwner, ProjectRelated, OrganizationRelated, DuplicableModel -): - name = models.CharField(max_length=255) +class BaseImage(models.Model, DuplicableModel): + name = models.CharField(max_length=255, null=True, blank=True) file = StdImageField( upload_to=dynamic_upload_to, height_field="height", @@ -263,6 +268,35 @@ class Image( left = models.FloatField(blank=True, null=True) top = models.FloatField(blank=True, null=True) created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + ordering = ("-created_at",) + abstract = True + + def duplicate(self, upload_to: str = "") 
-> Optional["BaseImage"]: + with suppress(ResourceNotFoundError): + file_path = self.file.name.split("/") + file_name = file_path.pop() + file_extension = file_name.split(".")[-1] + if upload_to: + upload_to = f"{upload_to}{uuid.uuid4()}.{file_extension}" + else: + upload_to = "/".join([*file_path, f"{uuid.uuid4()}.{file_extension}"]) + new_file = SimpleUploadedFile( + name=upload_to, + content=self.file.read(), + content_type=f"image/{file_extension}", + ) + image = deepcopy(self) + image.file = new_file + image.id = None + image._upload_to = lambda instance, filename: upload_to + return image + return None + + +class Image(BaseImage, HasOwner, ProjectRelated, OrganizationRelated): + name = models.CharField(max_length=255) owner = models.ForeignKey( "accounts.ProjectUser", on_delete=models.CASCADE, @@ -403,32 +437,9 @@ def get_related_project(self) -> Optional["Project"]: def duplicate( self, owner: Optional["ProjectUser"] = None, upload_to: str = "" ) -> Optional["Image"]: - with suppress(ResourceNotFoundError): - file_path = self.file.name.split("/") - file_name = file_path.pop() - file_extension = file_name.split(".")[-1] - if upload_to: - upload_to = f"{upload_to}{uuid.uuid4()}.{file_extension}" - else: - upload_to = "/".join([*file_path, f"{uuid.uuid4()}.{file_extension}"]) - new_file = SimpleUploadedFile( - name=upload_to, - content=self.file.read(), - content_type=f"image/{file_extension}", - ) - image = Image( - name=self.name, - file=new_file, - height=self.height, - width=self.width, - natural_ratio=self.natural_ratio, - scale_x=self.scale_x, - scale_y=self.scale_y, - left=self.left, - top=self.top, - owner=owner or self.owner, - ) - image._upload_to = lambda instance, filename: upload_to + image = super().duplicate(upload_to) + if image: + image.owner = (owner or self.owner,) image.save() return image return None @@ -491,3 +502,29 @@ def get_owner(self): def is_owned_by(self, user: "ProjectUser") -> bool: return user == self.get_owner() + + +class 
PeopleGroupImage(BaseImage, HasOwners): + people_group = models.ForeignKey( + "accounts.PeopleGroup", + on_delete=models.CASCADE, + null=False, + related_name="images", + ) + + def _upload_to(self, instance, filename): + return people_group_images_directory_path(instance, filename) + + def is_owned_by(self, user: "ProjectUser") -> bool: + """Whether the given user is the owners of the group.""" + people_group = self.people_group + members = people_group.managers() | people_group.leaders() + + return members.contains(user) + + def get_owners(self): + """Get the owners of the group.""" + people_group = self.people_group + members = people_group.managers() | people_group.leaders() + + return list(members) diff --git a/apps/files/serializers.py b/apps/files/serializers.py index d8747b6e..996e67d1 100644 --- a/apps/files/serializers.py +++ b/apps/files/serializers.py @@ -34,6 +34,7 @@ AttachmentType, Image, OrganizationAttachmentFile, + PeopleGroupImage, ProjectUserAttachmentFile, ProjectUserAttachmentLink, ) @@ -463,3 +464,9 @@ def validate_file(self, file): if file.size > limit: raise FileTooLargeError return file + + +class PeopleGroupImageSerializer(ImageSerializer): + class Meta(ImageSerializer.Meta): + model = PeopleGroupImage + fields = (*ImageSerializer.Meta.fields, "people_group") diff --git a/apps/files/urls.py b/apps/files/urls.py index fa3b591a..3f7cb917 100644 --- a/apps/files/urls.py +++ b/apps/files/urls.py @@ -1,6 +1,7 @@ from rest_framework.routers import DefaultRouter from apps.commons.urls import ( + organization_people_group_router_register, organization_router_register, project_router_register, user_router_register, @@ -9,6 +10,7 @@ AttachmentFileViewSet, AttachmentLinkViewSet, OrganizationAttachmentFileViewSet, + PeopleGroupGalleryViewSet, ProjectUserAttachmentFileViewSet, ProjectUserAttachmentLinkViewSet, ) @@ -44,3 +46,7 @@ ProjectUserAttachmentLinkViewSet, basename="ProjectUserAttachmentLink", ) + +organization_people_group_router_register( + 
router, r"gallery", PeopleGroupGalleryViewSet, basename="PeopleGroupGallery" +) diff --git a/apps/files/views.py b/apps/files/views.py index d0b39a9f..19937df3 100644 --- a/apps/files/views.py +++ b/apps/files/views.py @@ -17,11 +17,12 @@ from apps.accounts.permissions import HasBasePermission from apps.commons.permissions import IsOwner, ReadOnly, WillBeOwner from apps.commons.utils import map_action_to_permission -from apps.commons.views import MultipleIDViewsetMixin +from apps.commons.views import MultipleIDViewsetMixin, NestedPeopleGroupViewMixins from apps.organizations.models import Organization from apps.organizations.permissions import HasOrganizationPermission from apps.projects.models import Project from apps.projects.permissions import HasProjectPermission, ProjectIsNotLocked +from lib.views import NestedOrganizationViewMixins from .exceptions import ProtectedImageError from .models import ( @@ -37,6 +38,7 @@ AttachmentLinkSerializer, ImageSerializer, OrganizationAttachmentFileSerializer, + PeopleGroupImageSerializer, ProjectUserAttachmentFileSerializer, ProjectUserAttachmentLinkSerializer, ) @@ -276,3 +278,18 @@ def get_queryset(self) -> QuerySet: def create(self, request, *ar, **kw): request.data["owner"] = int(self.kwargs["user_id"]) return super().create(request, *ar, **kw) + + +class PeopleGroupGalleryViewSet( + NestedOrganizationViewMixins, NestedPeopleGroupViewMixins, viewsets.ModelViewSet +): + serializer_class = PeopleGroupImageSerializer + + def get_queryset(self): + modules_manager = self.people_group.get_related_module() + modules = modules_manager(self.people_group, self.request.user) + return modules.gallery() + + def create(self, request, *ar, **kw): + request.data["people_group"] = self.people_group.id + return super().create(request, *ar, **kw) diff --git a/apps/modules/group.py b/apps/modules/group.py index bcac76f0..6d99fadf 100644 --- a/apps/modules/group.py +++ b/apps/modules/group.py @@ -70,6 +70,9 @@ def subgroups(self) -> 
QuerySet[PeopleGroup]: def locations(self) -> QuerySet[Location]: return Location.objects.filter(project__in=self.featured_projects()) + def gallery(self): + return self.instance.images.all() + @cached_property def _is_structure(self): try: diff --git a/services/crisalid/models.py b/services/crisalid/models.py index 681ff17b..f3e9b8e9 100644 --- a/services/crisalid/models.py +++ b/services/crisalid/models.py @@ -208,6 +208,10 @@ class DocumentType(models.TextChoices): organization_query_string = "contributors__user__groups__organizations" + class Meta: + # order by publication date (newest first), and put null dates last + ordering = (models.F("publication_date").desc(nulls_last=True),) + def get_related_organizations(self): """organizations from user""" return list(