From ad2dd5f89f5bc816b7c452f8704ed87b4b466def Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o?= Date: Tue, 31 Mar 2026 15:32:44 +0200 Subject: [PATCH 1/4] Bump version to 1.4.2 in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e164057..ca935a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "welearn-database" -version = "1.4.0" +version = "1.4.2" description = "All stuff related to relationnal database from the WeLearn project" authors = [ {name = "Théo",email = "theo.nardin@cri-paris.org"} From a2ded4b38d399c706bf66b0cd1e4c5c514c54cef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o?= Date: Tue, 31 Mar 2026 16:03:27 +0200 Subject: [PATCH 2/4] Add new fields to CorpusNameEmbeddingModelLang model for embeddings and categorization --- welearn_database/data/models/corpus_related.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/welearn_database/data/models/corpus_related.py b/welearn_database/data/models/corpus_related.py index d4d1ca8..84a13de 100644 --- a/welearn_database/data/models/corpus_related.py +++ b/welearn_database/data/models/corpus_related.py @@ -107,8 +107,12 @@ class CorpusNameEmbeddingModelLang(Base): __table_args__ = {"schema": schema_name} __read_only__ = True source_name: Mapped[str] = mapped_column(primary_key=True) + corpus_id: Mapped[UUID] + embedding_model_id: Mapped[UUID] title: Mapped[str] lang: Mapped[str] + used_since: Mapped[datetime] + category_id: Mapped[UUID] class CorpusEmbeddingModel(Base): From d116bec8e0f3815c344fb8d54e3356e1950334af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o?= Date: Tue, 31 Mar 2026 16:40:17 +0200 Subject: [PATCH 3/4] Modify corpus_name_embedding_model_lang view to keep only the latest embedding model per corpus and language --- ...odify_corpus_name_embedding_model_lang_.py | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644
welearn_database/alembic/versions/b049924f7067_modify_corpus_name_embedding_model_lang_.py diff --git a/welearn_database/alembic/versions/b049924f7067_modify_corpus_name_embedding_model_lang_.py b/welearn_database/alembic/versions/b049924f7067_modify_corpus_name_embedding_model_lang_.py new file mode 100644 index 0000000..c254048 --- /dev/null +++ b/welearn_database/alembic/versions/b049924f7067_modify_corpus_name_embedding_model_lang_.py @@ -0,0 +1,93 @@ +"""modify corpus_name_embedding_model_lang view + +Revision ID: b049924f7067 +Revises: f8602200fa99 +Create Date: 2026-03-31 16:09:12.085443 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "b049924f7067" +down_revision: Union[str, None] = "f8602200fa99" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.execute( + """ + DROP MATERIALIZED VIEW corpus_related.corpus_name_embedding_model_lang; + """ + ) + op.execute( + """ + +CREATE MATERIALIZED VIEW corpus_related.corpus_name_embedding_model_lang +TABLESPACE pg_default +AS WITH ranked AS ( +SELECT + c.source_name, + cem.corpus_id, + cem.embedding_model_id, + em.title, + em.lang, + cem.used_since, + c.category_id, + ROW_NUMBER() OVER ( + PARTITION BY cem.corpus_id, + em.lang +ORDER BY + cem.used_since DESC + ) AS rn +FROM + corpus_related.corpus_embedding_model cem +JOIN corpus_related.corpus c ON + c.id = cem.corpus_id +JOIN corpus_related.embedding_model em ON + em.id = cem.embedding_model_id +WHERE + c.is_active +) + SELECT + source_name, + corpus_id, + embedding_model_id, + title, + lang, + used_since, + category_id + FROM + ranked + WHERE + rn = 1 + +WITH DATA; + """ + ) + + +def downgrade() -> None: + op.execute( + """ + DROP MATERIALIZED VIEW corpus_related.corpus_name_embedding_model_lang; + """ + ) + op.execute( + """ + CREATE MATERIALIZED VIEW 
corpus_related.corpus_name_embedding_model_lang + TABLESPACE pg_default + AS SELECT corpus.source_name, + embedding_model.title, + embedding_model.lang + FROM corpus_related.corpus + JOIN corpus_related.corpus_embedding_model ON corpus_embedding_model.corpus_id = corpus.id + JOIN corpus_related.embedding_model ON embedding_model.id = corpus_embedding_model.embedding_model_id + WHERE corpus.is_active + WITH DATA; + """ + ) From c30a8b5e7eb2bdf3b7bf3f3c23875a9c9a61565c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o?= <133012334+lpi-tn@users.noreply.github.com> Date: Wed, 29 Apr 2026 16:08:15 +0200 Subject: [PATCH 4/4] Use (corpus_id, lang) as primary key of CorpusNameEmbeddingModelLang Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- welearn_database/data/models/corpus_related.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/welearn_database/data/models/corpus_related.py b/welearn_database/data/models/corpus_related.py index 84a13de..a54e0b3 100644 --- a/welearn_database/data/models/corpus_related.py +++ b/welearn_database/data/models/corpus_related.py @@ -106,11 +106,11 @@ class CorpusNameEmbeddingModelLang(Base): __tablename__ = "corpus_name_embedding_model_lang" __table_args__ = {"schema": schema_name} __read_only__ = True - source_name: Mapped[str] = mapped_column(primary_key=True) - corpus_id: Mapped[UUID] + source_name: Mapped[str] = mapped_column() + corpus_id: Mapped[UUID] = mapped_column(primary_key=True) embedding_model_id: Mapped[UUID] title: Mapped[str] - lang: Mapped[str] + lang: Mapped[str] = mapped_column(primary_key=True) used_since: Mapped[datetime] category_id: Mapped[UUID]