From 449f41ddcb263651b5eceb0cabded3aee508cb59 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Thu, 25 Sep 2025 11:09:50 +0100 Subject: [PATCH 01/12] WIP: feat(vcs): new data model * Updated the data model to accommodate the new generic approach to VCS integration. This involves renaming the `github_...` tables to `vcs_...`, adding a new column to the relevant tables to identify which provider the records relate to, and more. * Added an Alembic migration, including moving the repository data from `oauthclient_remoteaccount` to the `vcs_repositories` table, which is a complex and long-running operation. This will be supplemented by a manual migration guide for instances like Zenodo where a several-minute full DB lock is not feasible. When to use the automated migration versus the manual one will be clarified in the docs. * Added a repo-user many-to-many mapping table. Since repos are no longer stored in the Remote Accounts table, we need a different way of associating users with the repos they have access to. This table is synced using code that will be included in other PRs. * This PR contains only the data model changes themselves and not the associated functional changes needed to do anything useful. * This commit on its own is UNRELEASABLE. We will merge multiple commits related to the VCS upgrade into the `vcs-staging` branch and then merge them all into `master` once we have a fully release-ready prototype. At that point, we will create a squash commit. 
--- ...54318294_switch_to_generic_git_services.py | 331 +++++++++++++++++ invenio_vcs/models.py | 347 ++++++++++++++++++ 2 files changed, 678 insertions(+) create mode 100644 invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py create mode 100644 invenio_vcs/models.py diff --git a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py new file mode 100644 index 00000000..9a58c6dc --- /dev/null +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -0,0 +1,331 @@ +# +# This file is part of Invenio. +# Copyright (C) 2025 CERN. +# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Switch to generic git services""" + +import uuid +from datetime import datetime, timezone + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.ext.mutable import MutableDict +from sqlalchemy_utils import JSONType, UUIDType + +# revision identifiers, used by Alembic. +revision = "1754318294" +down_revision = "b0eaee37b545" +# You cannot rename an Alembic branch. So we will have to keep +# the branch label `invenio-github` despite changing the module +# to `invenio-vcs`. 
+branch_labels = () +depends_on = None + + +def upgrade(): + """Upgrade database.""" + op.rename_table("github_repositories", "vcs_repositories") + op.alter_column( + "vcs_repositories", + "github_id", + new_column_name="provider_id", + type_=sa.String(length=255), + nullable=False, + existing_type=sa.Integer(), + existing_nullable=True, + ) + op.alter_column( + "vcs_repositories", + "hook", + type_=sa.String(length=255), + nullable=True, + existing_type=sa.Integer(), + existing_nullable=True, + ) + op.add_column( + "vcs_repositories", + # We use the provider name "github" by default as this is what we're already using across the codebase + sa.Column("provider", sa.String(255), nullable=False, server_default="github"), + ) + op.add_column( + "vcs_repositories", + sa.Column( + "default_branch", sa.String(255), nullable=False, server_default="master" + ), + ) + op.add_column( + "vcs_repositories", sa.Column("description", sa.String(10000), nullable=True) + ) + op.add_column( + # Nullable for now (see below) + "vcs_repositories", + sa.Column("html_url", sa.String(10000), nullable=True), + ) + op.add_column( + "vcs_repositories", sa.Column("license_spdx", sa.String(255), nullable=True) + ) + op.alter_column("vcs_repositories", "user_id", new_column_name="enabled_by_id") + op.drop_index("ix_github_repositories_name") + op.drop_index("ix_github_repositories_github_id") + + # Because they rely on the `provider` column, these are automatically + # deleted when downgrading so we don't need a separate drop command + # for them. 
+ op.create_unique_constraint( + constraint_name=op.f("uq_vcs_repositories_provider_provider_id"), + table_name="vcs_repositories", + columns=["provider", "provider_id"], + ) + op.create_unique_constraint( + constraint_name=op.f("uq_vcs_repositories_provider_name"), + table_name="vcs_repositories", + columns=["provider", "name"], + ) + + # Migrate data from the OAuth remote `extra_data` field to the repositories table + # where we will now store everything directly. + # + # We need to recreate the SQLAlchemy models for `RemoteAccount` and `Repository` here but + # in a much more lightweight way. We cannot simply import the models because (a) they depend + # on the full Invenio app being initialised and all extensions available and (b) we need + # to work with the models as they stand precisely at this point in the migration chain + # rather than the model file itself which may be at a later commit. + # + # We only include here the columns, constraints, and relations that we actually need to + # perform the migration, therefore keeping these models as lightweight as possible. 
+ remote_account_table = sa.table( + "oauthclient_remoteaccount", + sa.Column("id", sa.Integer, primary_key=True), + sa.Column("user_id", sa.Integer, sa.ForeignKey("account_user.id")), + sa.Column("client_id", sa.String(255)), + sa.Column("extra_data", MutableDict.as_mutable(JSONType)), + ) + vcs_repositories_table = sa.table( + "vcs_repositories", + sa.Column("id", UUIDType, primary_key=True), + sa.Column("provider_id", sa.String(255), nullable=True), + sa.Column("provider", sa.String(255), nullable=True), + sa.Column("description", sa.String(10000), nullable=True), + sa.Column("html_url", sa.String(10000), nullable=False), + sa.Column("license_spdx", sa.String(255), nullable=True), + sa.Column("default_branch", sa.String(255), nullable=False), + sa.Column("name", sa.String(255), nullable=False), + sa.Column("hook", sa.String(255), nullable=True), + sa.Column( + "enabled_by_id", sa.Integer, sa.ForeignKey("account_user.id"), nullable=True + ), + sa.Column("created", sa.DateTime, nullable=False), + sa.Column("updated", sa.DateTime, nullable=False), + ) + + # This is the recommended way to run SQLAlchemy operations in a migration, see https://alembic.sqlalchemy.org/en/latest/ops.html#alembic.operations.Operations.execute + session = op.get_bind() + + # We don't know the client ID as this is a config variable. + # So to find the RemoteAccounts that correspond to GitHub, we need to check for the existence + # of the `repos` key in the `extra_data` JSON. We cannot make this very efficient sadly, because + # (a) in Postgres we are using JSON not JSONB so there is no efficient JSON querying and (b) the + # instance might be using MySQL/SQLite where we store it as `TEXT`. 
+ + remote_accounts = session.execute(sa.select(remote_account_table)) + for remote_account in remote_accounts.mappings(): + if "repos" not in remote_account["extra_data"]: + continue + + repos = remote_account["extra_data"]["repos"] + + for id, github_repo in repos.items(): + # `id` (the dict key) is a string because JSON keys must be strings + + matching_db_repo_id = session.scalar( + sa.select(vcs_repositories_table).filter_by(provider_id=id) + ) + + if matching_db_repo_id is None: + # We are now storing _all_ repositories (even non-enabled ones) in the DB. + # The repo-user association will be created on the first sync after this migration, we need to download + # the list of users with access to the repo from the GitHub API. + session.execute( + vcs_repositories_table.insert().values( + id=uuid.uuid4(), + provider_id=id, + provider="github", + description=github_repo["description"], + name=github_repo["full_name"], + default_branch=github_repo["default_branch"], + # So far we have only supported github.com so we can safely assume the URL + html_url=f'https://github.com/{github_repo["full_name"]}', + # We have never stored this, it is queried at runtime right now. When the first + # sync happens after this migration, we will download all the license IDs from the VCS. 
+ license_spdx=None, + # This repo wasn't enabled + hook=None, + enabled_by_id=None, + created=datetime.now(tz=timezone.utc), + updated=datetime.now(tz=timezone.utc), + ) + ) + else: + session.execute( + vcs_repositories_table.update() + .filter_by(id=matching_db_repo_id) + .values( + description=github_repo["description"], + name=github_repo["full_name"], + default_branch=github_repo["default_branch"], + html_url=f'https://github.com/{github_repo["full_name"]}', + updated=datetime.now(tz=timezone.utc), + ) + ) + + # Remove `repos` from the existing `extra_data`, leaving only the last sync timestamp + session.execute( + remote_account_table.update() + .filter_by(id=remote_account["id"]) + .values(extra_data={"last_sync": remote_account["extra_data"]["last_sync"]}) + ) + + # We initially set this to nullable=True so we can create the column without an error + # (it would be null for existing records) but after the SQLAlchemy operations above we + # have populated it so we can mark it non-nullable. + op.alter_column( + "vcs_repositories", "html_url", nullable=False, existing_nullable=True + ) + + op.rename_table("github_releases", "vcs_releases") + op.alter_column( + "vcs_releases", + "release_id", + new_column_name="provider_id", + type_=sa.String(length=255), + nullable=False, + existing_type=sa.Integer(), + existing_nullable=True, + ) + op.add_column( + "vcs_releases", + sa.Column("provider", sa.String(255), nullable=False, server_default="github"), + ) + if op.get_context().dialect.name == "postgresql": + op.alter_column( + "vcs_releases", + "errors", + type_=sa.dialects.postgresql.JSONB, + postgresql_using="errors::text::jsonb", + ) + + op.drop_constraint( + op.f("uq_github_releases_release_id"), table_name="vcs_releases", type_="unique" + ) + # A given provider cannot have duplicate repository IDs. 
+ # These constraints are also inherently deleted when the `provider` column is dropped + op.create_unique_constraint( + constraint_name=op.f("uq_vcs_releases_provider_id_provider"), + table_name="vcs_releases", + columns=["provider_id", "provider"], + ) + # A specific repository from a given provider cannot have multiple releases of the same tag + op.create_unique_constraint( + constraint_name=op.f("uq_vcs_releases_provider_id_provider_tag"), + table_name="vcs_releases", + columns=["provider_id", "provider", "tag"], + ) + + op.create_table( + "vcs_repository_users", + sa.Column("repository_id", UUIDType(), primary_key=True), + sa.Column("user_id", sa.Integer(), primary_key=True), + sa.ForeignKeyConstraint( + ["repository_id"], + ["vcs_repositories.id"], + name=op.f("fk_vcs_repository_users_repository_id_vcs_repositories"), + ), + sa.ForeignKeyConstraint( + ["user_id"], + ["accounts_user.id"], + name=op.f("fk_vcs_repository_users_user_id_accounts_user"), + ), + ) + # ### end Alembic commands ### + + +def downgrade(): + """Downgrade database.""" + + # Currently, the downgrade can only be peformed **without data**. The tables are transformed but + # data will not be successfully migrated. The upgrade migration has a large amount of custom logic + # for migrating the data into the new format, and this is not replicated/reversed for downgrading. 
+ + op.alter_column( + "vcs_repositories", + "enabled_by_id", + new_column_name="user_id", + ) + op.drop_table("vcs_repository_users") + + op.rename_table("vcs_repositories", "github_repositories") + op.alter_column( + "github_repositories", + "provider_id", + new_column_name="github_id", + type_=sa.Integer(), + nullable=True, + existing_type=sa.String(length=255), + existing_nullable=False, + postgresql_using="provider_id::integer", + ) + op.alter_column( + "github_repositories", + "hook", + type_=sa.Integer(), + nullable=True, + existing_type=sa.String(length=255), + existing_nullable=True, + postgresql_using="hook::integer", + ) + op.drop_column("github_repositories", "provider") + op.drop_column("github_repositories", "description") + op.drop_column("github_repositories", "html_url") + op.drop_column("github_repositories", "license_spdx") + op.drop_column("github_repositories", "default_branch") + op.create_index( + op.f("ix_github_repositories_github_id"), + "github_repositories", + ["github_id"], + unique=True, + ) + op.create_index( + op.f("ix_github_repositories_name"), + "github_repositories", + ["name"], + unique=True, + ) + + op.rename_table("vcs_releases", "github_releases") + op.alter_column( + "github_releases", + "provider_id", + new_column_name="release_id", + type_=sa.Integer(), + nullable=True, + existing_type=sa.String(length=255), + existing_nullable=False, + postgresql_using="provider_id::integer", + ) + op.drop_column("github_releases", "provider") + if op.get_context().dialect.name == "postgresql": + op.alter_column( + "github_releases", + "errors", + type_=sa.dialects.postgresql.JSON, + postgresql_using="errors::text::json", + ) + op.create_unique_constraint( + op.f("uq_github_releases_release_id"), + table_name="github_releases", + columns=["release_id"], + ) + # ### end Alembic commands ### diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py new file mode 100644 index 00000000..c45f1c89 --- /dev/null +++ b/invenio_vcs/models.py @@ 
-0,0 +1,347 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Invenio. +# Copyright (C) 2023 CERN. +# +# Invenio is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. 
+ +"""Models for GitHub integration.""" + +import uuid +from enum import Enum + +from invenio_accounts.models import User +from invenio_db import db +from invenio_i18n import lazy_gettext as _ +from invenio_webhooks.models import Event +from sqlalchemy import UniqueConstraint +from sqlalchemy.dialects import postgresql +from sqlalchemy_utils.models import Timestamp +from sqlalchemy_utils.types import ChoiceType, JSONType, UUIDType + +RELEASE_STATUS_TITLES = { + "RECEIVED": _("Received"), + "PROCESSING": _("Processing"), + "PUBLISHED": _("Published"), + "FAILED": _("Failed"), + "DELETED": _("Deleted"), +} + +RELEASE_STATUS_ICON = { + "RECEIVED": "spinner loading icon", + "PROCESSING": "spinner loading icon", + "PUBLISHED": "check icon", + "FAILED": "times icon", + "DELETED": "times icon", +} + +RELEASE_STATUS_COLOR = { + "RECEIVED": "warning", + "PROCESSING": "warning", + "PUBLISHED": "positive", + "FAILED": "negative", + "DELETED": "negative", +} + + +class ReleaseStatus(Enum): + """Constants for possible status of a Release.""" + + __order__ = "RECEIVED PROCESSING PUBLISHED FAILED DELETED" + + RECEIVED = "R" + """Release has been received and is pending processing.""" + + PROCESSING = "P" + """Release is still being processed.""" + + PUBLISHED = "D" + """Release was successfully processed and published.""" + + FAILED = "F" + """Release processing has failed.""" + + DELETED = "E" + """Release has been deleted.""" + + def __init__(self, value): + """Hack.""" + + def __eq__(self, other): + """Equality test.""" + return self.value == other + + def __str__(self): + """Return its value.""" + return self.value + + @property + def title(self): + """Return human readable title.""" + return RELEASE_STATUS_TITLES[self.name] + + @property + def icon(self): + """Font Awesome status icon.""" + return RELEASE_STATUS_ICON[self.name] + + @property + def color(self): + """UI status color.""" + return RELEASE_STATUS_COLOR[self.name] + + +repository_user_association = db.Table( + 
"vcs_repository_users", + db.Model.metadata, + db.Column( + "repository_id", + UUIDType, + db.ForeignKey("vcs_repositories.id"), + primary_key=True, + ), + db.Column( + "user_id", db.Integer, db.ForeignKey("accounts_user.id"), primary_key=True + ), +) + + +class Repository(db.Model, Timestamp): + """Information about a GitHub repository.""" + + __tablename__ = "vcs_repositories" + + __table_args__ = ( + UniqueConstraint( + "provider", + "name", + name="uq_vcs_repositories_provider_name", + ), + UniqueConstraint( + "provider", + "provider_id", + name="uq_vcs_repositories_provider_provider_id", + ), + # Index("ix_vcs_repositories_provider_provider_id", "provider", "provider_id"), + ) + + id = db.Column( + UUIDType, + primary_key=True, + default=uuid.uuid4, + ) + """Repository identifier.""" + + provider_id = db.Column( + db.String(255), + nullable=False, + ) + """Unique GitHub identifier for a repository. + + .. note:: + + Past implementations of GitHub for Invenio, used the repository name + (eg. 'inveniosoftware/invenio-github') in order to track repositories. + This however leads to problems, since repository names can change and + thus render the stored repository name useless. In order to tackle this + issue, the `github_id` should be used to track repositories, which is a + unique identifier that GitHub uses for each repository and doesn't + change on renames/transfers. + + In order to be able to keep deleted repositories with releases that + have been published, it is possible to keep an entry without a + `github_id`, that only has a `name`. 
+ """ + + provider = db.Column(db.String(255), nullable=False) + """Which VCS provider the repository is hosted by (and therefore the context in which to consider the provider_id)""" + + description = db.Column(db.String(10000), nullable=True) + html_url = db.Column(db.String(10000), nullable=False) + license_spdx = db.Column(db.String(255), nullable=True) + default_branch = db.Column(db.String(255), nullable=False) + + full_name = db.Column("name", db.String(255), nullable=False) + """Fully qualified name of the repository including user/organization.""" + + hook = db.Column(db.String(255), nullable=True) + """Hook identifier.""" + + enabled_by_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True) + + # + # Relationships + # + users = db.relationship(User, secondary=repository_user_association) + enabled_by_user = db.relationship(User, foreign_keys=[enabled_by_id]) + + @classmethod + def create( + cls, + provider, + provider_id, + html_url, + default_branch, + full_name=None, + description=None, + license_spdx=None, + **kwargs, + ): + """Create the repository.""" + obj = cls( + provider=provider, + provider_id=provider_id, + full_name=full_name, + html_url=html_url, + default_branch=default_branch, + description=description, + license_spdx=license_spdx, + **kwargs, + ) + db.session.add(obj) + return obj + + def add_user(self, user_id: int): + user = User(id=user_id) + user = db.session.merge(user) + self.users.append(user) + + def remove_user(self, user_id: int): + user = User(id=user_id) + user = db.session.merge(user) + self.users.remove(user) + + @classmethod + def get(cls, provider, provider_id=None, full_name=None): + """Return a repository given its name or github id. + + :param integer github_id: GitHub repository identifier. + :param str name: GitHub repository full name. + :returns: The repository object. + :raises: :py:exc:`~sqlalchemy.orm.exc.NoResultFound`: if the repository + doesn't exist. 
+ :raises: :py:exc:`~sqlalchemy.orm.exc.MultipleResultsFound`: if + multiple repositories with the specified GitHub id and/or name + exist. + """ + repo = None + if provider_id: + repo = cls.query.filter( + Repository.provider_id == provider_id, Repository.provider == provider + ).one_or_none() + if not repo and full_name is not None: + repo = cls.query.filter( + Repository.full_name == full_name, Repository.provider == provider + ).one_or_none() + + return repo + + @property + def enabled(self): + """Return if the repository has webhooks enabled.""" + return bool(self.hook) + + def latest_release(self, status=None): + """Chronologically latest published release of the repository.""" + # Bail out fast if object (Repository) not in DB session. + if self not in db.session: + return None + + q = self.releases if status is None else self.releases.filter_by(status=status) + return q.order_by(db.desc(Release.created)).first() + + def __repr__(self): + """Get repository representation.""" + return "".format(self=self) + + +class Release(db.Model, Timestamp): + """Information about a GitHub release.""" + + __tablename__ = "vcs_releases" + + __table_args__ = ( + UniqueConstraint( + "provider", + "provider_id", + name="uq_vcs_releases_provider_id_provider", + ), + UniqueConstraint( + "provider_id", + "provider", + "tag", + name="uq_vcs_releases_provider_id_provider_tag", + ), + ) + + id = db.Column( + UUIDType, + primary_key=True, + default=uuid.uuid4, + ) + """Release identifier.""" + + provider_id = db.Column(db.String(255), nullable=True) + """Unique GitHub release identifier.""" + + provider = db.Column(db.String(255), nullable=False) + """Which VCS provider the release is hosted by (and therefore the context in which to consider the provider_id)""" + + tag = db.Column(db.String(255)) + """Release tag.""" + + errors = db.Column( + JSONType().with_variant( + # TODO postgresql specific. Limits the usage of the DB engine. 
+ postgresql.JSON(none_as_null=True), + "postgresql", + ), + nullable=True, + ) + """Release processing errors.""" + + repository_id = db.Column(UUIDType, db.ForeignKey(Repository.id)) + """Repository identifier.""" + + event_id = db.Column(UUIDType, db.ForeignKey(Event.id), nullable=True) + """Incoming webhook event identifier.""" + + record_id = db.Column( + UUIDType, + index=True, + nullable=True, + ) + """Weak reference to a record identifier.""" + + status = db.Column( + ChoiceType(ReleaseStatus, impl=db.CHAR(1)), + nullable=False, + ) + """Status of the release, e.g. 'processing', 'published', 'failed', etc.""" + + repository = db.relationship( + Repository, backref=db.backref("releases", lazy="dynamic") + ) + + event = db.relationship(Event) + + def __repr__(self): + """Get release representation.""" + return f"" From 66c42c0ca41fd78f9350553bc322d0161a866002 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Thu, 9 Oct 2025 10:56:38 +0200 Subject: [PATCH 02/12] chore: pydoc --- ...54318294_switch_to_generic_git_services.py | 14 +++++----- invenio_vcs/models.py | 26 +++++++++++-------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py index 9a58c6dc..6f22fc23 100644 --- a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -1,11 +1,11 @@ -# +# -*- coding: utf-8 -*- # This file is part of Invenio. # Copyright (C) 2025 CERN. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. 
-"""Switch to generic git services""" +"""Switch to a generic VCS module (not GitHub-specific).""" import uuid from datetime import datetime, timezone @@ -253,12 +253,12 @@ def upgrade(): def downgrade(): - """Downgrade database.""" - - # Currently, the downgrade can only be peformed **without data**. The tables are transformed but - # data will not be successfully migrated. The upgrade migration has a large amount of custom logic - # for migrating the data into the new format, and this is not replicated/reversed for downgrading. + """Downgrade database. + Currently, the downgrade can only be peformed **without data**. The tables are transformed but + data will not be successfully migrated. The upgrade migration has a large amount of custom logic + for migrating the data into the new format, and this is not replicated/reversed for downgrading. + """ op.alter_column( "vcs_repositories", "enabled_by_id", diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index c45f1c89..70a368c8 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -22,7 +22,7 @@ # waive the privileges and immunities granted to it by virtue of its status # as an Intergovernmental Organization or submit itself to any jurisdiction. -"""Models for GitHub integration.""" +"""Models for the VCS integration.""" import uuid from enum import Enum @@ -124,7 +124,7 @@ def color(self): class Repository(db.Model, Timestamp): - """Information about a GitHub repository.""" + """Information about a vcs repository.""" __tablename__ = "vcs_repositories" @@ -153,7 +153,7 @@ class Repository(db.Model, Timestamp): db.String(255), nullable=False, ) - """Unique GitHub identifier for a repository. + """Unique VCS provider identifier for a repository. .. note:: @@ -161,13 +161,14 @@ class Repository(db.Model, Timestamp): (eg. 'inveniosoftware/invenio-github') in order to track repositories. 
This however leads to problems, since repository names can change and thus render the stored repository name useless. In order to tackle this - issue, the `github_id` should be used to track repositories, which is a + issue, the `provider_id` should be used to track repositories, which is a unique identifier that GitHub uses for each repository and doesn't change on renames/transfers. In order to be able to keep deleted repositories with releases that have been published, it is possible to keep an entry without a - `github_id`, that only has a `name`. + `provider_id`, that only has a `name`. This only applies to the default + `github` provider on migrated pre-VCS instances. """ provider = db.Column(db.String(255), nullable=False) @@ -219,26 +220,29 @@ def create( return obj def add_user(self, user_id: int): + """Add permission for a user to access the repository.""" user = User(id=user_id) user = db.session.merge(user) self.users.append(user) def remove_user(self, user_id: int): + """Remove permission for a user to access the repository.""" user = User(id=user_id) user = db.session.merge(user) self.users.remove(user) @classmethod def get(cls, provider, provider_id=None, full_name=None): - """Return a repository given its name or github id. + """Return a repository given its name or provider id. - :param integer github_id: GitHub repository identifier. - :param str name: GitHub repository full name. + :param str provider: Registered ID of the VCS provider. + :param str provider_id: VCS provider repository identifier. + :param str name: Repository full name. :returns: The repository object. :raises: :py:exc:`~sqlalchemy.orm.exc.NoResultFound`: if the repository doesn't exist. :raises: :py:exc:`~sqlalchemy.orm.exc.MultipleResultsFound`: if - multiple repositories with the specified GitHub id and/or name + multiple repositories with the specified provider id and/or name exist. 
""" repo = None @@ -273,7 +277,7 @@ def __repr__(self): class Release(db.Model, Timestamp): - """Information about a GitHub release.""" + """Information about a VCS release.""" __tablename__ = "vcs_releases" @@ -299,7 +303,7 @@ class Release(db.Model, Timestamp): """Release identifier.""" provider_id = db.Column(db.String(255), nullable=True) - """Unique GitHub release identifier.""" + """Unique VCS provider release identifier.""" provider = db.Column(db.String(255), nullable=False) """Which VCS provider the release is hosted by (and therefore the context in which to consider the provider_id)""" From bf91a219e304565788762222b926fe36628f6a9a Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Wed, 15 Oct 2025 15:08:30 +0200 Subject: [PATCH 03/12] WIP: models: JSONB for errors column --- invenio_vcs/models.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index 70a368c8..f1cca9f0 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -33,6 +33,7 @@ from invenio_webhooks.models import Event from sqlalchemy import UniqueConstraint from sqlalchemy.dialects import postgresql +from sqlalchemy.ext.mutable import MutableDict from sqlalchemy_utils.models import Timestamp from sqlalchemy_utils.types import ChoiceType, JSONType, UUIDType @@ -312,10 +313,11 @@ class Release(db.Model, Timestamp): """Release tag.""" errors = db.Column( - JSONType().with_variant( - # TODO postgresql specific. Limits the usage of the DB engine. 
- postgresql.JSON(none_as_null=True), - "postgresql", + MutableDict.as_mutable( + db.JSON() + .with_variant(postgresql.JSONB(), "postgresql") + .with_variant(JSONType(), "sqlite") + .with_variant(JSONType(), "mysql") ), nullable=True, ) From 24cfce3a690f639ee052ee280244c89b58e28b96 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Wed, 15 Oct 2025 15:33:51 +0200 Subject: [PATCH 04/12] WIP: chore: license --- invenio_vcs/models.py | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index f1cca9f0..36757a20 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -1,26 +1,9 @@ # -*- coding: utf-8 -*- -# # This file is part of Invenio. -# Copyright (C) 2023 CERN. -# -# Invenio is free software; you can redistribute it -# and/or modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; either version 2 of the -# License, or (at your option) any later version. -# -# Invenio is distributed in the hope that it will be -# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Invenio; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, -# MA 02111-1307, USA. +# Copyright (C) 2025 CERN. # -# In applying this license, CERN does not -# waive the privileges and immunities granted to it by virtue of its status -# as an Intergovernmental Organization or submit itself to any jurisdiction. +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. 
"""Models for the VCS integration.""" From f8a3d84a4b196eb3dd09868dc3cfb8ff36eb28df Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Wed, 22 Oct 2025 14:24:45 +0200 Subject: [PATCH 05/12] feat(models): rename enabled_by_id -> enabled_by_user_id, add migration for orphaned repos --- ...54318294_switch_to_generic_git_services.py | 26 ++++++++++++++++--- invenio_vcs/models.py | 4 +-- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py index 6f22fc23..b344d5ac 100644 --- a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -67,7 +67,7 @@ def upgrade(): op.add_column( "vcs_repositories", sa.Column("license_spdx", sa.String(255), nullable=True) ) - op.alter_column("vcs_repositories", "user_id", new_column_name="enabled_by_id") + op.alter_column("vcs_repositories", "user_id", new_column_name="enabled_by_user_id") op.drop_index("ix_github_repositories_name") op.drop_index("ix_github_repositories_github_id") @@ -115,7 +115,10 @@ def upgrade(): sa.Column("name", sa.String(255), nullable=False), sa.Column("hook", sa.String(255), nullable=True), sa.Column( - "enabled_by_id", sa.Integer, sa.ForeignKey("account_user.id"), nullable=True + "enabled_by_user_id", + sa.Integer, + sa.ForeignKey("account_user.id"), + nullable=True, ), sa.Column("created", sa.DateTime, nullable=False), sa.Column("updated", sa.DateTime, nullable=False), @@ -163,7 +166,7 @@ def upgrade(): license_spdx=None, # This repo wasn't enabled hook=None, - enabled_by_id=None, + enabled_by_user_id=None, created=datetime.now(tz=timezone.utc), updated=datetime.now(tz=timezone.utc), ) @@ -188,6 +191,21 @@ def upgrade(): .values(extra_data={"last_sync": remote_account["extra_data"]["last_sync"]}) ) + # Fill in HTML URL for orphaned repos to ensure we don't have any NULLs + vcs_repositories 
= session.execute(sa.select(vcs_repositories_table)) + for vcs_repository in vcs_repositories.mappings(): + if vcs_repository["html_url"] is None: + session.execute( + vcs_repositories_table.update() + .filter_by(id=vcs_repository["id"]) + .values( + html_url=f'https://github.com/{vcs_repository["name"]}', + updated=datetime.now(tz=timezone.utc), + ) + ) + + session.commit() + # We initially set this to nullable=True so we can create the column without an error # (it would be null for existing records) but after the SQLAlchemy operations above we # have populated it so we can mark it non-nullable. @@ -261,7 +279,7 @@ def downgrade(): """ op.alter_column( "vcs_repositories", - "enabled_by_id", + "enabled_by_user_id", new_column_name="user_id", ) op.drop_table("vcs_repository_users") diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index 36757a20..e215e16b 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -169,13 +169,13 @@ class Repository(db.Model, Timestamp): hook = db.Column(db.String(255), nullable=True) """Hook identifier.""" - enabled_by_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True) + enabled_by_user_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True) # # Relationships # users = db.relationship(User, secondary=repository_user_association) - enabled_by_user = db.relationship(User, foreign_keys=[enabled_by_id]) + enabled_by_user = db.relationship(User, foreign_keys=[enabled_by_user_id]) @classmethod def create( From 1c52d2d2b3615ecc611b42f74ab5ef396b9c8c90 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Thu, 23 Oct 2025 09:40:26 +0200 Subject: [PATCH 06/12] fix(models): remove redundant index + improve performance of `add_user` and `remove_user` --- ...54318294_switch_to_generic_git_services.py | 8 +------ invenio_vcs/models.py | 21 ++++++++----------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py 
b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py index b344d5ac..07694a44 100644 --- a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -238,14 +238,8 @@ def upgrade(): op.drop_constraint( op.f("uq_github_releases_release_id"), table_name="vcs_releases", type_="unique" ) - # A given provider cannot have duplicate repository IDs. - # These constraints are also inherently deleted when the `provider` column is dropped - op.create_unique_constraint( - constraint_name=op.f("uq_vcs_releases_provider_id_provider"), - table_name="vcs_releases", - columns=["provider_id", "provider"], - ) # A specific repository from a given provider cannot have multiple releases of the same tag + # This constraint is also inherently deleted when the `provider` column is dropped op.create_unique_constraint( constraint_name=op.f("uq_vcs_releases_provider_id_provider_tag"), table_name="vcs_releases", diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index e215e16b..d24ef6c5 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -14,7 +14,7 @@ from invenio_db import db from invenio_i18n import lazy_gettext as _ from invenio_webhooks.models import Event -from sqlalchemy import UniqueConstraint +from sqlalchemy import UniqueConstraint, delete, insert from sqlalchemy.dialects import postgresql from sqlalchemy.ext.mutable import MutableDict from sqlalchemy_utils.models import Timestamp @@ -205,15 +205,17 @@ def create( def add_user(self, user_id: int): """Add permission for a user to access the repository.""" - user = User(id=user_id) - user = db.session.merge(user) - self.users.append(user) + stmt = insert(repository_user_association).values( + repository_id=self.id, user_id=user_id + ) + db.session.execute(stmt) def remove_user(self, user_id: int): """Remove permission for a user to access the repository.""" - user = User(id=user_id) - user = db.session.merge(user) - 
self.users.remove(user) + stmt = delete(repository_user_association).filter_by( + repository_id=self.id, user_id=user_id + ) + db.session.execute(stmt) @classmethod def get(cls, provider, provider_id=None, full_name=None): @@ -266,11 +268,6 @@ class Release(db.Model, Timestamp): __tablename__ = "vcs_releases" __table_args__ = ( - UniqueConstraint( - "provider", - "provider_id", - name="uq_vcs_releases_provider_id_provider", - ), UniqueConstraint( "provider_id", "provider", From e24790eaeaa3d1ff3056993b0aa6779c88f917a0 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Thu, 23 Oct 2025 11:28:55 +0200 Subject: [PATCH 07/12] fix(models): remove html_url --- ...54318294_switch_to_generic_git_services.py | 34 +------------------ invenio_vcs/models.py | 3 -- 2 files changed, 1 insertion(+), 36 deletions(-) diff --git a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py index 07694a44..63fb1300 100644 --- a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -53,17 +53,12 @@ def upgrade(): op.add_column( "vcs_repositories", sa.Column( - "default_branch", sa.String(255), nullable=False, server_default="master" + "default_branch", sa.String(255), nullable=False, server_default="main" ), ) op.add_column( "vcs_repositories", sa.Column("description", sa.String(10000), nullable=True) ) - op.add_column( - # Nullable for now (see below) - "vcs_repositories", - sa.Column("html_url", sa.String(10000), nullable=True), - ) op.add_column( "vcs_repositories", sa.Column("license_spdx", sa.String(255), nullable=True) ) @@ -109,7 +104,6 @@ def upgrade(): sa.Column("provider_id", sa.String(255), nullable=True), sa.Column("provider", sa.String(255), nullable=True), sa.Column("description", sa.String(10000), nullable=True), - sa.Column("html_url", sa.String(10000), nullable=False), sa.Column("license_spdx", sa.String(255), 
nullable=True), sa.Column("default_branch", sa.String(255), nullable=False), sa.Column("name", sa.String(255), nullable=False), @@ -159,8 +153,6 @@ def upgrade(): description=github_repo["description"], name=github_repo["full_name"], default_branch=github_repo["default_branch"], - # So far we have only supported github.com so we can safely assume the URL - html_url=f'https://github.com/{github_repo["full_name"]}', # We have never stored this, it is queried at runtime right now. When the first # sync happens after this migration, we will download all the license IDs from the VCS. license_spdx=None, @@ -179,7 +171,6 @@ def upgrade(): description=github_repo["description"], name=github_repo["full_name"], default_branch=github_repo["default_branch"], - html_url=f'https://github.com/{github_repo["full_name"]}', updated=datetime.now(tz=timezone.utc), ) ) @@ -191,28 +182,6 @@ def upgrade(): .values(extra_data={"last_sync": remote_account["extra_data"]["last_sync"]}) ) - # Fill in HTML URL for orphaned repos to ensure we don't have any NULLs - vcs_repositories = session.execute(sa.select(vcs_repositories_table)) - for vcs_repository in vcs_repositories.mappings(): - if vcs_repository["html_url"] is None: - session.execute( - vcs_repositories_table.update() - .filter_by(id=vcs_repository["id"]) - .values( - html_url=f'https://github.com/{vcs_repository["name"]}', - updated=datetime.now(tz=timezone.utc), - ) - ) - - session.commit() - - # We initially set this to nullable=True so we can create the column without an error - # (it would be null for existing records) but after the SQLAlchemy operations above we - # have populated it so we can mark it non-nullable. 
- op.alter_column( - "vcs_repositories", "html_url", nullable=False, existing_nullable=True - ) - op.rename_table("github_releases", "vcs_releases") op.alter_column( "vcs_releases", @@ -300,7 +269,6 @@ def downgrade(): ) op.drop_column("github_repositories", "provider") op.drop_column("github_repositories", "description") - op.drop_column("github_repositories", "html_url") op.drop_column("github_repositories", "license_spdx") op.drop_column("github_repositories", "default_branch") op.create_index( diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index d24ef6c5..158e69db 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -159,7 +159,6 @@ class Repository(db.Model, Timestamp): """Which VCS provider the repository is hosted by (and therefore the context in which to consider the provider_id)""" description = db.Column(db.String(10000), nullable=True) - html_url = db.Column(db.String(10000), nullable=False) license_spdx = db.Column(db.String(255), nullable=True) default_branch = db.Column(db.String(255), nullable=False) @@ -182,7 +181,6 @@ def create( cls, provider, provider_id, - html_url, default_branch, full_name=None, description=None, @@ -194,7 +192,6 @@ def create( provider=provider, provider_id=provider_id, full_name=full_name, - html_url=html_url, default_branch=default_branch, description=description, license_spdx=license_spdx, From 98b86fedd2d782640d2152ed904b09e1eee8f2ab Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Thu, 23 Oct 2025 16:18:17 +0200 Subject: [PATCH 08/12] WIP: models: create upgrade script for 2-step data migration --- ...54318294_switch_to_generic_git_services.py | 343 +++++------------- invenio_vcs/models.py | 3 +- invenio_vcs/upgrade_scripts/__init__.py | 7 + .../upgrade_scripts/migrate_3_0_to_4_0.py | 310 ++++++++++++++++ 4 files changed, 402 insertions(+), 261 deletions(-) create mode 100644 invenio_vcs/upgrade_scripts/__init__.py create mode 100644 invenio_vcs/upgrade_scripts/migrate_3_0_to_4_0.py diff --git 
a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py index 63fb1300..a47174db 100644 --- a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -7,12 +7,9 @@ """Switch to a generic VCS module (not GitHub-specific).""" -import uuid -from datetime import datetime, timezone import sqlalchemy as sa from alembic import op -from sqlalchemy.ext.mutable import MutableDict from sqlalchemy_utils import JSONType, UUIDType # revision identifiers, used by Alembic. @@ -27,198 +24,53 @@ def upgrade(): """Upgrade database.""" - op.rename_table("github_repositories", "vcs_repositories") - op.alter_column( - "vcs_repositories", - "github_id", - new_column_name="provider_id", - type_=sa.String(length=255), - nullable=False, - existing_type=sa.Integer(), - existing_nullable=True, - ) - op.alter_column( - "vcs_repositories", - "hook", - type_=sa.String(length=255), - nullable=True, - existing_type=sa.Integer(), - existing_nullable=True, - ) - op.add_column( - "vcs_repositories", - # We use the provider name "github" by default as this is what we're already using across the codebase - sa.Column("provider", sa.String(255), nullable=False, server_default="github"), - ) - op.add_column( + op.create_table( "vcs_repositories", + sa.Column("id", UUIDType()), + sa.Column("provider_id", sa.String(length=255), nullable=False), sa.Column( - "default_branch", sa.String(255), nullable=False, server_default="main" + "provider", sa.String(length=255), nullable=False, server_default="github" ), - ) - op.add_column( - "vcs_repositories", sa.Column("description", sa.String(10000), nullable=True) - ) - op.add_column( - "vcs_repositories", sa.Column("license_spdx", sa.String(255), nullable=True) - ) - op.alter_column("vcs_repositories", "user_id", new_column_name="enabled_by_user_id") - op.drop_index("ix_github_repositories_name") - 
op.drop_index("ix_github_repositories_github_id") - - # Because they rely on the `provider` column, these are automatically - # deleted when downgrading so we don't need a separate drop command - # for them. - op.create_unique_constraint( - constraint_name=op.f("uq_vcs_repositories_provider_provider_id"), - table_name="vcs_repositories", - columns=["provider", "provider_id"], - ) - op.create_unique_constraint( - constraint_name=op.f("uq_vcs_repositories_provider_name"), - table_name="vcs_repositories", - columns=["provider", "name"], - ) - - # Migrate data from the OAuth remote `extra_data` field to the repositories table - # where we will now store everything directly. - # - # We need to recreate the SQLAlchemy models for `RemoteAccount` and `Repository` here but - # in a much more lightweight way. We cannot simply import the models because (a) they depend - # on the full Invenio app being initialised and all extensions available and (b) we need - # to work with the models as they stand precisely at this point in the migration chain - # rather than the model file itself which may be at a later commit. - # - # We only include here the columns, constraints, and relations that we actually need to - # perform the migration, therefore keeping these models as lightweight as possible. 
- remote_account_table = sa.table( - "oauthclient_remoteaccount", - sa.Column("id", sa.Integer, primary_key=True), - sa.Column("user_id", sa.Integer, sa.ForeignKey("account_user.id")), - sa.Column("client_id", sa.String(255)), - sa.Column("extra_data", MutableDict.as_mutable(JSONType)), - ) - vcs_repositories_table = sa.table( - "vcs_repositories", - sa.Column("id", UUIDType, primary_key=True), - sa.Column("provider_id", sa.String(255), nullable=True), - sa.Column("provider", sa.String(255), nullable=True), - sa.Column("description", sa.String(10000), nullable=True), - sa.Column("license_spdx", sa.String(255), nullable=True), - sa.Column("default_branch", sa.String(255), nullable=False), - sa.Column("name", sa.String(255), nullable=False), - sa.Column("hook", sa.String(255), nullable=True), + sa.Column("name", sa.String(length=255), nullable=False), sa.Column( - "enabled_by_user_id", - sa.Integer, - sa.ForeignKey("account_user.id"), - nullable=True, + "default_branch", + sa.String(length=255), + nullable=False, + server_default="main", + ), + sa.Column("description", sa.String(length=10000)), + sa.Column("license_spdx", sa.String(length=255)), + sa.Column("hook", sa.String(length=255)), + sa.Column("enabled_by_user_id", sa.Integer), + sa.Column("created", sa.DateTime(), nullable=False), + sa.Column("updated", sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint("id", name=op.f("pk_vcs_repositories")), + sa.ForeignKeyConstraint( + ["enabled_by_user_id"], + ["accounts_user.id"], + name=op.f("fk_vcs_repository_enabled_by_user_id_accounts_user"), + ), + sa.UniqueConstraint( + "provider", + "provider_id", + name=op.f("uq_vcs_repositories_provider_provider_id"), + ), + sa.UniqueConstraint( + "provider", + "name", + name=op.f("uq_vcs_repositories_provider_name"), ), - sa.Column("created", sa.DateTime, nullable=False), - sa.Column("updated", sa.DateTime, nullable=False), - ) - - # This is the recommended way to run SQLAlchemy operations in a migration, see 
https://alembic.sqlalchemy.org/en/latest/ops.html#alembic.operations.Operations.execute - session = op.get_bind() - - # We don't know the client ID as this is a config variable. - # So to find the RemoteAccounts that correspond to GitHub, we need to check for the existence - # of the `repos` key in the `extra_data` JSON. We cannot make this very efficient sadly, because - # (a) in Postgres we are using JSON not JSONB so there is no efficient JSON querying and (b) the - # instance might be using MySQL/SQLite where we store it as `TEXT`. - - remote_accounts = session.execute(sa.select(remote_account_table)) - for remote_account in remote_accounts.mappings(): - if "repos" not in remote_account["extra_data"]: - continue - - repos = remote_account["extra_data"]["repos"] - - for id, github_repo in repos.items(): - # `id` (the dict key) is a string because JSON keys must be strings - - matching_db_repo_id = session.scalar( - sa.select(vcs_repositories_table).filter_by(provider_id=id) - ) - - if matching_db_repo_id is None: - # We are now storing _all_ repositories (even non-enabled ones) in the DB. - # The repo-user association will be created on the first sync after this migration, we need to download - # the list of users with access to the repo from the GitHub API. - session.execute( - vcs_repositories_table.insert().values( - id=uuid.uuid4(), - provider_id=id, - provider="github", - description=github_repo["description"], - name=github_repo["full_name"], - default_branch=github_repo["default_branch"], - # We have never stored this, it is queried at runtime right now. When the first - # sync happens after this migration, we will download all the license IDs from the VCS. 
- license_spdx=None, - # This repo wasn't enabled - hook=None, - enabled_by_user_id=None, - created=datetime.now(tz=timezone.utc), - updated=datetime.now(tz=timezone.utc), - ) - ) - else: - session.execute( - vcs_repositories_table.update() - .filter_by(id=matching_db_repo_id) - .values( - description=github_repo["description"], - name=github_repo["full_name"], - default_branch=github_repo["default_branch"], - updated=datetime.now(tz=timezone.utc), - ) - ) - - # Remove `repos` from the existing `extra_data`, leaving only the last sync timestamp - session.execute( - remote_account_table.update() - .filter_by(id=remote_account["id"]) - .values(extra_data={"last_sync": remote_account["extra_data"]["last_sync"]}) - ) - - op.rename_table("github_releases", "vcs_releases") - op.alter_column( - "vcs_releases", - "release_id", - new_column_name="provider_id", - type_=sa.String(length=255), - nullable=False, - existing_type=sa.Integer(), - existing_nullable=True, - ) - op.add_column( - "vcs_releases", - sa.Column("provider", sa.String(255), nullable=False, server_default="github"), - ) - if op.get_context().dialect.name == "postgresql": - op.alter_column( - "vcs_releases", - "errors", - type_=sa.dialects.postgresql.JSONB, - postgresql_using="errors::text::jsonb", - ) - - op.drop_constraint( - op.f("uq_github_releases_release_id"), table_name="vcs_releases", type_="unique" - ) - # A specific repository from a given provider cannot have multiple releases of the same tag - # This constraint is also inherently deleted when the `provider` column is dropped - op.create_unique_constraint( - constraint_name=op.f("uq_vcs_releases_provider_id_provider_tag"), - table_name="vcs_releases", - columns=["provider_id", "provider", "tag"], ) op.create_table( "vcs_repository_users", - sa.Column("repository_id", UUIDType(), primary_key=True), - sa.Column("user_id", sa.Integer(), primary_key=True), + sa.Column("repository_id", UUIDType()), + sa.Column("user_id", sa.Integer()), + 
sa.Column("created", sa.DateTime(), nullable=False), + sa.Column("updated", sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint( + "repository_id", "user_id", name=op.f("pk_vcs_repository_users") + ), sa.ForeignKeyConstraint( ["repository_id"], ["vcs_repositories.id"], @@ -230,82 +82,55 @@ def upgrade(): name=op.f("fk_vcs_repository_users_user_id_accounts_user"), ), ) - # ### end Alembic commands ### - -def downgrade(): - """Downgrade database. - - Currently, the downgrade can only be peformed **without data**. The tables are transformed but - data will not be successfully migrated. The upgrade migration has a large amount of custom logic - for migrating the data into the new format, and this is not replicated/reversed for downgrading. - """ - op.alter_column( - "vcs_repositories", - "enabled_by_user_id", - new_column_name="user_id", + op.create_table( + "vcs_releases", + sa.Column("id", UUIDType()), + sa.Column("provider_id", sa.String(length=255), nullable=False), + sa.Column( + "provider", sa.String(length=255), nullable=False, server_default="github" + ), + sa.Column("tag", sa.String(length=255), nullable=False), + sa.Column( + "errors", + sa.JSON() + .with_variant(sa.dialects.postgresql.JSONB(), "postgresql") + .with_variant(JSONType(), "sqlite") + .with_variant(JSONType(), "mysql"), + ), + sa.Column("repository_id", UUIDType(), nullable=False), + sa.Column("event_id", UUIDType(), nullable=True), + sa.Column("record_id", UUIDType()), + sa.Column("status", sa.CHAR(1)), + sa.Column("created", sa.DateTime(), nullable=False), + sa.Column("updated", sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint("id", name=op.f("pk_vcs_releases")), + sa.ForeignKeyConstraint( + ["event_id"], + ["webhooks_events.id"], + name=op.f("fk_vcs_releases_event_id_webhooks_events"), + ), + sa.ForeignKeyConstraint( + ["repository_id"], + ["vcs_repositories.id"], + name=op.f("fk_vcs_releases_repository_id_vcs_repositories"), + ), + sa.UniqueConstraint( + "provider", + 
"provider_id", + name=op.f("uq_vcs_releases_provider_id_provider"), + ), ) - op.drop_table("vcs_repository_users") - op.rename_table("vcs_repositories", "github_repositories") - op.alter_column( - "github_repositories", - "provider_id", - new_column_name="github_id", - type_=sa.Integer(), - nullable=True, - existing_type=sa.String(length=255), - existing_nullable=False, - postgresql_using="provider_id::integer", - ) - op.alter_column( - "github_repositories", - "hook", - type_=sa.Integer(), - nullable=True, - existing_type=sa.String(length=255), - existing_nullable=True, - postgresql_using="hook::integer", - ) - op.drop_column("github_repositories", "provider") - op.drop_column("github_repositories", "description") - op.drop_column("github_repositories", "license_spdx") - op.drop_column("github_repositories", "default_branch") op.create_index( - op.f("ix_github_repositories_github_id"), - "github_repositories", - ["github_id"], - unique=True, - ) - op.create_index( - op.f("ix_github_repositories_name"), - "github_repositories", - ["name"], - unique=True, + op.f("ix_vcs_releases_record_id"), + table_name="vcs_releases", + columns=["record_id"], ) - op.rename_table("vcs_releases", "github_releases") - op.alter_column( - "github_releases", - "provider_id", - new_column_name="release_id", - type_=sa.Integer(), - nullable=True, - existing_type=sa.String(length=255), - existing_nullable=False, - postgresql_using="provider_id::integer", - ) - op.drop_column("github_releases", "provider") - if op.get_context().dialect.name == "postgresql": - op.alter_column( - "github_releases", - "errors", - type_=sa.dialects.postgresql.JSON, - postgresql_using="errors::text::json", - ) - op.create_unique_constraint( - op.f("uq_github_releases_release_id"), - table_name="github_releases", - columns=["release_id"], - ) - # ### end Alembic commands ### + +def downgrade(): + """Downgrade database.""" + op.drop_table("vcs_releases") + op.drop_table("vcs_repository_users") + 
op.drop_table("vcs_repositories") diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index 158e69db..f4f5c2cd 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -268,8 +268,7 @@ class Release(db.Model, Timestamp): UniqueConstraint( "provider_id", "provider", - "tag", - name="uq_vcs_releases_provider_id_provider_tag", + name="uq_vcs_releases_provider_id_provider", ), ) diff --git a/invenio_vcs/upgrade_scripts/__init__.py b/invenio_vcs/upgrade_scripts/__init__.py new file mode 100644 index 00000000..a7ce9edf --- /dev/null +++ b/invenio_vcs/upgrade_scripts/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2025 CERN. +# +# Invenio-VCS is free software; you can redistribute it and/or modify +# it under the terms of the MIT License; see LICENSE file for more details. +"""Upgrade scripts for InvenioVCS.""" diff --git a/invenio_vcs/upgrade_scripts/migrate_3_0_to_4_0.py b/invenio_vcs/upgrade_scripts/migrate_3_0_to_4_0.py new file mode 100644 index 00000000..b68f5dd2 --- /dev/null +++ b/invenio_vcs/upgrade_scripts/migrate_3_0_to_4_0.py @@ -0,0 +1,310 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2025 CERN. +# +# Invenio-VCS is free software; you can redistribute it and/or modify +# it under the terms of the MIT License; see LICENSE file for more details. 
+"""Migration script for v3 (old GitHub-only integration) to v4 (new generic VCS integration).""" + +import sys +import uuid +from datetime import datetime, timezone + +import sqlalchemy as sa +from alembic.runtime.migration import MigrationContext +from click import progressbar, secho +from invenio_db import db +from sqlalchemy.dialects import postgresql +from sqlalchemy.ext.mutable import MutableDict +from sqlalchemy_utils import JSONType, UUIDType + +# Lightweight models for all of the tables (incl old and new versions) +remote_account_table = sa.table( + "oauthclient_remoteaccount", + sa.Column("id", sa.Integer, primary_key=True), + sa.Column("user_id", sa.Integer, sa.ForeignKey("account_user.id")), + sa.Column("client_id", sa.String(255)), + # We may have changed this if we merge https://github.com/inveniosoftware/invenio-oauthclient/pull/360 + # but we're only reading this column so it shouldn't make a difference. + sa.Column("extra_data", MutableDict.as_mutable(JSONType)), +) +github_repositories_table = sa.table( + "github_repositories", + sa.Column("id", UUIDType, primary_key=True), + sa.Column("github_id", sa.String(255), nullable=True), + sa.Column("name", sa.String(255), nullable=False), + sa.Column("hook", sa.Integer, nullable=True), + sa.Column("user_id", sa.Integer, sa.ForeignKey("account_user.id"), nullable=True), + sa.Column("created", sa.DateTime, nullable=False), + sa.Column("updated", sa.DateTime, nullable=False), +) +vcs_repositories_table = sa.table( + "vcs_repositories", + sa.Column("id", UUIDType, primary_key=True), + sa.Column("provider_id", sa.String(255), nullable=False), + sa.Column("provider", sa.String(255), nullable=False), + sa.Column("description", sa.String(10000), nullable=True), + sa.Column("license_spdx", sa.String(255), nullable=True), + sa.Column("default_branch", sa.String(255), nullable=False), + sa.Column("name", sa.String(255), nullable=False), + sa.Column("hook", sa.String(255), nullable=True), + sa.Column( + 
"enabled_by_user_id", + sa.Integer, + sa.ForeignKey("account_user.id"), + nullable=True, + ), + sa.Column("created", sa.DateTime, nullable=False), + sa.Column("updated", sa.DateTime, nullable=False), +) +github_releases_table = sa.table( + "github_releases", + sa.Column("id", UUIDType, primary_key=True), + sa.Column("release_id", sa.Integer, primary_key=True), + sa.Column("tag", sa.String(255), nullable=True), + sa.Column("errors", MutableDict.as_mutable(JSONType), nullable=True), + sa.Column( + "repository_id", + UUIDType, + sa.ForeignKey("github_repositories.id"), + nullable=True, + ), + sa.Column("event_id", UUIDType, sa.ForeignKey("webhooks_events.id"), nullable=True), + sa.Column("record_id", UUIDType, nullable=True), + sa.Column("status", sa.CHAR(1), nullable=False), + sa.Column("created", sa.DateTime, nullable=False), + sa.Column("updated", sa.DateTime, nullable=False), +) +vcs_releases_table = sa.table( + "vcs_releases", + sa.Column("id", UUIDType, primary_key=True), + sa.Column("provider_id", sa.String(255), nullable=False), + sa.Column("provider", sa.String(255), nullable=False), + sa.Column("tag", sa.String(255), nullable=False), + sa.Column( + "errors", + MutableDict.as_mutable( + sa.JSON() + .with_variant(postgresql.JSONB(), "postgresql") + .with_variant(JSONType(), "sqlite") + .with_variant(JSONType(), "mysql") + ), + nullable=True, + ), + sa.Column( + "repository_id", + UUIDType, + sa.ForeignKey("vcs_repositories.id"), + nullable=True, + ), + sa.Column( + "event_id", UUIDType, sa.ForeignKey("webhooks_events.id"), nullable=False + ), + sa.Column("record_id", UUIDType, nullable=True), + sa.Column("status", sa.CHAR(1), nullable=False), + sa.Column("created", sa.DateTime, nullable=False), + sa.Column("updated", sa.DateTime, nullable=False), +) + + +def run_upgrade_for_oauthclient_repositories(): + """Move the JSON repos from oauthclient_remoteaccount to the new vcs_repositories table.""" + + secho( + "Migrating JSON data from oauthclient_remoteaccount 
into vcs_repositories table...", + fg="green", + ) + + # We don't know the client ID as this is a config variable. + # So to find the RemoteAccounts that correspond to GitHub, we need to check for the existence + # of the `repos` key in the `extra_data` JSON. We cannot make this very efficient sadly, because + # (a) in Postgres we are using JSON not JSONB so there is no efficient JSON querying and (b) the + # instance might be using MySQL/SQLite where we store it as `TEXT`. + + # We can make this a little bit faster if https://github.com/inveniosoftware/invenio-oauthclient/pull/328 + # were merged and released and all instances were using it, but this is unlikely to be the case + # by the time we release Invenio VCS v4. + + remote_accounts = db.session.execute(sa.select(remote_account_table)).mappings() + with progressbar(remote_accounts) as remote_accounts: + for remote_account in remote_accounts: + if "repos" not in remote_account["extra_data"]: + continue + + repos = remote_account["extra_data"]["repos"] + + for id, github_repo in repos.items(): + # `id` (the dict key) is a string because JSON keys must be strings + + # We might have already created it for another user + matching_db_repo_id = db.session.scalar( + sa.select(vcs_repositories_table).filter_by(provider_id=id) + ) + + if matching_db_repo_id is None: + # We are now storing _all_ repositories (even non-enabled ones) in the DB. + # The repo-user association will be created on the first sync after this migration, we need to download + # the list of users with access to the repo from the GitHub API. + db.session.execute( + vcs_repositories_table.insert().values( + id=uuid.uuid4(), + provider_id=id, + provider="github", + description=github_repo["description"], + name=github_repo["full_name"], + default_branch=github_repo["default_branch"], + # We have never stored this, it is queried at runtime right now. 
When the first + # sync happens after this migration, we will download all the license IDs from the VCS. + license_spdx=None, + # This repo wasn't enabled, since it is not already in the repositories table. + hook=None, + enabled_by_user_id=None, + created=datetime.now(tz=timezone.utc), + updated=datetime.now(tz=timezone.utc), + ) + ) + else: + db.session.execute( + vcs_repositories_table.update() + .filter_by(id=matching_db_repo_id) + .values( + description=github_repo["description"], + name=github_repo["full_name"], + default_branch=github_repo["default_branch"], + updated=datetime.now(tz=timezone.utc), + ) + ) + + # Remove `repos` from the existing `extra_data`, leaving only the last sync timestamp + db.session.execute( + remote_account_table.update() + .filter_by(id=remote_account["id"]) + .values( + extra_data={"last_sync": remote_account["extra_data"]["last_sync"]} + ) + ) + + db.session.commit() + + +def run_upgrade_for_existing_db_repositories(): + """Move over any old rows from github_repositories that weren't attached to any user (for whatever reason). + + These are (almost) all repos that are enabled and have a hook. However repos that have been enabled and then + later disabled are also included. + """ + + secho( + "Migrating old repo table entries to new vcs_repositories table...", fg="green" + ) + + old_db_repos = db.session.execute(sa.select(github_repositories_table)).mappings() + with progressbar(old_db_repos) as old_db_repos: + for old_db_repo in old_db_repos: + matching_new_repo_id = db.session.scalar( + sa.select( + vcs_repositories_table.c.id, + ).filter_by(provider_id=str(old_db_repo["github_id"])) + ) + + if matching_new_repo_id is None: + # We only have very limited metadata available at this point. + # The first sync job after this migration will fill in the rest. 
+ db.session.execute( + vcs_repositories_table.insert().values( + id=old_db_repo["id"], + provider_id=str(old_db_repo["github_id"]), + provider="github", + name=old_db_repo["name"], + default_branch="main", + license_spdx=None, + hook=old_db_repo["hook"], + enabled_by_user_id=old_db_repo["user_id"], + created=old_db_repo["created"], + updated=datetime.now(tz=timezone.utc), + ) + ) + else: + db.session.execute( + vcs_repositories_table.update() + .filter_by(id=matching_new_repo_id) + .values( + id=old_db_repo["id"], + hook=str(old_db_repo["hook"]), + enabled_by_user_id=old_db_repo["user_id"], + created=old_db_repo["created"], + ) + ) + + db.session.commit() + + +def run_upgrade_for_releases(): + """Copy releases from old table to new vcs_releases table.""" + + secho( + "Migrating old release table entries to new vcs_releases table...", fg="green" + ) + + # Finally, we copy over the releases + old_db_releases = db.session.execute(sa.select(github_releases_table)).mappings() + with progressbar(old_db_releases) as old_db_releases: + for old_db_release in old_db_releases: + # Since we've created all the repos, we know due to referential integrity that this release's repo ID corresponds + # to a valid and existent repo. + + db.session.execute( + vcs_releases_table.insert().values( + id=old_db_release["id"], + provider_id=str(old_db_release["release_id"]), + provider="github", + tag=old_db_release["tag"], + errors=old_db_release["errors"], + repository_id=old_db_release["repository_id"], + event_id=old_db_release["event_id"], + record_id=old_db_release["record_id"], + status=old_db_release["status"], + created=old_db_release["created"], + updated=datetime.now(tz=timezone.utc), + ) + ) + + db.session.commit() + + +def verify_alembic_version(expected_revision: str): + """Verify that the Alembic migration for this version has been executed. + + Attempting to run the other steps of this upgrade script on an old migration version + will have unexpected consequences. 
+ """ + + secho("Verifying Alembic migration is up-to-date...", fg="green") + + with db.engine.connect() as connection: + alembic_ctx = MigrationContext.configure(connection) + # This returns a tuple of the versions of each branch (without the branch name). + current_revs = alembic_ctx.get_current_heads() + + # We just need to check that our expected version ID is included in the tuple + if expected_revision not in current_revs: + secho( + "The invenio-github Alembic branch is not at the latest revision. Please upgrade it before continuing.", + fg="red", + ) + sys.exit(1) + + +def execute_upgrade(): + """Execute all of the steps for the upgrade of InvenioVCS v3 to v4.""" + secho("Starting Invenio-VCS v3->v4 data migration...", fg="green") + + verify_alembic_version("1754318294") + + run_upgrade_for_oauthclient_repositories() + run_upgrade_for_existing_db_repositories() + run_upgrade_for_releases() + + +if __name__ == "__main__": + execute_upgrade() From 65ef06cefef5f6bc7096a65b728547f52a72236b Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Fri, 24 Oct 2025 10:19:54 +0200 Subject: [PATCH 09/12] WIP: models: add timestamps to vcs_repository_users --- invenio_vcs/models.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index f4f5c2cd..96417307 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -8,6 +8,7 @@ """Models for the VCS integration.""" import uuid +from datetime import datetime, timezone from enum import Enum from invenio_accounts.models import User @@ -104,6 +105,8 @@ def color(self): db.Column( "user_id", db.Integer, db.ForeignKey("accounts_user.id"), primary_key=True ), + db.Column("created", db.DateTime, nullable=False), + db.Column("updated", db.DateTime, nullable=False), ) @@ -202,8 +205,9 @@ def create( def add_user(self, user_id: int): """Add permission for a user to access the repository.""" + now = datetime.now(tz=timezone.utc) stmt = 
insert(repository_user_association).values( - repository_id=self.id, user_id=user_id + repository_id=self.id, user_id=user_id, created=now, updated=now ) db.session.execute(stmt) From 8750be97cc2a99970aec06cac282d73178ab63cc Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Fri, 24 Oct 2025 11:21:40 +0200 Subject: [PATCH 10/12] WIP: models: remove title/icon/color mappings for release status These have been moved to a Jinja template --- invenio_vcs/models.py | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index 96417307..ba21c6c7 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -21,30 +21,6 @@ from sqlalchemy_utils.models import Timestamp from sqlalchemy_utils.types import ChoiceType, JSONType, UUIDType -RELEASE_STATUS_TITLES = { - "RECEIVED": _("Received"), - "PROCESSING": _("Processing"), - "PUBLISHED": _("Published"), - "FAILED": _("Failed"), - "DELETED": _("Deleted"), -} - -RELEASE_STATUS_ICON = { - "RECEIVED": "spinner loading icon", - "PROCESSING": "spinner loading icon", - "PUBLISHED": "check icon", - "FAILED": "times icon", - "DELETED": "times icon", -} - -RELEASE_STATUS_COLOR = { - "RECEIVED": "warning", - "PROCESSING": "warning", - "PUBLISHED": "positive", - "FAILED": "negative", - "DELETED": "negative", -} - class ReleaseStatus(Enum): """Constants for possible status of a Release.""" @@ -77,21 +53,6 @@ def __str__(self): """Return its value.""" return self.value - @property - def title(self): - """Return human readable title.""" - return RELEASE_STATUS_TITLES[self.name] - - @property - def icon(self): - """Font Awesome status icon.""" - return RELEASE_STATUS_ICON[self.name] - - @property - def color(self): - """UI status color.""" - return RELEASE_STATUS_COLOR[self.name] - repository_user_association = db.Table( "vcs_repository_users", From 7ba04da63ea0dfbf0c430568912f74eff288f6d3 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Fri, 24 Oct 2025 
14:02:39 +0200 Subject: [PATCH 11/12] WIP: models: simplify repository get() method, remove provider/name index on vcs_repositories --- ...54318294_switch_to_generic_git_services.py | 5 ---- invenio_vcs/models.py | 27 ++++--------------- 2 files changed, 5 insertions(+), 27 deletions(-) diff --git a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py index a47174db..9694b4a9 100644 --- a/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py +++ b/invenio_vcs/alembic/1754318294_switch_to_generic_git_services.py @@ -55,11 +55,6 @@ def upgrade(): "provider_id", name=op.f("uq_vcs_repositories_provider_provider_id"), ), - sa.UniqueConstraint( - "provider", - "name", - name=op.f("uq_vcs_repositories_provider_name"), - ), ) op.create_table( diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index ba21c6c7..3b46ab7c 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -77,11 +77,6 @@ class Repository(db.Model, Timestamp): __tablename__ = "vcs_repositories" __table_args__ = ( - UniqueConstraint( - "provider", - "name", - name="uq_vcs_repositories_provider_name", - ), UniqueConstraint( "provider", "provider_id", @@ -180,30 +175,18 @@ def remove_user(self, user_id: int): db.session.execute(stmt) @classmethod - def get(cls, provider, provider_id=None, full_name=None): - """Return a repository given its name or provider id. + def get(cls, provider: str, provider_id: str): + """Return a repository given its provider ID. :param str provider: Registered ID of the VCS provider. :param str provider_id: VCS provider repository identifier. - :param str name: Repository full name. :returns: The repository object. :raises: :py:exc:`~sqlalchemy.orm.exc.NoResultFound`: if the repository doesn't exist. - :raises: :py:exc:`~sqlalchemy.orm.exc.MultipleResultsFound`: if - multiple repositories with the specified provider id and/or name - exist. 
""" - repo = None - if provider_id: - repo = cls.query.filter( - Repository.provider_id == provider_id, Repository.provider == provider - ).one_or_none() - if not repo and full_name is not None: - repo = cls.query.filter( - Repository.full_name == full_name, Repository.provider == provider - ).one_or_none() - - return repo + return cls.query.filter( + Repository.provider_id == provider_id, Repository.provider == provider + ).one_or_none() @property def enabled(self): From 455a2c0a8283a600e25d8b9bfb78d4e24cedc367 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Fri, 31 Oct 2025 09:54:53 +0100 Subject: [PATCH 12/12] WIP: models: add list_users method to Repository --- invenio_vcs/models.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/invenio_vcs/models.py b/invenio_vcs/models.py index 3b46ab7c..bf1f65a9 100644 --- a/invenio_vcs/models.py +++ b/invenio_vcs/models.py @@ -7,6 +7,8 @@ """Models for the VCS integration.""" +from __future__ import annotations + import uuid from datetime import datetime, timezone from enum import Enum @@ -15,7 +17,7 @@ from invenio_db import db from invenio_i18n import lazy_gettext as _ from invenio_webhooks.models import Event -from sqlalchemy import UniqueConstraint, delete, insert +from sqlalchemy import UniqueConstraint, delete, insert, select from sqlalchemy.dialects import postgresql from sqlalchemy.ext.mutable import MutableDict from sqlalchemy_utils.models import Timestamp @@ -174,15 +176,19 @@ def remove_user(self, user_id: int): ) db.session.execute(stmt) + def list_users(self): + """Return a list of users with access to the repository.""" + return db.session.execute( + select(repository_user_association).filter_by(repository_id=self.id) + ) + @classmethod - def get(cls, provider: str, provider_id: str): + def get(cls, provider: str, provider_id: str) -> Repository | None: """Return a repository given its provider ID. :param str provider: Registered ID of the VCS provider. 
:param str provider_id: VCS provider repository identifier. - :returns: The repository object. - :raises: :py:exc:`~sqlalchemy.orm.exc.NoResultFound`: if the repository - doesn't exist. + :returns: The repository object or None if one with the given ID and provider doesn't exist. """ return cls.query.filter( Repository.provider_id == provider_id, Repository.provider == provider