diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index 63e1851..7a50626 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -6,6 +6,10 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - uses: psf/black@20.8b1 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - run: pip install ruff + - run: ruff check . + - run: ruff format --check . diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 4e1ef42..eef44ed 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -1,4 +1,4 @@ -# This workflows will upload a Python Package using Twine when a release is created +# This workflow will upload a Python Package using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: Upload Python Package @@ -9,23 +9,23 @@ on: jobs: deploy: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies run: | python -m pip install --upgrade pip - pip install setuptools wheel twine + pip install build twine - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | - python setup.py sdist bdist_wheel + python -m build twine upload dist/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ab5489e..549d42d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,20 +1,15 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: check-yaml - id: end-of-file-fixer - - id: 
requirements-txt-fixer - - id: trailing-whitespace - - - repo: https://github.com/PyCQA/isort - rev: 5.7.0 - hooks: - - id: isort - args: ["--profile", "black"] + - id: check-ast - - repo: https://github.com/psf/black - rev: 20.8b1 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.0 hooks: - - id: black + - id: ruff + args: [--fix] + - id: ruff-format diff --git a/Dockerfile b/Dockerfile index eed51c2..d51f12a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,7 @@ -FROM tiangolo/uvicorn-gunicorn:python3.7-alpine3.8 +FROM python:3.12-slim LABEL authors="Nathan Sheffield, Michal Stolarczyk" COPY . /app +WORKDIR /app RUN pip install . +CMD ["uvicorn", "refgenieserver.main:app", "--host", "0.0.0.0", "--port", "80"] diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 6f98629..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,5 +0,0 @@ -include requirements/* -include README.md -include LICENSE.txt -include Dockerfile -recursive-include refgenieserver * \ No newline at end of file diff --git a/changelog.md b/changelog.md index 9602475..199609f 100644 --- a/changelog.md +++ b/changelog.md @@ -2,6 +2,22 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.8.0] -- 2026-02-25 + +### Changed +- Updated yacman and refgenconf dependency requirements +- Modernized packaging to use pyproject.toml with setuptools +- Updated FastAPI route definitions to use `pattern` instead of deprecated `regex` +- Updated GitHub Actions to modern versions + +### Added +- `app_factory.create_app()` function for programmatic server creation + +### Fixed +- Compatibility with yacman v1 (`with rgc as r:` context manager removed) +- Compatibility with refgenconf 0.13.0 +- Various modernization and small bugfix improvements + ## [0.7.0] -- 2021-04-27 ### Added - `remotes` section in the refgenieserver config, which supersedes `remote_url_base`. 
It can be used to define multiple remote data providers. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..2b4d42a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,67 @@ +[project] +name = "refgenieserver" +version = "0.8.0" +description = "A web interface and RESTful API for reference genome assets" +readme = "README.md" +license = {text = "BSD-2-Clause"} +requires-python = ">=3.10" +authors = [ + { name = "Michal Stolarczyk" }, + { name = "Vince Reuter" }, + { name = "Nathan Sheffield" }, +] +keywords = ["bioinformatics", "sequencing", "ngs", "genomes", "server"] +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Scientific/Engineering :: Bio-Informatics", +] +dependencies = [ + "aiofiles", + "fastapi", + "jinja2", + "logmuse>=0.2", + "refgenconf>=0.13.0", + "ubiquerg>=0.6.1", + "uvicorn>=0.7.1", + "yacman>=0.9.5", +] + +[project.urls] +Homepage = "https://refgenie.databio.org" + +[project.scripts] +refgenieserver = "refgenieserver.__main__:main" + +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +include = ["refgenieserver*"] + +[tool.setuptools.package-data] +refgenieserver = ["templates/**", "static/*"] + +[project.optional-dependencies] +test = [ + "pytest", + "httpx", +] + +[tool.pytest.ini_options] +addopts = "-rfE" +testpaths = ["tests"] + +[tool.ruff] +line-length = 88 + +[tool.ruff.lint] +select = ["E", "F", "I"] +ignore = ["F403", "F405", "E501"] + +[tool.ruff.lint.isort] +known-first-party = ["refgenieserver"] diff --git a/refgenieserver/__init__.py b/refgenieserver/__init__.py index b898684..87db908 100644 --- a/refgenieserver/__init__.py +++ b/refgenieserver/__init__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from .const import * from .helpers import 
* from .main import * diff --git a/refgenieserver/_version.py b/refgenieserver/_version.py index 49e0fc1..ed9369f 100644 --- a/refgenieserver/_version.py +++ b/refgenieserver/_version.py @@ -1 +1,6 @@ -__version__ = "0.7.0" +from __future__ import annotations + +# Version is defined in pyproject.toml +from importlib.metadata import version + +__version__ = version("refgenieserver") diff --git a/refgenieserver/app_factory.py b/refgenieserver/app_factory.py new file mode 100644 index 0000000..51602f2 --- /dev/null +++ b/refgenieserver/app_factory.py @@ -0,0 +1,81 @@ +"""Factory function to create a configured refgenieserver FastAPI app.""" + +from __future__ import annotations + +import logging +import sys + +from fastapi import FastAPI +from refgenconf import RefGenConf + +from .const import PKG_NAME, PRIVATE_API, TAGS_METADATA +from .helpers import purge_nonservable + +_LOGGER = logging.getLogger(PKG_NAME) + + +def create_app(config_path: str, archive_base_dir: str | None = None) -> FastAPI: + """Create a configured FastAPI app for refgenieserver. + + This builds a fresh FastAPI app with the real refgenieserver routers, + configured from a given YAML config file. Used both for production + (as an alternative to the CLI entry point) and for integration tests. + + Args: + config_path: Path to the refgenie server config YAML. + archive_base_dir: Override for BASE_DIR (default: /genomes). + Used in tests to point at a temp directory. + + Returns: + Configured FastAPI app ready to serve. + """ + # Use sys.modules to get the actual module objects. Using + # `import refgenieserver.main as m` can return a different object + # than what's in sys.modules (due to __init__.py's `from .main import *`), + # which means attribute modifications won't be visible to other modules. 
+ import refgenieserver.const # noqa: F401 ensure loaded + import refgenieserver.helpers # noqa: F401 ensure loaded + import refgenieserver.main # noqa: F401 ensure loaded + + main_module = sys.modules["refgenieserver.main"] + const_module = sys.modules["refgenieserver.const"] + helpers_module = sys.modules["refgenieserver.helpers"] + + # Load config and purge non-servable entries + rgc = RefGenConf.from_yaml_file(config_path) + purge_nonservable(rgc) + + # Override the module-level globals that the routers import. + # The routers do `from ..main import _LOGGER, rgc, app, templates` + # which reads from main's module dict at import time. + main_module.rgc = rgc + main_module._LOGGER = _LOGGER + + if archive_base_dir is not None: + # Must override BASE_DIR in both const and helpers modules, + # because helpers.py uses `from .const import *` which copies + # BASE_DIR into helpers' own namespace. + const_module.BASE_DIR = archive_base_dir + helpers_module.BASE_DIR = archive_base_dir + + from ._version import __version__ as server_v + + app = FastAPI( + title=PKG_NAME, + description="a web interface and RESTful API for reference genome assets", + version=server_v, + openapi_tags=TAGS_METADATA, + ) + + # Set the app on main_module so routers that import `app` from main + # can access it (needed for openapi spec introspection) + main_module.app = app + + # Import routers AFTER rgc is set (they read rgc at import time) + from .routers import private, version3 + + app.include_router(version3.router) + app.include_router(version3.router, prefix="/v3") + app.include_router(private.router, prefix=f"/{PRIVATE_API}") + + return app diff --git a/refgenieserver/const.py b/refgenieserver/const.py index 0b1e4e9..749c53a 100644 --- a/refgenieserver/const.py +++ b/refgenieserver/const.py @@ -1,4 +1,7 @@ -""" Package constants """ +"""Package constants""" + +from __future__ import annotations + import os from platform import python_version @@ -7,39 +10,41 @@ from ._version import 
__version__ as server_v -ALL_VERSIONS = { +ALL_VERSIONS: dict[str, str] = { "server_version": server_v, "rgc_version": rgc_v, "python_version": python_version(), } -PKG_NAME = "refgenieserver" -DEFAULT_PORT = 80 -BASE_DIR = "/genomes" +PKG_NAME: str = "refgenieserver" +DEFAULT_PORT: int = 80 +BASE_DIR: str = "/genomes" # if running outside of the Docker container 'BASE_DIR' can be replaced with rgc[CFG_ARCHIVE_KEY] -TEMPLATES_DIRNAME = "templates" -TEMPLATES_PATH = os.path.join( +TEMPLATES_DIRNAME: str = "templates" +TEMPLATES_PATH: str = os.path.join( os.path.dirname(os.path.abspath(__file__)), TEMPLATES_DIRNAME ) -STATIC_DIRNAME = "static" -STATIC_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), STATIC_DIRNAME) -LOG_FORMAT = "%(levelname)s in %(funcName)s: %(message)s" -MSG_404 = "No such {} on server" -DESC_PLACEHOLDER = "No description" -CHECKSUM_PLACEHOLDER = "No digest" +STATIC_DIRNAME: str = "static" +STATIC_PATH: str = os.path.join( + os.path.dirname(os.path.abspath(__file__)), STATIC_DIRNAME +) +LOG_FORMAT: str = "%(levelname)s in %(funcName)s: %(message)s" +MSG_404: str = "No such {} on server" +DESC_PLACEHOLDER: str = "No description" +CHECKSUM_PLACEHOLDER: str = "No digest" # Here we define the key name changes; format: {"new_key": "old_key"} # This dict is then used to pre-process the attributes dict before serving to the old versions of the client -CHANGED_KEYS = {CFG_ASSET_PATH_KEY: "path"} +CHANGED_KEYS: dict[str, str] = {CFG_ASSET_PATH_KEY: "path"} # TODO: to be removed in the future -CFG_LEGACY_ARCHIVE_CHECKSUM_KEY = "legacy_archive_digest" +CFG_LEGACY_ARCHIVE_CHECKSUM_KEY: str = "legacy_archive_digest" -API1_ID = "APIv1" -API2_ID = "APIv2" -API3_ID = "APIv3" -PRIV_API_ID = "PRIVATE_API" +API1_ID: str = "APIv1" +API2_ID: str = "APIv2" +API3_ID: str = "APIv3" +PRIV_API_ID: str = "PRIVATE_API" -TAGS_METADATA = [ +TAGS_METADATA: list[dict[str, str]] = [ { "name": API3_ID, "description": "These are the most recent API endpoints. 
" diff --git a/refgenieserver/data_models.py b/refgenieserver/data_models.py index 8b2243b..4814314 100644 --- a/refgenieserver/data_models.py +++ b/refgenieserver/data_models.py @@ -1,12 +1,12 @@ +from __future__ import annotations + from typing import Dict, List from pydantic import BaseModel class Tag(BaseModel): - """ - Tag data model - """ + """Tag data model.""" asset_path: str asset_digest: str @@ -19,9 +19,7 @@ class Tag(BaseModel): class Asset(BaseModel): - """ - Asset data model - """ + """Asset data model.""" asset_description: str tags: Dict[str, Tag] @@ -29,9 +27,7 @@ class Asset(BaseModel): class Genome(BaseModel): - """ - Genome data model - """ + """Genome data model.""" genome_description: str assets: Dict[str, Asset] diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 08e7b7e..b341cab 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -1,13 +1,21 @@ +from __future__ import annotations + +import argparse import logging from json import load from string import Formatter +from typing import TYPE_CHECKING, Any from fastapi import HTTPException from fastapi.responses import FileResponse, JSONResponse, RedirectResponse from refgenconf.exceptions import RefgenconfError from refgenconf.helpers import send_data_request from ubiquerg import VersionInHelpParser, is_url -from yacman import get_first_env_var + +if TYPE_CHECKING: + from fastapi import FastAPI + from refgenconf import RefGenConf + from starlette.responses import Response from ._version import __version__ as v from .const import * @@ -16,17 +24,18 @@ _LOGGER = logging.getLogger(PKG_NAME) -def build_parser(): - """ - Building argument parser +def build_parser() -> argparse.ArgumentParser: + """Build the argument parser. - :return argparse.ArgumentParser + Returns: + The configured argument parser. 
""" - env_var_val = ( - get_first_env_var(CFG_ENV_VARS)[1] - if get_first_env_var(CFG_ENV_VARS) is not None - else "not set" - ) + env_var_val = "not set" + for var in CFG_ENV_VARS: + val = os.environ.get(var) + if val is not None: + env_var_val = val + break banner = "%(prog)s - refgenie web server utilities" additional_description = ( "For subcommand-specific options, type: '%(prog)s -h'" @@ -52,16 +61,18 @@ def add_subparser(cmd, description): # add arguments that are common for both subparsers for cmd, desc in msg_by_cmd.items(): sps[cmd] = add_subparser(cmd, desc) - sps[cmd].add_argument( - "-c", - "--config", - required=False, - dest="config", - help=f"A path to the refgenie config file (YAML). If not provided, the " - f"first available environment variable among: " - f"'{', '.join(CFG_ENV_VARS)}' will be used if set. " - f"Currently: {env_var_val}", - ), + ( + sps[cmd].add_argument( + "-c", + "--config", + required=False, + dest="config", + help=f"A path to the refgenie config file (YAML). If not provided, the " + f"first available environment variable among: " + f"'{', '.join(CFG_ENV_VARS)}' will be used if set. " + f"Currently: {env_var_val}", + ), + ) sps[cmd].add_argument( "-d", "--dbg", @@ -110,12 +121,14 @@ def add_subparser(cmd, description): return parser -def preprocess_attrs(attrs): - """ - Based on the CHANGED_KEYS mapping (new_key:old_key), rename the keys in the provided one +def preprocess_attrs(attrs: dict) -> dict: + """Rename keys based on the CHANGED_KEYS mapping (new_key:old_key). - :param yacman.YacAttMap attrs: mapping to process - :return yacman.YacAttMap: mapping with renamed key names + Args: + attrs: Mapping to process. + + Returns: + Mapping with renamed key names. 
""" from copy import deepcopy @@ -127,12 +140,14 @@ def preprocess_attrs(attrs): return attrs_cpy -def get_openapi_version(app): - """ - Get the OpenAPI version from the OpenAPI description JSON +def get_openapi_version(app: FastAPI) -> str: + """Get the OpenAPI version from the OpenAPI description JSON. - :param fastapi.FastAPI app: app object - :return str: openAPI version in use + Args: + app: FastAPI app object. + + Returns: + The openAPI version in use. """ try: return app.openapi()["openapi"] @@ -142,25 +157,29 @@ def get_openapi_version(app): def get_datapath_for_genome( - rgc, fill_dict, pth_templ="{base}/{genome}/{file_name}", remote_key=None -): - """ - Get the path to the data file to serve. - - Depending on the remote URL base being set or not, the function will return - either a remote URL to the file or a file path along with a flag indicating - the source - - :param refgenconf.RefGenConf rgc: configuration object to use - :param dict fill_dict: a dictionary to use to fill in the path template - :param str pth_templ: the path template - :return (str, bool): a pair of file source and the flag indicating whether - the source is remote + rgc: RefGenConf, + fill_dict: dict[str, str], + pth_templ: str = "{base}/{genome}/{file_name}", + remote_key: str | None = None, +) -> tuple[str, bool]: + """Get the path to the data file to serve. + + Depending on the remote URL base being set or not, returns either a remote + URL to the file or a file path along with a flag indicating the source. + + Args: + rgc: Configuration object to use. + fill_dict: Dictionary to fill in the path template. + pth_templ: The path template. + remote_key: Key identifying the remote data provider. + + Returns: + A pair of (file source, is_remote flag). 
""" req_keys = [i[1] for i in Formatter().parse(pth_templ) if i[1] is not None] - assert all( - [k in req_keys for k in list(fill_dict.keys())] - ), f"Only the these keys are allowed in the fill_dict: {req_keys}" + assert all([k in req_keys for k in list(fill_dict.keys())]), ( + f"Only the these keys are allowed in the fill_dict: {req_keys}" + ) fill_dict.update({"base": BASE_DIR}) # fill_dict.update({"base": rgc["genome_archive_folder"]}) remote = is_data_remote(rgc) @@ -181,13 +200,17 @@ def get_datapath_for_genome( return pth_templ.format(**fill_dict), remote -def is_data_remote(rgc): - """ - Determine if server genome config defines a 'remotes' key, 'http is one of them and - additionally assert the correct structure -- 'prefix' key defined. +def is_data_remote(rgc: RefGenConf) -> bool: + """Determine if the server genome config defines a remote data source. + + Checks for a 'remotes' key with correct structure (each remote has a + 'prefix' key defined). - :param refgenconf.RefGenConf rgc: server genome config object - :return bool: whether remote data source is configured + Args: + rgc: Server genome config object. + + Returns: + Whether a remote data source is configured. """ return ( True @@ -203,16 +226,17 @@ def is_data_remote(rgc): ) -def purge_nonservable(rgc): - """ - Remove entries in RefGenConf object that were not processed by the archiver - and should not be served +def purge_nonservable(rgc: RefGenConf) -> RefGenConf: + """Remove entries not processed by the archiver that should not be served. - :param refgenconf.RefGenConf rgc: object to check - :return refgenconf.RefGenConf: object with just the servable entries + Args: + rgc: Configuration object to check. + + Returns: + The configuration object with only servable entries. 
""" - def _check_servable(rgc, genome, asset, tag): + def _check_servable(rgc: RefGenConf, genome: str, asset: str, tag: str) -> bool: tag_data = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][ CFG_ASSET_TAGS_KEY ][tag] @@ -220,25 +244,52 @@ def _check_servable(rgc, genome, asset, tag): [r in tag_data for r in [CFG_ARCHIVE_CHECKSUM_KEY, CFG_ARCHIVE_SIZE_KEY]] ) + # Collect items to remove (don't modify during iteration) + to_remove = [] + for genome_name, genome in rgc[CFG_GENOMES_KEY].items(): + if CFG_ASSETS_KEY not in genome: + continue for asset_name, asset in genome[CFG_ASSETS_KEY].items(): - try: - for tag_name, tag in asset[CFG_ASSET_TAGS_KEY].items(): - if not _check_servable(rgc, genome_name, asset_name, tag_name): - _LOGGER.debug( - "Removing '{}/{}:{}', it's not servable".format( - genome_name, asset_name, tag_name - ) + if CFG_ASSET_TAGS_KEY not in asset: + to_remove.append((genome_name, asset_name, None)) + continue + for tag_name in list(asset[CFG_ASSET_TAGS_KEY].keys()): + if not _check_servable(rgc, genome_name, asset_name, tag_name): + _LOGGER.debug( + "Removing '{}/{}:{}', it's not servable".format( + genome_name, asset_name, tag_name ) - rgc.cfg_remove_assets(genome_name, asset_name, tag_name) - except KeyError: - rgc.cfg_remove_assets(genome_name, asset_name) + ) + to_remove.append((genome_name, asset_name, tag_name)) + + # Remove after iteration completes + for genome_name, asset_name, tag_name in to_remove: + try: + rgc.cfg_remove_assets(genome_name, asset_name, tag_name) + except (KeyError, Exception): + _LOGGER.debug(f"Could not remove {genome_name}/{asset_name}:{tag_name}") + return rgc -def safely_get_example(rgc, entity, rgc_method, default, **kwargs): +def safely_get_example( + rgc: RefGenConf, entity: str, rgc_method: str, default: str, **kwargs: Any +) -> str: + """Safely get an example value from the config, falling back to a default. + + Args: + rgc: Configuration object. + entity: Description of the entity for logging. 
+ rgc_method: Name of the method to call on rgc. + default: Fallback value if the method call fails. + **kwargs: Additional keyword arguments passed to the method. + + Returns: + The first result element (if list) or the result itself, or the default. + """ try: - res = rgc.__getattr__(rgc_method)(**kwargs) + res = getattr(rgc, rgc_method)(**kwargs) return res[0] if isinstance(res, list) else res except Exception as e: _LOGGER.warning( @@ -248,13 +299,29 @@ def safely_get_example(rgc, entity, rgc_method, default, **kwargs): return default -def create_asset_file_path(rgc, genome, asset, tag, seek_key, remote_key="http"): - """ - Construct a path to an unarchived asset file - - :param str genome: - :param str asset: - :param str tag: +def create_asset_file_path( + rgc: RefGenConf, + genome: str, + asset: str, + tag: str | None, + seek_key: str, + remote_key: str = "http", +) -> str: + """Construct a path to an unarchived asset file. + + Args: + rgc: Configuration object. + genome: Genome name. + asset: Asset name. + tag: Tag name. + seek_key: Seek key name. + remote_key: Remote data provider key. + + Returns: + Path to the asset file. + + Raises: + HTTPException: If the asset or seek key is not found. 
""" tag = tag or rgc.get_default_tag( genome, asset @@ -265,7 +332,9 @@ def create_asset_file_path(rgc, genome, asset, tag, seek_key, remote_key="http") msg = MSG_404.format(f"asset ({genome}/{asset}:{tag})") _LOGGER.warning(msg) raise HTTPException(status_code=404, detail=msg) - tag_dict = rgc.genomes[genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][tag] + tag_dict = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][ + tag + ] if seek_key not in tag_dict[CFG_SEEK_KEYS_KEY]: msg = MSG_404.format(f"seek_key ({genome}/{asset}.{seek_key}:{tag})") _LOGGER.warning(msg) @@ -283,15 +352,24 @@ def create_asset_file_path(rgc, genome, asset, tag, seek_key, remote_key="http") return path -def serve_file_for_asset(rgc, genome, asset, tag, template): - """ - Serve a file, like log file +def serve_file_for_asset( + rgc: RefGenConf, genome: str, asset: str, tag: str | None, template: str +) -> Response: + """Serve a file, like a build log. + + Args: + rgc: Configuration object. + genome: Genome name. + asset: Asset name. + tag: Tag name. + template: File name template with placeholders for asset and tag names, + e.g. 'build_log_{}__{}.md'. + + Returns: + A RedirectResponse for remote files, or a FileResponse for local files. - :param str genome: genome name - :param str asset: asset name - :param str tag: tag name - :param ste template: file name template with place for asset and tag names, - e.g. 'build_log_{}__{}.md' + Raises: + HTTPException: If the file is not found. 
""" # returns 'default' for nonexistent genome/asset; no need to catch tag = tag or rgc.get_default_tag(genome, asset) @@ -313,15 +391,24 @@ def serve_file_for_asset(rgc, genome, asset, tag, template): raise HTTPException(status_code=404, detail=msg) -def serve_json_for_asset(rgc, genome, asset, tag, template): - """ - Serve a JSON object, like recipe or asset dir contents for an asset +def serve_json_for_asset( + rgc: RefGenConf, genome: str, asset: str, tag: str | None, template: str +) -> Response: + """Serve a JSON object, like a recipe or asset directory contents. + + Args: + rgc: Configuration object. + genome: Genome name. + asset: Asset name. + tag: Tag name. + template: File name template with placeholders for asset and tag names, + e.g. 'build_recipe_{}__{}.json'. - :param str genome: genome name - :param str asset: asset name - :param str tag: tag name - :param ste template: file name template with place for asset and tag names, - e.g. 'build_recipe_{}__{}.json' + Returns: + A RedirectResponse for remote files, or a JSONResponse for local files. + + Raises: + HTTPException: If the file is not found. """ # returns 'default' for nonexistent genome/asset; no need to catch tag = tag or rgc.get_default_tag(genome, asset) @@ -343,15 +430,22 @@ def serve_json_for_asset(rgc, genome, asset, tag, template): raise HTTPException(status_code=404, detail=msg) -def get_asset_dir_contents(rgc, genome, asset, tag): - """ - Get the asset directory contents into a list +def get_asset_dir_contents( + rgc: RefGenConf, genome: str, asset: str, tag: str | None +) -> list: + """Get the asset directory contents as a list. + + Args: + rgc: Configuration object. + genome: Genome name. + asset: Asset name. + tag: Tag name. + + Returns: + List of files in the asset directory. 
- :param refgenconf.RefGenConf rgc: config - :param str genome: genome name - :param str asset: asset name - :param str tag: tag name - :return list[str]: list of files in the asset directory + Raises: + TypeError: If the path is neither a valid URL nor an existing file. """ # returns 'default' for nonexistent genome/asset; no need to catch tag = tag or rgc.get_default_tag(genome, asset) diff --git a/refgenieserver/main.py b/refgenieserver/main.py index 2cde651..e3c821c 100644 --- a/refgenieserver/main.py +++ b/refgenieserver/main.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import sys import logmuse @@ -24,7 +26,8 @@ templates.env.filters["os_path_join"] = lambda paths: os.path.join(*paths) -def main(): +def main() -> None: + """Entry point for the refgenieserver CLI.""" global rgc, _LOGGER parser = build_parser() args = parser.parse_args() @@ -39,13 +42,13 @@ def main(): ) _LOGGER = logmuse.setup_logger(**logger_args) selected_cfg = select_genome_config(args.config) - assert ( - selected_cfg is not None - ), "You must provide a config file or set the {} environment variable".format( - "or ".join(CFG_ENV_VARS) + assert selected_cfg is not None, ( + "You must provide a config file or set the {} environment variable".format( + "or ".join(CFG_ENV_VARS) + ) ) # this RefGenConf object will be used in the server, so it's read-only - rgc = RefGenConf(filepath=selected_cfg, writable=False) + rgc = RefGenConf.from_yaml_file(selected_cfg) if args.command == "archive": arp = ( [parse_registry_path(x) for x in args.asset_registry_paths] @@ -55,10 +58,12 @@ def main(): archive(rgc, arp, args.force, args.remove, selected_cfg, args.genomes_desc) elif args.command == "serve": # the router imports need to be after the RefGenConf object is declared - with rgc as r: - purge_nonservable(r) + purge_nonservable(rgc) from .routers import private, version1, version2, version3 + # v3 is registered at both root (latest/default API) and /v3 (versioned). 
+ # This intentional dual-registration causes harmless "Duplicate Operation ID" + # warnings from FastAPI. These only affect OpenAPI codegen tools, not API usage. app.include_router(version3.router) app.include_router(version1.router, prefix="/v1") app.include_router(version2.router, prefix="/v2") diff --git a/refgenieserver/routers/private.py b/refgenieserver/routers/private.py index 9f6dd64..5b9223a 100644 --- a/refgenieserver/routers/private.py +++ b/refgenieserver/routers/private.py @@ -1,8 +1,10 @@ +from __future__ import annotations + from fastapi import APIRouter from ..const import * from ..data_models import Dict, Genome -from ..main import _LOGGER, app, rgc, templates +from ..main import _LOGGER, rgc router = APIRouter() @@ -15,9 +17,7 @@ operation_id=PRIVATE_API + API_ID_GENOMES_DICT, response_model=Dict[str, Genome], ) -async def get_genomes_dict(): - """ - **Private endpoint**, which returns the entire 'genomes' part of the config - """ +async def get_genomes_dict() -> dict: + """Return the entire 'genomes' section of the config (private endpoint).""" _LOGGER.info(f"serving genomes dict: '{rgc[CFG_GENOMES_KEY]}'") return rgc[CFG_GENOMES_KEY] diff --git a/refgenieserver/routers/version1.py b/refgenieserver/routers/version1.py index bc123f3..3785ca0 100644 --- a/refgenieserver/routers/version1.py +++ b/refgenieserver/routers/version1.py @@ -1,9 +1,11 @@ +from __future__ import annotations + from copy import copy from fastapi import APIRouter, HTTPException from refgenconf.helpers import replace_str_in_obj from starlette.requests import Request -from starlette.responses import FileResponse, RedirectResponse +from starlette.responses import FileResponse, RedirectResponse, Response from ..const import * from ..helpers import get_datapath_for_genome, get_openapi_version, preprocess_attrs @@ -16,10 +18,8 @@ @router.get("/", tags=api_version_tags) @router.get("/index", tags=api_version_tags) -async def index(request: Request): - """ - Returns a landing page 
HTML with the server resources ready do download. No inputs required. - """ +async def index(request: Request) -> Response: + """Return a landing page HTML with the server resources ready to download.""" _LOGGER.debug("RefGenConf object:\n{}".format(rgc)) templ_vars = { "request": request, @@ -32,31 +32,30 @@ async def index(request: Request): @router.get("/genomes", tags=api_version_tags) -def list_available_genomes(): - """ - Returns a list of genomes this server holds at least one asset for. No inputs required. - """ +def list_available_genomes() -> list[str]: + """Return a list of genomes this server holds at least one asset for.""" _LOGGER.info("serving genomes string: '{}'".format(rgc.genomes_str())) return rgc.genomes_list() @router.get("/assets", tags=api_version_tags) -def list_available_assets(): - """ - Returns a list of all assets that can be downloaded. No inputs required. - """ +def list_available_assets() -> dict: + """Return a list of all assets that can be downloaded.""" ret_dict = rgc.list(include_tags=True) _LOGGER.info("serving assets dict: {}".format(ret_dict)) return ret_dict @router.get("/asset/{genome}/{asset}/archive", tags=api_version_tags) -async def download_asset(genome: str, asset: str, tag: str = None): - """ - Returns an archive. Requires the genome name and the asset name as an input. +async def download_asset(genome: str, asset: str, tag: str | None = None) -> Response: + """Return an asset archive. + + Since tags were introduced, the default tag is selected behind the scenes. - Since the refgenconf.RefGenConf object structure has changed (tags were introduced), - the default tag has to be selected behind the scenes + Args: + genome: Genome name. + asset: Asset name. + tag: Tag name (default tag used if not specified). 
""" tag = tag or rgc.get_default_tag( genome, asset @@ -86,13 +85,14 @@ async def download_asset(genome: str, asset: str, tag: str = None): @router.get("/asset/{genome}/{asset}", tags=api_version_tags) -def download_asset_attributes(genome: str, asset: str): - """ - Returns a dictionary of asset attributes, like archive size, archive checksum etc. - Requires the genome name and the asset name as an input. +def download_asset_attributes(genome: str, asset: str) -> dict: + """Return a dictionary of asset attributes (archive size, checksum, etc.). - Since the refgenconf.RefGenConf object structure has changed (tags were introduced), - the default tag has to be selected behind the scenes + Since tags were introduced, the default tag is selected behind the scenes. + + Args: + genome: Genome name. + asset: Asset name. """ try: attrs = preprocess_attrs( @@ -126,9 +126,11 @@ def download_asset_attributes(genome: str, asset: str): @router.get("/genomes/{asset}", tags=api_version_tags) -def list_genomes_by_asset(asset: str): - """ - Returns a list of genomes that have the requested asset defined. Requires the asset name as an input. +def list_genomes_by_asset(asset: str) -> list[str]: + """Return a list of genomes that have the requested asset defined. + + Args: + asset: Asset name. 
""" genomes = rgc.list_genomes_by_asset(asset) _LOGGER.info("serving genomes by '{}' asset: {}".format(asset, genomes)) diff --git a/refgenieserver/routers/version2.py b/refgenieserver/routers/version2.py index ef69abf..9959309 100644 --- a/refgenieserver/routers/version2.py +++ b/refgenieserver/routers/version2.py @@ -1,10 +1,12 @@ +from __future__ import annotations + from copy import copy from fastapi import APIRouter, HTTPException from refgenconf.helpers import replace_str_in_obj from refgenconf.refgenconf import map_paths_by_id from starlette.requests import Request -from starlette.responses import FileResponse, JSONResponse, RedirectResponse +from starlette.responses import FileResponse, JSONResponse, RedirectResponse, Response from ubiquerg import parse_registry_path from ..const import * @@ -18,10 +20,8 @@ @router.get("/", tags=api_version_tags) @router.get("/index", tags=api_version_tags) -async def index(request: Request): - """ - Returns a landing page HTML with the server resources ready do download. No inputs required. - """ +async def index(request: Request) -> Response: + """Return a landing page HTML with the server resources ready to download.""" _LOGGER.debug("RefGenConf object:\n{}".format(rgc)) templ_vars = { "request": request, @@ -34,9 +34,16 @@ async def index(request: Request): @router.get("/asset/{genome}/{asset}/splash", tags=api_version_tags) -async def asset_splash_page(request: Request, genome: str, asset: str, tag: str = None): - """ - Returns an asset splash page +async def asset_splash_page( + request: Request, genome: str, asset: str, tag: str | None = None +) -> Response: + """Return an asset splash page. + + Args: + request: The incoming request. + genome: Genome name. + asset: Asset name. + tag: Tag name (default tag used if not specified). 
""" tag = tag or rgc.get_default_tag( genome, asset @@ -61,19 +68,15 @@ async def asset_splash_page(request: Request, genome: str, asset: str, tag: str @router.get("/genomes", tags=api_version_tags) -async def list_available_genomes(): - """ - Returns a list of genomes this server holds at least one asset for. No inputs required. - """ +async def list_available_genomes() -> list[str]: + """Return a list of genomes this server holds at least one asset for.""" _LOGGER.info("serving genomes string: '{}'".format(rgc.genomes_str())) return rgc.genomes_list() @router.get("/assets", operation_id=API_ID_ASSETS, tags=api_version_tags) -async def list_available_assets(): - """ - Returns a list of all assets that can be downloaded. No inputs required. - """ +async def list_available_assets() -> dict: + """Return a list of all assets that can be downloaded.""" ret_dict = rgc.list(include_tags=True) _LOGGER.info("serving assets dict: {}".format(ret_dict)) return ret_dict @@ -84,11 +87,13 @@ async def list_available_assets(): operation_id=API_ID_ARCHIVE, tags=api_version_tags, ) -async def download_asset(genome: str, asset: str, tag: str = None): - """ - Returns an archive. Requires the genome name and the asset name as an input. +async def download_asset(genome: str, asset: str, tag: str | None = None) -> Response: + """Return an asset archive. - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. Default tag is returned otherwise. + Args: + genome: Genome name. + asset: Asset name. + tag: Tag name (default tag used if not specified). """ tag = tag or rgc.get_default_tag( genome, asset @@ -122,9 +127,12 @@ async def download_asset(genome: str, asset: str, tag: str = None): operation_id=API_ID_DEFAULT_TAG, tags=api_version_tags, ) -async def get_asset_default_tag(genome: str, asset: str): - """ - Returns the default tag name. Requires genome name and asset name as an input. 
+async def get_asset_default_tag(genome: str, asset: str) -> str: + """Return the default tag name. + + Args: + genome: Genome name. + asset: Asset name. """ return rgc.get_default_tag(genome, asset) @@ -134,9 +142,13 @@ async def get_asset_default_tag(genome: str, asset: str): operation_id=API_ID_DIGEST, tags=api_version_tags, ) -async def get_asset_digest(genome: str, asset: str, tag: str): - """ - Returns the asset digest. Requires genome name asset name and tag name as an input. +async def get_asset_digest(genome: str, asset: str, tag: str) -> str: + """Return the asset digest. + + Args: + genome: Genome name. + asset: Asset name. + tag: Tag name. """ try: return rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][ @@ -155,9 +167,13 @@ async def get_asset_digest(genome: str, asset: str, tag: str): operation_id=API_ID_ARCHIVE_DIGEST, tags=api_version_tags, ) -async def get_archive_digest(genome: str, asset: str, tag: str): - """ - Returns the archive digest. Requires genome name asset name and tag name as an input. +async def get_archive_digest(genome: str, asset: str, tag: str) -> str: + """Return the archive digest. + + Args: + genome: Genome name. + asset: Asset name. + tag: Tag name. """ try: return rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][ @@ -174,11 +190,15 @@ async def get_archive_digest(genome: str, asset: str, tag: str): @router.get( "/asset/{genome}/{asset}/log", operation_id=API_ID_LOG, tags=api_version_tags ) -async def download_asset_build_log(genome: str, asset: str, tag: str = None): - """ - Returns a build log. Requires the genome name and the asset name as an input. - - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. Default tag is returned otherwise. +async def download_asset_build_log( + genome: str, asset: str, tag: str | None = None +) -> Response: + """Return a build log. + + Args: + genome: Genome name. + asset: Asset name. 
+ tag: Tag name (default tag used if not specified). """ tag = tag or rgc.get_default_tag( genome, asset @@ -209,11 +229,15 @@ async def download_asset_build_log(genome: str, asset: str, tag: str = None): @router.get( "/asset/{genome}/{asset}/recipe", operation_id=API_ID_RECIPE, tags=api_version_tags ) -async def download_asset_build_recipe(genome: str, asset: str, tag: str = None): - """ - Returns a build recipe. Requires the genome name and the asset name as an input. - - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. Default tag is returned otherwise. +async def download_asset_build_recipe( + genome: str, asset: str, tag: str | None = None +) -> Response: + """Return a build recipe. + + Args: + genome: Genome name. + asset: Asset name. + tag: Tag name (default tag used if not specified). """ tag = tag or rgc.get_default_tag( genome, asset @@ -246,11 +270,15 @@ async def download_asset_build_recipe(genome: str, asset: str, tag: str = None): @router.get( "/asset/{genome}/{asset}", operation_id=API_ID_ASSET_ATTRS, tags=api_version_tags ) -async def download_asset_attributes(genome: str, asset: str, tag: str = None): - """ - Returns a dictionary of asset attributes, like archive size, archive digest etc. - Requires the genome name and the asset name as an input. - Optionally, 'tag' query parameter can be specified to get a tagged asset attributes. +async def download_asset_attributes( + genome: str, asset: str, tag: str | None = None +) -> dict: + """Return a dictionary of asset attributes (archive size, digest, etc.). + + Args: + genome: Genome name. + asset: Asset name. + tag: Tag name (default tag used if not specified). """ tag = tag or rgc.get_default_tag( genome, asset @@ -285,9 +313,11 @@ async def download_asset_attributes(genome: str, asset: str, tag: str = None): @router.get("/genome/{genome}/genome_digest", tags=api_version_tags) -async def download_genome_digest(genome: str): - """ - Returns the genome digest. 
Requires the genome name as an input +async def download_genome_digest(genome: str) -> str: + """Return the genome digest. + + Args: + genome: Genome name. """ try: digest = rgc.get_genome_alias_digest(alias=genome) @@ -300,10 +330,11 @@ async def download_genome_digest(genome: str): @router.get("/genome/{genome}", operation_id=API_ID_GENOME_ATTRS, tags=api_version_tags) -async def download_genome_attributes(genome: str): - """ - Returns a dictionary of genome attributes, like archive size, archive digest etc. - Requires the genome name name as an input. +async def download_genome_attributes(genome: str) -> dict: + """Return a dictionary of genome attributes (archive size, digest, etc.). + + Args: + genome: Genome name. """ try: attrs = rgc.get_genome_attributes(genome) @@ -318,9 +349,11 @@ async def download_genome_attributes(genome: str): @router.get("/genomes/{asset}", tags=api_version_tags) -async def list_genomes_by_asset(asset: str): - """ - Returns a list of genomes that have the requested asset defined. Requires the asset name as an input. +async def list_genomes_by_asset(asset: str) -> list[str]: + """Return a list of genomes that have the requested asset defined. + + Args: + asset: Asset name. 
""" genomes = rgc.list_genomes_by_asset(asset) _LOGGER.info("serving genomes by '{}' asset: {}".format(asset, genomes)) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 0a9568d..0049d8b 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from copy import copy from datetime import date from enum import Enum @@ -8,7 +10,7 @@ from starlette.requests import Request from starlette.responses import FileResponse, RedirectResponse from ubiquerg import parse_registry_path -from yacman import IK, UndefinedAliasError +from yacman import UndefinedAliasError from ..const import * from ..data_models import Dict, List, Tag @@ -48,41 +50,41 @@ g = Path( ..., description="Genome digest", - regex=r"^\w+$", + pattern=r"^\w+$", max_length=48, min_length=48, - example=ex_digest, + examples={"default": ex_digest}, ) al = Path( ..., description="Genome alias", - regex=r"^\S+$", - example=ex_alias, + pattern=r"^\S+$", + examples={"default": ex_alias}, ) a = Path( ..., description="Asset name", - regex=r"^\S+$", - example=ex_asset, + pattern=r"^\S+$", + examples={"default": ex_asset}, ) s = Path( ..., description="Seek key name", - regex=r"^\S+$", - example=ex_asset, + pattern=r"^\S+$", + examples={"default": ex_asset}, ) t = Path( ..., description="Tag name", - regex=r"^\S+$", - example=DEFAULT_TAG, + pattern=r"^\S+$", + examples={"default": DEFAULT_TAG}, ) # API query parameter definitions tq = Query( None, description="Tag name", - regex=r"^\S+$", + pattern=r"^\S+$", ) api_version_tags = [API3_ID] @@ -91,11 +93,8 @@ @router.get("/", tags=api_version_tags) @router.get("/index", tags=api_version_tags) -async def index(request: Request): - """ - Returns a landing page HTML with the server resources ready do download. - No inputs required. 
- """ +async def index(request: Request) -> Response: + """Return a landing page HTML with the server resources ready to download.""" _LOGGER.debug(f"RefGenConf object:\n{rgc}") templ_vars = { "request": request, @@ -111,18 +110,14 @@ async def index(request: Request): @router.get( "/remotes/dict", tags=api_version_tags, response_model=Dict[str, Dict[str, str]] ) -async def get_remotes_dict(): - """ - Returns the remotes section of the server configuration file - """ +async def get_remotes_dict() -> dict[str, dict[str, str]] | None: + """Return the remotes section of the server configuration file.""" return rgc["remotes"] if "remotes" in rgc else None @router.get("/genomes/splash/{genome}", tags=api_version_tags) -async def genome_splash_page(request: Request, genome: str = g): - """ - Returns a genome splash page - """ +async def genome_splash_page(request: Request, genome: str = g) -> Response: + """Return a genome splash page.""" templ_vars = { "openapi_version": get_openapi_version(app), "genome": genome, @@ -146,10 +141,8 @@ async def genome_splash_page(request: Request, genome: str = g): @router.get("/assets/splash/{genome}/{asset}", tags=api_version_tags) async def asset_splash_page( request: Request, genome: str = g, asset: str = a, tag: Optional[str] = tq -): - """ - Returns an asset splash page - """ +) -> Response: + """Return an asset splash page.""" tag = tag or rgc.get_default_tag( genome, asset ) # returns 'default' for nonexistent genome/asset; no need to catch @@ -202,12 +195,10 @@ async def asset_splash_page( @router.get("/genomes/list", response_model=List[str], tags=api_version_tags) -async def list_available_genomes(): - """ - Returns a list of **genome digests** this server serves at least one asset for. 
- """ +async def list_available_genomes() -> list[str]: + """Return a list of genome digests this server serves at least one asset for.""" _LOGGER.info("serving genomes list") - return list(rgc.genomes[IK]["aliases_raw"].keys()) + return list(rgc[CFG_GENOMES_KEY].keys()) @router.get( @@ -216,12 +207,13 @@ async def list_available_genomes(): tags=api_version_tags, operation_id=API_VERSION + API_ID_ALIASES_DICT, ) -async def get_alias_dict(): - """ - Returns a dictionary of lists of aliases keyed by the respective genome digests. - """ +async def get_alias_dict() -> dict[str, list[str]]: + """Return a dictionary of alias lists keyed by genome digests.""" _LOGGER.info("serving genomes alias dict") - return rgc.genomes[IK]["aliases_raw"] + return { + g: rgc[CFG_GENOMES_KEY][g].get(CFG_ALIASES_KEY, []) + for g in rgc[CFG_GENOMES_KEY].keys() + } @router.get( @@ -233,11 +225,9 @@ async def get_alias_dict(): async def list_available_assets( includeSeekKeys: Optional[bool] = Query( False, description="Whether to include seek keys in the response" - ) -): - """ - Returns a list of assets that can be downloaded, keyed by the respective genome digests. - """ + ), +) -> dict[str, list[str]]: + """Return a list of assets that can be downloaded, keyed by genome digests.""" ret_dict = ( rgc.list(include_tags=True) if includeSeekKeys else rgc.list_assets_by_genome() ) @@ -254,12 +244,13 @@ async def list_available_assets( operation_id=API_VERSION + API_ID_ARCHIVE, tags=api_version_tags, ) -async def download_asset(genome: str = g, asset: str = a, tag: Optional[str] = tq): - """ - Returns an archive. Requires the genome name and the asset name as an input. +async def download_asset( + genome: str = g, asset: str = a, tag: Optional[str] = tq +) -> Response: + """Return an asset archive. - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. - Default tag is returned otherwise. 
+ Optionally, 'tag' query parameter can be specified to get a tagged asset + archive. Default tag is returned otherwise. """ tag = tag or rgc.get_default_tag( genome, asset @@ -297,10 +288,8 @@ async def get_asset_file_path( remoteClass: RemoteClassEnum = Query( "http", description="Remote data provider class" ), -): - """ - Returns a path to the unarchived asset file. - Requires a genome name, an asset name and a seek_key name as an input. +) -> Response: + """Return a path to the unarchived asset file. Optionally, query parameters can be specified: @@ -326,10 +315,8 @@ async def get_asset_file_path( response_model=str, tags=api_version_tags, ) -async def get_asset_default_tag(genome: str = g, asset: str = a): - """ - Returns the default tag name. Requires genome name and asset name as an input. - """ +async def get_asset_default_tag(genome: str = g, asset: str = a) -> Response: + """Return the default tag name for a genome/asset pair.""" return Response(content=rgc.get_default_tag(genome, asset), media_type="text/plain") @@ -339,10 +326,10 @@ async def get_asset_default_tag(genome: str = g, asset: str = a): response_model=str, tags=api_version_tags, ) -async def get_asset_digest(genome: str = g, asset: str = a, tag: Optional[str] = tq): - """ - Returns the asset digest. Requires genome name asset name and tag name as an input. - """ +async def get_asset_digest( + genome: str = g, asset: str = a, tag: Optional[str] = tq +) -> Response: + """Return the asset digest for a genome/asset:tag combination.""" tag = tag or DEFAULT_TAG try: return Response( @@ -363,10 +350,10 @@ async def get_asset_digest(genome: str = g, asset: str = a, tag: Optional[str] = response_model=str, tags=api_version_tags, ) -async def get_archive_digest(genome: str = g, asset: str = a, tag: Optional[str] = tq): - """ - Returns the archive digest. Requires genome name asset name and tag name as an input. 
- """ +async def get_archive_digest( + genome: str = g, asset: str = a, tag: Optional[str] = tq +) -> Response: + """Return the archive digest for a genome/asset:tag combination.""" tag = tag or DEFAULT_TAG try: return Response( @@ -388,12 +375,11 @@ async def get_archive_digest(genome: str = g, asset: str = a, tag: Optional[str] ) async def download_asset_build_recipe( genome: str = g, asset: str = a, tag: Optional[str] = tq -): - """ - Returns a build recipe. Requires the genome name and the asset name as an input. +) -> Response: + """Return a build recipe. - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. - Default tag is returned otherwise. + Optionally, 'tag' query parameter can be specified. Default tag is returned + otherwise. """ return serve_json_for_asset( rgc=rgc, @@ -411,12 +397,11 @@ async def download_asset_build_recipe( ) async def download_asset_build_log( genome: str = g, asset: str = a, tag: Optional[str] = tq -): - """ - Returns a build log. Requires the genome name and the asset name as an input. +) -> Response: + """Return a build log. - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. - Default tag is returned otherwise. + Optionally, 'tag' query parameter can be specified. Default tag is returned + otherwise. """ return serve_file_for_asset( rgc=rgc, @@ -434,13 +419,11 @@ async def download_asset_build_log( ) async def download_asset_directory_contents( genome: str = g, asset: str = a, tag: Optional[str] = tq -): - """ - Returns a asset directory tree file. - Requires the genome name and the asset name as an input. +) -> Response: + """Return an asset directory tree file. - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. - Default tag is returned otherwise. + Optionally, 'tag' query parameter can be specified. Default tag is returned + otherwise. 
""" return serve_json_for_asset( rgc=rgc, @@ -459,11 +442,11 @@ async def download_asset_directory_contents( ) async def download_asset_attributes( genome: str = g, asset: str = a, tag: Optional[str] = tq -): - """ - Returns a dictionary of asset attributes, like archive size, archive digest etc. - Requires the genome name and the asset name as an input. - Optionally, 'tag' query parameter can be specified to get a tagged asset attributes. +) -> dict: + """Return a dictionary of asset attributes (archive size, digest, etc.). + + Optionally, 'tag' query parameter can be specified to get tagged asset + attributes. """ tag = tag or rgc.get_default_tag( genome, asset @@ -494,11 +477,8 @@ async def download_asset_attributes( response_model=Dict[str, str], tags=api_version_tags, ) -async def download_genome_attributes(genome: str = g): - """ - Returns a dictionary of genome attributes, like archive size, archive digest etc. - Requires the genome name name as an input. - """ +async def download_genome_attributes(genome: str = g) -> dict: + """Return a dictionary of genome attributes (archive size, digest, etc.).""" try: attrs = rgc.get_genome_attributes(genome) _LOGGER.info(f"attributes returned for genome '{genome}': \n{attrs}") @@ -512,11 +492,8 @@ async def download_genome_attributes(genome: str = g): @router.get( "/genomes/by_asset/{asset}", response_model=List[str], tags=api_version_tags ) -async def list_genomes_by_asset(asset: str = a): - """ - Returns a list of genomes that have the requested asset defined. - Requires the asset name as an input. 
- """ +async def list_genomes_by_asset(asset: str = a) -> list[str]: + """Return a list of genomes that have the requested asset defined.""" genomes = rgc.list_genomes_by_asset(asset) _LOGGER.info(f"serving genomes by '{asset}' asset: {genomes}") return genomes @@ -528,10 +505,8 @@ async def list_genomes_by_asset(asset: str = a): response_model=str, tags=api_version_tags, ) -async def get_genome_alias_digest(alias: str = al): - """ - Returns the genome digest. Requires the genome name as an input - """ +async def get_genome_alias_digest(alias: str = al) -> Response: + """Return the genome digest for a given alias.""" try: digest = rgc.get_genome_alias_digest(alias=alias) _LOGGER.info(f"digest returned for '{alias}': {digest}") @@ -543,20 +518,18 @@ async def get_genome_alias_digest(alias: str = al): @router.get( - "/genomes/aliases/{genome_digest}", + "/genomes/aliases/{genome}", operation_id=API_VERSION + API_ID_ALIAS_ALIAS, response_model=List[str], tags=api_version_tags, ) -async def get_genome_alias(genome_digest: str = g): - """ - Returns the genome digest. 
Requires the genome name as an input - """ +async def get_genome_alias(genome: str = g) -> list[str]: + """Return the genome aliases for a given digest.""" try: - alias = rgc[CFG_GENOMES_KEY][genome_digest][CFG_ALIASES_KEY] - _LOGGER.info(f"alias returned for '{genome_digest}': {alias}") + alias = rgc[CFG_GENOMES_KEY][genome][CFG_ALIASES_KEY] + _LOGGER.info(f"alias returned for '{genome}': {alias}") return alias except (KeyError, UndefinedAliasError): - msg = MSG_404.format(f"genome ({genome_digest})") + msg = MSG_404.format(f"genome ({genome})") _LOGGER.warning(msg) raise HTTPException(status_code=404, detail=msg) diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index d3c92ea..91a837f 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -1,10 +1,11 @@ +from __future__ import annotations + import logging import sys from glob import glob from json import dump from subprocess import run -from attmap import PathExAttMap as PXAM from refgenconf import RefGenConf from refgenconf.exceptions import ( ConfigNotCompliantError, @@ -14,6 +15,7 @@ ) from refgenconf.helpers import replace_str_in_obj, swap_names_in_tree from ubiquerg import checksum, is_command_callable, parse_registry_path, size +from yacman import write_lock from .const import * @@ -21,19 +23,27 @@ _LOGGER = logging.getLogger(PKG_NAME) -def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): - """ - Takes the RefGenConf object and builds individual tar archives - that can be then served with 'refgenieserver serve'. Additionally determines their md5 checksums, file sizes and - updates the original refgenie config with these data. 
If the --asset and/or --genome options are used (specific - build is requested) the archiver will check for the existence of config file saved in the path provided in - `genome_server` in the original config and update it so that no archive metadata is lost - - :param RefGenConf rgc: configuration object with the data to build the servable archives for - :param list[dict] registry_paths: a collection of mappings that identifies the assets to update - :param bool force: whether to force the build of archive, regardless of its existence - :param bool remove: whether remove specified genome/asset:tag from the archive - :param str cfg_path: config file path +def archive( + rgc: RefGenConf, + registry_paths: list[dict] | None, + force: bool, + remove: bool, + cfg_path: str, + genomes_desc: str | None, +) -> None: + """Build tar archives for serving with 'refgenieserver serve'. + + Determines md5 checksums and file sizes and updates the original refgenie + config with these data. If specific assets/genomes are requested, checks + for the server config file and updates it to preserve archive metadata. + + Args: + rgc: Configuration object with data to build servable archives for. + registry_paths: Collection of mappings identifying assets to update. + force: Whether to force the build regardless of existence. + remove: Whether to remove specified genome/asset:tag from the archive. + cfg_path: Config file path. + genomes_desc: Path to CSV file with genome descriptions. 
""" if float(rgc[CFG_VERSION_KEY]) < float(REQ_CFG_VERSION): raise ConfigNotCompliantError( @@ -70,7 +80,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): # make it RW compatible and point to new target path for server use or initialize a new object if os.path.exists(server_rgc_path): _LOGGER.debug(f"'{server_rgc_path}' file was found and will be updated") - rgc_server = RefGenConf(filepath=server_rgc_path) + rgc_server = RefGenConf.from_yaml_file(server_rgc_path) if remove: if not registry_paths: _LOGGER.error( @@ -78,8 +88,9 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): "Use 'asset_registry_path' argument." ) exit(1) - with rgc_server as r: + with write_lock(rgc_server) as r: _remove_archive(r, registry_paths, CFG_ARCHIVE_KEY) + r.write() exit(0) else: if remove: @@ -88,9 +99,10 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): ) exit(1) _LOGGER.debug(f"'{server_rgc_path}' file was not found and will be created") - rgc_server = RefGenConf(filepath=rgc.file_path) - rgc_server.make_writable(filepath=server_rgc_path) - rgc_server.make_readonly() + rgc_server = RefGenConf.from_yaml_file(rgc.file_path) + rgc_server.write_copy(server_rgc_path) + rgc_server.filepath = os.path.abspath(server_rgc_path) + rgc_server.locker.set_file_path(os.path.abspath(server_rgc_path)) if registry_paths: genomes = _get_paths_element(registry_paths, "namespace") asset_list = _get_paths_element(registry_paths, "item") @@ -141,12 +153,13 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): CFG_GENOME_DESC_KEY: genome_desc, CFG_ALIASES_KEY: genome_aliases, } - with rgc_server as r: - r[CFG_GENOMES_KEY].setdefault(genome, PXAM()) + with write_lock(rgc_server) as r: + r[CFG_GENOMES_KEY].setdefault(genome, {}) r[CFG_GENOMES_KEY][genome].update(genome_attrs) + r.write() _LOGGER.debug(f"Updating '{genome}' genome attributes...") asset = asset_list[counter] if asset_list is not None else None 
- assets = asset or rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY].keys() + assets = asset or list(rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY].keys()) if not assets: _LOGGER.error("No assets found") continue @@ -164,13 +177,13 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): CFG_ASSET_DEFAULT_TAG_KEY: default_tag, } _LOGGER.debug(f"Updating '{genome}/{asset_name}' asset attributes...") - with rgc_server as r: + with write_lock(rgc_server) as r: r.update_assets(genome, asset_name, asset_attrs) + r.write() tag = tag_list[counter] if tag_list is not None else None - tags = ( - tag - or rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name][ + tags = tag or list( + rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name][ CFG_ASSET_TAGS_KEY ].keys() ) @@ -254,7 +267,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): {CFG_LEGACY_ARCHIVE_CHECKSUM_KEY: legacy_digest} ) _LOGGER.debug(f"attr dict: {tag_attrs}") - with rgc_server as r: + with write_lock(rgc_server) as r: for parent in parents: # here we update any pre-existing parents' children # attr with the newly added asset @@ -287,6 +300,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): children=True, ) r.update_tags(genome, asset_name, tag_name, tag_attrs) + r.write() else: exists_msg = f"'{target_file}' exists." try: @@ -297,21 +311,23 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): except KeyError: _LOGGER.debug(exists_msg + " Calculating archive digest") tag_attrs = {CFG_ARCHIVE_CHECKSUM_KEY: checksum(target_file)} - with rgc_server as r: + with write_lock(rgc_server) as r: r.update_tags(genome, asset_name, tag_name, tag_attrs) + r.write() counter += 1 _LOGGER.info(f"Builder finished; server config file saved: {rgc_server.file_path}") -def _check_tgz(path, output): - """ - Check if file exists and tar it. - If gzipping is requested, the pigz software is used if available. 
+def _check_tgz(path: str, output: str) -> None: + """Check if file exists and tar it, using pigz if available. - :param str path: path to the file to be tarred - :param str output: path to the result file - :raise OSError: if the file/directory meant to be archived does not exist + Args: + path: Path to the file to be tarred. + output: Path to the result file. + + Raises: + OSError: If the file/directory to be archived does not exist. """ pth, tag_name = os.path.split(path) if os.path.exists(path): @@ -329,18 +345,23 @@ def _check_tgz(path, output): raise OSError(f"Entity '{path}' does not exist") -def _check_tgz_legacy(path, output, asset_name, genome_name, alias): - """ - NOTE: This is a duplication of the _check_tgz function, kept separate as in - the future this step will be simply removed. +def _check_tgz_legacy( + path: str, output: str, asset_name: str, genome_name: str, alias: str | list[str] +) -> None: + """Legacy version of _check_tgz, to be removed in the future. - Check if file exists and tar it. - If gzipping is requested, the availability of pigz software is checked and used. + Checks if file exists and tars it with alias-based naming. Uses pigz + if available. - :param str path: path to the file to be tarred - :param str output: path to the result file - :param str asset_name: name of the asset - :raise OSError: if the file/directory meant to be archived does not exist + Args: + path: Path to the file to be tarred. + output: Path to the result file. + asset_name: Name of the asset. + genome_name: Genome digest name. + alias: Genome alias or list of aliases. + + Raises: + OSError: If the file/directory to be archived does not exist. 
""" # TODO: remove in the future if isinstance(alias, str): @@ -368,12 +389,14 @@ def _check_tgz_legacy(path, output, asset_name, genome_name, alias): raise OSError(f"Entity '{path}' does not exist") -def _copy_log(input_dir, target_dir, asset_name, tag_name): - """ - Copy the log file +def _copy_log(input_dir: str, target_dir: str, asset_name: str, tag_name: str) -> None: + """Copy the build log file. - :param str input_dir: path to the directory to copy the recipe from - :param str target_dir: path to the directory to copy the recipe to + Args: + input_dir: Path to the source directory. + target_dir: Path to the destination directory. + asset_name: Asset name. + tag_name: Tag name. """ log_path = f"{input_dir}/{BUILD_STATS_DIR}/{ORI_LOG_NAME}" if log_path and os.path.exists(log_path): @@ -389,12 +412,12 @@ def _copy_log(input_dir, target_dir, asset_name, tag_name): _LOGGER.warning(f"Log not found: {log_path}") -def _copy_asset_dir(input_dir, target_dir): - """ - Copy the asset directory +def _copy_asset_dir(input_dir: str, target_dir: str) -> None: + """Copy the asset directory via rsync. - :param str input_dir: path to the directory to copy the asset dir from - :param str target_dir: path to the directory to copy the asset dir to + Args: + input_dir: Path to the source directory. + target_dir: Path to the destination directory. """ if input_dir and os.path.exists(input_dir): run( @@ -406,13 +429,13 @@ def _copy_asset_dir(input_dir, target_dir): _LOGGER.warning(f"Asset directory not found: {input_dir}") -def _get_asset_dir_contents(asset_dir, asset_name, tag_name): - """ - Create a file tree with contents of the unarchived asset directory +def _get_asset_dir_contents(asset_dir: str, asset_name: str, tag_name: str) -> None: + """Create a JSON file listing the unarchived asset directory contents. 
- :param str asset_dir: path to the asset directory to get the contents of - :param str asset_name: name of the asset - :param str tag_name: name of the tag + Args: + asset_dir: Path to the asset directory. + asset_name: Name of the asset. + tag_name: Name of the tag. """ asset_dir_contents_file_path = os.path.join( os.path.dirname(asset_dir), @@ -432,14 +455,16 @@ def _get_asset_dir_contents(asset_dir, asset_name, tag_name): ) -def _copy_recipe(input_dir, target_dir, asset_name, tag_name): - """ - Copy the recipe +def _copy_recipe( + input_dir: str, target_dir: str, asset_name: str, tag_name: str +) -> None: + """Copy the build recipe file. - :param str input_dir: path to the directory to copy the recipe from - :param str target_dir: path to the directory to copy the recipe to - :param str asset_name: asset name - :param str tag_name: tag name + Args: + input_dir: Path to the source directory. + target_dir: Path to the destination directory. + asset_name: Asset name. + tag_name: Tag name. """ recipe_path = ( f"{input_dir}/{BUILD_STATS_DIR}/" @@ -452,15 +477,20 @@ def _copy_recipe(input_dir, target_dir, asset_name, tag_name): _LOGGER.warning(f"Recipe not found: {recipe_path}") -def _remove_archive(rgc, registry_paths, cfg_archive_folder_key=CFG_ARCHIVE_KEY): - """ - Remove archives and corresponding entries from the RefGenConf object +def _remove_archive( + rgc: RefGenConf, + registry_paths: list[dict], + cfg_archive_folder_key: str = CFG_ARCHIVE_KEY, +) -> list[str]: + """Remove archives and corresponding entries from the RefGenConf object. + + Args: + rgc: Configuration object to remove entries from. + registry_paths: Entries to remove. + cfg_archive_folder_key: Archive folder key in the genome config file. 
- :param refgenconf.RefGenConf rgc: object to remove the entries from - :param list[dict] registry_paths: entries to remove - :param str cfg_archive_folder_key: configuration archive folder key in the genome - configuration file - :return list[str]: removed file paths + Returns: + List of removed file paths. """ ret = [] for registry_path in _correct_registry_paths(registry_paths): @@ -505,21 +535,27 @@ def _remove_archive(rgc, registry_paths, cfg_archive_folder_key=CFG_ARCHIVE_KEY) return ret -def _correct_registry_paths(registry_paths): - """ - parse_registry_path function recognizes the 'item' as the central element of the asset registry path. - We require the 'namespace' to be the central one. Consequently, this function swaps them. +def _correct_registry_paths(registry_paths: list[dict]) -> list[dict]: + """Correct registry paths by swapping 'namespace' and 'item' keys. + + parse_registry_path recognizes 'item' as the central element, but we + require 'namespace' to be central. This function swaps them. + + Args: + registry_paths: Output of parse_registry_path. - :param list[dict] registry_paths: output of parse_registry_path - :return list[dict]: corrected registry paths + Returns: + Corrected registry paths. """ - def _swap(rp): - """ - Swaps dict values of 'namespace' with 'item' keys + def _swap(rp: dict) -> dict: + """Swap 'namespace' and 'item' values in a registry path dict. + + Args: + rp: Dict to swap values for. - :param dict rp: dict to swap values for - :return dict: dict with swapped values + Returns: + Dict with swapped values. 
""" rp["namespace"] = rp["item"] rp["item"] = None @@ -528,12 +564,14 @@ def _swap(rp): return [_swap(x) if x["namespace"] is None else x for x in registry_paths] -def _get_paths_element(registry_paths, element): - """ - Extract the specific element from a collection of registry paths +def _get_paths_element(registry_paths: list[dict], element: str) -> list[str | None]: + """Extract a specific element from a collection of registry paths. + + Args: + registry_paths: Output of parse_registry_path. + element: One of 'protocol', 'namespace', 'item', or 'tag'. - :param list[dict] registry_paths: output of parse_registry_path - :param str element: 'protocol', 'namespace', 'item' or 'tag' - :return list[str]: extracted elements + Returns: + List of extracted elements. """ return [x[element] for x in _correct_registry_paths(registry_paths)] diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt deleted file mode 100644 index e353ac6..0000000 --- a/requirements/requirements-all.txt +++ /dev/null @@ -1,7 +0,0 @@ -aiofiles -fastapi -jinja2 -logmuse>=0.2 -refgenconf>=0.11.0 -ubiquerg>=0.6.1 -uvicorn>=0.7.1 diff --git a/setup.py b/setup.py deleted file mode 100644 index c618823..0000000 --- a/setup.py +++ /dev/null @@ -1,72 +0,0 @@ -#! /usr/bin/env python - -import sys - -from setuptools import setup - -PACKAGE = "refgenieserver" - -# Additional keyword arguments for setup(). -extra = {} - -# Ordinary dependencies -DEPENDENCIES = [] -with open("requirements/requirements-all.txt", "r") as reqs_file: - for line in reqs_file: - print(line) - if not line.strip(): - continue - DEPENDENCIES.append(line) - -# 2to3 -if sys.version_info >= (3,): - extra["use_2to3"] = True -extra["install_requires"] = DEPENDENCIES - - -with open("{}/_version.py".format(PACKAGE), "r") as versionfile: - version = versionfile.readline().split()[-1].strip("\"'\n") - - -# Handle the pypi README formatting. 
-try: - import pypandoc - - long_description = pypandoc.convert_file("README.md", "rst") - msg = "\033[032mPandoc conversion succeeded.\033[0m" -except (IOError, ImportError, OSError): - msg = "\033[0;31mWarning: pandoc conversion failed!\033[0m" - long_description = open("README.md").read() - - -setup( - name=PACKAGE, - packages=[PACKAGE], - version=version, - description="This server provides both a web interface and a RESTful API. Users may explore and download archived " - "indexes from the web interface or develop tools that programmatically query the API.", - long_description=long_description, - long_description_content_type="text/markdown", - classifiers=[ - "Development Status :: 4 - Beta", - "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Topic :: Scientific/Engineering :: Bio-Informatics", - ], - keywords="project, bioinformatics, sequencing, ngs, workflow, GUI, genomes, server", - url="https://refgenie.databio.org/", - author=u"Michal Stolarczyk, Vince Reuter, Nathan Sheffield", - license="BSD2", - entry_points={ - "console_scripts": [ - "{p} = {p}.__main__:main".format(p=PACKAGE), - ], - }, - include_package_data=True, - **extra -) - -print(msg) diff --git a/staging.Dockerfile b/staging.Dockerfile index f74376b..d51f12a 100644 --- a/staging.Dockerfile +++ b/staging.Dockerfile @@ -1,6 +1,7 @@ -FROM tiangolo/uvicorn-gunicorn:python3.7-alpine3.8 +FROM python:3.12-slim LABEL authors="Nathan Sheffield, Michal Stolarczyk" COPY . /app -#RUN pip install https://github.com/refgenie/refgenconf/archive/dev.zip +WORKDIR /app RUN pip install . +CMD ["uvicorn", "refgenieserver.main:app", "--host", "0.0.0.0", "--port", "80"]