From 4a614f7ec2f46d39f28421be729edeb45a31cb71 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 9 Feb 2026 21:34:46 -0500 Subject: [PATCH 01/15] modernize packaging to uv --- .pre-commit-config.yaml | 19 +++----- MANIFEST.in | 5 --- pyproject.toml | 66 +++++++++++++++++++++++++++ refgenieserver/_version.py | 5 ++- refgenieserver/const.py | 3 +- refgenieserver/helpers.py | 28 ++++++------ refgenieserver/main.py | 8 ++-- refgenieserver/routers/private.py | 2 +- refgenieserver/routers/version3.py | 2 +- requirements/requirements-all.txt | 7 --- setup.py | 72 ------------------------------ 11 files changed, 100 insertions(+), 117 deletions(-) delete mode 100644 MANIFEST.in create mode 100644 pyproject.toml delete mode 100644 requirements/requirements-all.txt delete mode 100644 setup.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ab5489e..549d42d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,20 +1,15 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: check-yaml - id: end-of-file-fixer - - id: requirements-txt-fixer - - id: trailing-whitespace - - - repo: https://github.com/PyCQA/isort - rev: 5.7.0 - hooks: - - id: isort - args: ["--profile", "black"] + - id: check-ast - - repo: https://github.com/psf/black - rev: 20.8b1 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.0 hooks: - - id: black + - id: ruff + args: [--fix] + - id: ruff-format diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 6f98629..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,5 +0,0 @@ -include requirements/* -include README.md -include LICENSE.txt -include Dockerfile -recursive-include refgenieserver * \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a7e524a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,66 @@ +[project] +name = "refgenieserver" +version = "0.7.0" +description = "A web interface and RESTful API for reference genome assets" +readme = "README.md" +license = "BSD-2-Clause" +requires-python = ">=3.10" +authors = [ + { name = "Michal Stolarczyk" }, + { name = "Vince Reuter" }, + { name = "Nathan Sheffield" }, +] +keywords = ["bioinformatics", "sequencing", "ngs", "genomes", "server"] +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Scientific/Engineering :: Bio-Informatics", +] +dependencies = [ + "aiofiles", + "fastapi", + "jinja2", + "logmuse>=0.2", + "refgenconf>=0.12.2", + "ubiquerg>=0.6.1", + "uvicorn>=0.7.1", +] + +[project.urls] +Homepage = "https://refgenie.databio.org" + +[project.scripts] +refgenieserver = "refgenieserver.__main__:main" + +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +include = ["refgenieserver*"] + +[tool.setuptools.package-data] +refgenieserver = ["templates/*", "static/*"] + +[project.optional-dependencies] +test = [ + "pytest", + "httpx", +] + +[tool.pytest.ini_options] +addopts = "-rfE" +testpaths = ["tests"] + +[tool.ruff] +line-length = 88 + +[tool.ruff.lint] +select = ["E", "F", "I"] +ignore = ["F403", "F405", "E501"] + +[tool.ruff.lint.isort] +known-first-party = ["refgenieserver"] diff --git a/refgenieserver/_version.py b/refgenieserver/_version.py index 49e0fc1..9e23b5f 100644 --- a/refgenieserver/_version.py +++ b/refgenieserver/_version.py @@ -1 +1,4 @@ -__version__ = "0.7.0" +# Version is defined in pyproject.toml +from importlib.metadata import version + +__version__ = version("refgenieserver") diff --git a/refgenieserver/const.py b/refgenieserver/const.py index 0b1e4e9..10bede7 100644 --- a/refgenieserver/const.py +++ b/refgenieserver/const.py @@ -1,4 +1,5 @@ -""" Package constants """ +"""Package constants""" + import os from platform import python_version diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 08e7b7e..3526be4 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -52,16 +52,18 @@ def add_subparser(cmd, description): # add arguments that are common for both subparsers for cmd, desc in msg_by_cmd.items(): sps[cmd] = add_subparser(cmd, desc) - sps[cmd].add_argument( - "-c", - "--config", - required=False, - dest="config", - help=f"A path to the refgenie config file (YAML). If not provided, the " - f"first available environment variable among: " - f"'{', '.join(CFG_ENV_VARS)}' will be used if set. " - f"Currently: {env_var_val}", - ), + ( + sps[cmd].add_argument( + "-c", + "--config", + required=False, + dest="config", + help=f"A path to the refgenie config file (YAML). If not provided, the " + f"first available environment variable among: " + f"'{', '.join(CFG_ENV_VARS)}' will be used if set. " + f"Currently: {env_var_val}", + ), + ) sps[cmd].add_argument( "-d", "--dbg", @@ -158,9 +160,9 @@ def get_datapath_for_genome( the source is remote """ req_keys = [i[1] for i in Formatter().parse(pth_templ) if i[1] is not None] - assert all( - [k in req_keys for k in list(fill_dict.keys())] - ), f"Only the these keys are allowed in the fill_dict: {req_keys}" + assert all([k in req_keys for k in list(fill_dict.keys())]), ( + f"Only the these keys are allowed in the fill_dict: {req_keys}" + ) fill_dict.update({"base": BASE_DIR}) # fill_dict.update({"base": rgc["genome_archive_folder"]}) remote = is_data_remote(rgc) diff --git a/refgenieserver/main.py b/refgenieserver/main.py index 2cde651..9a42d80 100644 --- a/refgenieserver/main.py +++ b/refgenieserver/main.py @@ -39,10 +39,10 @@ def main(): ) _LOGGER = logmuse.setup_logger(**logger_args) selected_cfg = select_genome_config(args.config) - assert ( - selected_cfg is not None - ), "You must provide a config file or set the {} environment variable".format( - "or ".join(CFG_ENV_VARS) + assert selected_cfg is not None, ( + "You must provide a config file or set the {} environment variable".format( + "or ".join(CFG_ENV_VARS) + ) ) # this RefGenConf object will be used in the server, so it's read-only rgc = RefGenConf(filepath=selected_cfg, writable=False) diff --git a/refgenieserver/routers/private.py b/refgenieserver/routers/private.py index 9f6dd64..1b9f38b 100644 --- a/refgenieserver/routers/private.py +++ b/refgenieserver/routers/private.py @@ -2,7 +2,7 @@ from ..const import * from ..data_models import Dict, Genome -from ..main import _LOGGER, app, rgc, templates +from ..main import _LOGGER, rgc router = APIRouter() diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 0a9568d..fa81811 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -233,7 +233,7 @@ async def get_alias_dict(): async def list_available_assets( includeSeekKeys: Optional[bool] = Query( False, description="Whether to include seek keys in the response" - ) + ), ): """ Returns a list of assets that can be downloaded, keyed by the respective genome digests. diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt deleted file mode 100644 index e353ac6..0000000 --- a/requirements/requirements-all.txt +++ /dev/null @@ -1,7 +0,0 @@ -aiofiles -fastapi -jinja2 -logmuse>=0.2 -refgenconf>=0.11.0 -ubiquerg>=0.6.1 -uvicorn>=0.7.1 diff --git a/setup.py b/setup.py deleted file mode 100644 index c618823..0000000 --- a/setup.py +++ /dev/null @@ -1,72 +0,0 @@ -#! /usr/bin/env python - -import sys - -from setuptools import setup - -PACKAGE = "refgenieserver" - -# Additional keyword arguments for setup(). -extra = {} - -# Ordinary dependencies -DEPENDENCIES = [] -with open("requirements/requirements-all.txt", "r") as reqs_file: - for line in reqs_file: - print(line) - if not line.strip(): - continue - DEPENDENCIES.append(line) - -# 2to3 -if sys.version_info >= (3,): - extra["use_2to3"] = True -extra["install_requires"] = DEPENDENCIES - - -with open("{}/_version.py".format(PACKAGE), "r") as versionfile: - version = versionfile.readline().split()[-1].strip("\"'\n") - - -# Handle the pypi README formatting. -try: - import pypandoc - - long_description = pypandoc.convert_file("README.md", "rst") - msg = "\033[032mPandoc conversion succeeded.\033[0m" -except (IOError, ImportError, OSError): - msg = "\033[0;31mWarning: pandoc conversion failed!\033[0m" - long_description = open("README.md").read() - - -setup( - name=PACKAGE, - packages=[PACKAGE], - version=version, - description="This server provides both a web interface and a RESTful API. Users may explore and download archived " - "indexes from the web interface or develop tools that programmatically query the API.", - long_description=long_description, - long_description_content_type="text/markdown", - classifiers=[ - "Development Status :: 4 - Beta", - "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Topic :: Scientific/Engineering :: Bio-Informatics", - ], - keywords="project, bioinformatics, sequencing, ngs, workflow, GUI, genomes, server", - url="https://refgenie.databio.org/", - author=u"Michal Stolarczyk, Vince Reuter, Nathan Sheffield", - license="BSD2", - entry_points={ - "console_scripts": [ - "{p} = {p}.__main__:main".format(p=PACKAGE), - ], - }, - include_package_data=True, - **extra -) - -print(msg) From ef7521d2451a1b1368373cbe0782e9e7875f57e4 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 9 Feb 2026 21:45:19 -0500 Subject: [PATCH 02/15] restyle docstrings --- refgenieserver/data_models.py | 12 +-- refgenieserver/helpers.py | 164 +++++++++++++++++++---------- refgenieserver/main.py | 1 + refgenieserver/routers/private.py | 4 +- refgenieserver/routers/version1.py | 40 +++---- refgenieserver/routers/version2.py | 97 ++++++++++------- refgenieserver/routers/version3.py | 100 ++++++------------ refgenieserver/server_builder.py | 150 ++++++++++++++------------ 8 files changed, 306 insertions(+), 262 deletions(-) diff --git a/refgenieserver/data_models.py b/refgenieserver/data_models.py index 8b2243b..439c90a 100644 --- a/refgenieserver/data_models.py +++ b/refgenieserver/data_models.py @@ -4,9 +4,7 @@ class Tag(BaseModel): - """ - Tag data model - """ + """Tag data model.""" asset_path: str asset_digest: str @@ -19,9 +17,7 @@ class Tag(BaseModel): class Asset(BaseModel): - """ - Asset data model - """ + """Asset data model.""" asset_description: str tags: Dict[str, Tag] @@ -29,9 +25,7 @@ class Asset(BaseModel): class Genome(BaseModel): - """ - Genome data model - """ + """Genome data model.""" genome_description: str assets: Dict[str, Asset] diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 3526be4..f98d624 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -17,10 +17,10 @@ def build_parser(): - """ - Building argument parser + """Build the argument parser. - :return argparse.ArgumentParser + Returns: + The configured argument parser. """ env_var_val = ( get_first_env_var(CFG_ENV_VARS)[1] @@ -113,11 +113,13 @@ def add_subparser(cmd, description): def preprocess_attrs(attrs): - """ - Based on the CHANGED_KEYS mapping (new_key:old_key), rename the keys in the provided one + """Rename keys based on the CHANGED_KEYS mapping (new_key:old_key). - :param yacman.YacAttMap attrs: mapping to process - :return yacman.YacAttMap: mapping with renamed key names + Args: + attrs: Mapping to process. + + Returns: + Mapping with renamed key names. """ from copy import deepcopy @@ -130,11 +132,13 @@ def preprocess_attrs(attrs): def get_openapi_version(app): - """ - Get the OpenAPI version from the OpenAPI description JSON + """Get the OpenAPI version from the OpenAPI description JSON. - :param fastapi.FastAPI app: app object - :return str: openAPI version in use + Args: + app: FastAPI app object. + + Returns: + The openAPI version in use. """ try: return app.openapi()["openapi"] @@ -146,18 +150,19 @@ def get_openapi_version(app): def get_datapath_for_genome( rgc, fill_dict, pth_templ="{base}/{genome}/{file_name}", remote_key=None ): - """ - Get the path to the data file to serve. + """Get the path to the data file to serve. + + Depending on the remote URL base being set or not, returns either a remote + URL to the file or a file path along with a flag indicating the source. - Depending on the remote URL base being set or not, the function will return - either a remote URL to the file or a file path along with a flag indicating - the source + Args: + rgc: Configuration object to use. + fill_dict: Dictionary to fill in the path template. + pth_templ: The path template. + remote_key: Key identifying the remote data provider. - :param refgenconf.RefGenConf rgc: configuration object to use - :param dict fill_dict: a dictionary to use to fill in the path template - :param str pth_templ: the path template - :return (str, bool): a pair of file source and the flag indicating whether - the source is remote + Returns: + A pair of (file source, is_remote flag). """ req_keys = [i[1] for i in Formatter().parse(pth_templ) if i[1] is not None] assert all([k in req_keys for k in list(fill_dict.keys())]), ( @@ -184,12 +189,16 @@ def get_datapath_for_genome( def is_data_remote(rgc): - """ - Determine if server genome config defines a 'remotes' key, 'http is one of them and - additionally assert the correct structure -- 'prefix' key defined. + """Determine if the server genome config defines a remote data source. + + Checks for a 'remotes' key with correct structure (each remote has a + 'prefix' key defined). - :param refgenconf.RefGenConf rgc: server genome config object - :return bool: whether remote data source is configured + Args: + rgc: Server genome config object. + + Returns: + Whether a remote data source is configured. """ return ( True @@ -206,12 +215,13 @@ def is_data_remote(rgc): def purge_nonservable(rgc): - """ - Remove entries in RefGenConf object that were not processed by the archiver - and should not be served + """Remove entries not processed by the archiver that should not be served. - :param refgenconf.RefGenConf rgc: object to check - :return refgenconf.RefGenConf: object with just the servable entries + Args: + rgc: Configuration object to check. + + Returns: + The configuration object with only servable entries. """ def _check_servable(rgc, genome, asset, tag): @@ -239,6 +249,18 @@ def _check_servable(rgc, genome, asset, tag): def safely_get_example(rgc, entity, rgc_method, default, **kwargs): + """Safely get an example value from the config, falling back to a default. + + Args: + rgc: Configuration object. + entity: Description of the entity for logging. + rgc_method: Name of the method to call on rgc. + default: Fallback value if the method call fails. + **kwargs: Additional keyword arguments passed to the method. + + Returns: + The first result element (if list) or the result itself, or the default. + """ try: res = rgc.__getattr__(rgc_method)(**kwargs) return res[0] if isinstance(res, list) else res @@ -251,12 +273,21 @@ def safely_get_example(rgc, entity, rgc_method, default, **kwargs): def create_asset_file_path(rgc, genome, asset, tag, seek_key, remote_key="http"): - """ - Construct a path to an unarchived asset file + """Construct a path to an unarchived asset file. + + Args: + rgc: Configuration object. + genome: Genome name. + asset: Asset name. + tag: Tag name. + seek_key: Seek key name. + remote_key: Remote data provider key. + + Returns: + Path to the asset file. - :param str genome: - :param str asset: - :param str tag: + Raises: + HTTPException: If the asset or seek key is not found. """ tag = tag or rgc.get_default_tag( genome, asset @@ -286,14 +317,21 @@ def create_asset_file_path(rgc, genome, asset, tag, seek_key, remote_key="http") def serve_file_for_asset(rgc, genome, asset, tag, template): - """ - Serve a file, like log file + """Serve a file, like a build log. + + Args: + rgc: Configuration object. + genome: Genome name. + asset: Asset name. + tag: Tag name. + template: File name template with placeholders for asset and tag names, + e.g. 'build_log_{}__{}.md'. + + Returns: + A RedirectResponse for remote files, or a FileResponse for local files. - :param str genome: genome name - :param str asset: asset name - :param str tag: tag name - :param ste template: file name template with place for asset and tag names, - e.g. 'build_log_{}__{}.md' + Raises: + HTTPException: If the file is not found. """ # returns 'default' for nonexistent genome/asset; no need to catch tag = tag or rgc.get_default_tag(genome, asset) @@ -316,14 +354,21 @@ def serve_file_for_asset(rgc, genome, asset, tag, template): def serve_json_for_asset(rgc, genome, asset, tag, template): - """ - Serve a JSON object, like recipe or asset dir contents for an asset + """Serve a JSON object, like a recipe or asset directory contents. + + Args: + rgc: Configuration object. + genome: Genome name. + asset: Asset name. + tag: Tag name. + template: File name template with placeholders for asset and tag names, + e.g. 'build_recipe_{}__{}.json'. + + Returns: + A RedirectResponse for remote files, or a JSONResponse for local files. - :param str genome: genome name - :param str asset: asset name - :param str tag: tag name - :param ste template: file name template with place for asset and tag names, - e.g. 'build_recipe_{}__{}.json' + Raises: + HTTPException: If the file is not found. """ # returns 'default' for nonexistent genome/asset; no need to catch tag = tag or rgc.get_default_tag(genome, asset) @@ -346,14 +391,19 @@ def serve_json_for_asset(rgc, genome, asset, tag, template): def get_asset_dir_contents(rgc, genome, asset, tag): - """ - Get the asset directory contents into a list + """Get the asset directory contents as a list. + + Args: + rgc: Configuration object. + genome: Genome name. + asset: Asset name. + tag: Tag name. + + Returns: + List of files in the asset directory. - :param refgenconf.RefGenConf rgc: config - :param str genome: genome name - :param str asset: asset name - :param str tag: tag name - :return list[str]: list of files in the asset directory + Raises: + TypeError: If the path is neither a valid URL nor an existing file. """ # returns 'default' for nonexistent genome/asset; no need to catch tag = tag or rgc.get_default_tag(genome, asset) diff --git a/refgenieserver/main.py b/refgenieserver/main.py index 9a42d80..c7323f2 100644 --- a/refgenieserver/main.py +++ b/refgenieserver/main.py @@ -25,6 +25,7 @@ def main(): + """Entry point for the refgenieserver CLI.""" global rgc, _LOGGER parser = build_parser() args = parser.parse_args() diff --git a/refgenieserver/routers/private.py b/refgenieserver/routers/private.py index 1b9f38b..d6ce909 100644 --- a/refgenieserver/routers/private.py +++ b/refgenieserver/routers/private.py @@ -16,8 +16,6 @@ response_model=Dict[str, Genome], ) async def get_genomes_dict(): - """ - **Private endpoint**, which returns the entire 'genomes' part of the config - """ + """Return the entire 'genomes' section of the config (private endpoint).""" _LOGGER.info(f"serving genomes dict: '{rgc[CFG_GENOMES_KEY]}'") return rgc[CFG_GENOMES_KEY] diff --git a/refgenieserver/routers/version1.py b/refgenieserver/routers/version1.py index bc123f3..a01436d 100644 --- a/refgenieserver/routers/version1.py +++ b/refgenieserver/routers/version1.py @@ -17,9 +17,7 @@ @router.get("/", tags=api_version_tags) @router.get("/index", tags=api_version_tags) async def index(request: Request): - """ - Returns a landing page HTML with the server resources ready do download. No inputs required. - """ + """Return a landing page HTML with the server resources ready to download.""" _LOGGER.debug("RefGenConf object:\n{}".format(rgc)) templ_vars = { "request": request, @@ -33,18 +31,14 @@ async def index(request: Request): @router.get("/genomes", tags=api_version_tags) def list_available_genomes(): - """ - Returns a list of genomes this server holds at least one asset for. No inputs required. - """ + """Return a list of genomes this server holds at least one asset for.""" _LOGGER.info("serving genomes string: '{}'".format(rgc.genomes_str())) return rgc.genomes_list() @router.get("/assets", tags=api_version_tags) def list_available_assets(): - """ - Returns a list of all assets that can be downloaded. No inputs required. - """ + """Return a list of all assets that can be downloaded.""" ret_dict = rgc.list(include_tags=True) _LOGGER.info("serving assets dict: {}".format(ret_dict)) return ret_dict @@ -52,11 +46,14 @@ def list_available_assets(): @router.get("/asset/{genome}/{asset}/archive", tags=api_version_tags) async def download_asset(genome: str, asset: str, tag: str = None): - """ - Returns an archive. Requires the genome name and the asset name as an input. + """Return an asset archive. + + Since tags were introduced, the default tag is selected behind the scenes. - Since the refgenconf.RefGenConf object structure has changed (tags were introduced), - the default tag has to be selected behind the scenes + Args: + genome: Genome name. + asset: Asset name. + tag: Tag name (default tag used if not specified). """ tag = tag or rgc.get_default_tag( genome, asset @@ -87,12 +84,13 @@ async def download_asset(genome: str, asset: str, tag: str = None): @router.get("/asset/{genome}/{asset}", tags=api_version_tags) def download_asset_attributes(genome: str, asset: str): - """ - Returns a dictionary of asset attributes, like archive size, archive checksum etc. - Requires the genome name and the asset name as an input. + """Return a dictionary of asset attributes (archive size, checksum, etc.). - Since the refgenconf.RefGenConf object structure has changed (tags were introduced), - the default tag has to be selected behind the scenes + Since tags were introduced, the default tag is selected behind the scenes. + + Args: + genome: Genome name. + asset: Asset name. """ try: attrs = preprocess_attrs( @@ -127,8 +125,10 @@ def download_asset_attributes(genome: str, asset: str): @router.get("/genomes/{asset}", tags=api_version_tags) def list_genomes_by_asset(asset: str): - """ - Returns a list of genomes that have the requested asset defined. Requires the asset name as an input. + """Return a list of genomes that have the requested asset defined. + + Args: + asset: Asset name. """ genomes = rgc.list_genomes_by_asset(asset) _LOGGER.info("serving genomes by '{}' asset: {}".format(asset, genomes)) diff --git a/refgenieserver/routers/version2.py b/refgenieserver/routers/version2.py index ef69abf..551afad 100644 --- a/refgenieserver/routers/version2.py +++ b/refgenieserver/routers/version2.py @@ -19,9 +19,7 @@ @router.get("/", tags=api_version_tags) @router.get("/index", tags=api_version_tags) async def index(request: Request): - """ - Returns a landing page HTML with the server resources ready do download. No inputs required. - """ + """Return a landing page HTML with the server resources ready to download.""" _LOGGER.debug("RefGenConf object:\n{}".format(rgc)) templ_vars = { "request": request, @@ -35,8 +33,13 @@ async def index(request: Request): @router.get("/asset/{genome}/{asset}/splash", tags=api_version_tags) async def asset_splash_page(request: Request, genome: str, asset: str, tag: str = None): - """ - Returns an asset splash page + """Return an asset splash page. + + Args: + request: The incoming request. + genome: Genome name. + asset: Asset name. + tag: Tag name (default tag used if not specified). """ tag = tag or rgc.get_default_tag( genome, asset @@ -62,18 +65,14 @@ async def asset_splash_page(request: Request, genome: str, asset: str, tag: str @router.get("/genomes", tags=api_version_tags) async def list_available_genomes(): - """ - Returns a list of genomes this server holds at least one asset for. No inputs required. - """ + """Return a list of genomes this server holds at least one asset for.""" _LOGGER.info("serving genomes string: '{}'".format(rgc.genomes_str())) return rgc.genomes_list() @router.get("/assets", operation_id=API_ID_ASSETS, tags=api_version_tags) async def list_available_assets(): - """ - Returns a list of all assets that can be downloaded. No inputs required. - """ + """Return a list of all assets that can be downloaded.""" ret_dict = rgc.list(include_tags=True) _LOGGER.info("serving assets dict: {}".format(ret_dict)) return ret_dict @@ -85,10 +84,12 @@ async def list_available_assets(): tags=api_version_tags, ) async def download_asset(genome: str, asset: str, tag: str = None): - """ - Returns an archive. Requires the genome name and the asset name as an input. + """Return an asset archive. - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. Default tag is returned otherwise. + Args: + genome: Genome name. + asset: Asset name. + tag: Tag name (default tag used if not specified). """ tag = tag or rgc.get_default_tag( genome, asset @@ -123,8 +124,11 @@ async def download_asset(genome: str, asset: str, tag: str = None): tags=api_version_tags, ) async def get_asset_default_tag(genome: str, asset: str): - """ - Returns the default tag name. Requires genome name and asset name as an input. + """Return the default tag name. + + Args: + genome: Genome name. + asset: Asset name. """ return rgc.get_default_tag(genome, asset) @@ -135,8 +139,12 @@ async def get_asset_default_tag(genome: str, asset: str): tags=api_version_tags, ) async def get_asset_digest(genome: str, asset: str, tag: str): - """ - Returns the asset digest. Requires genome name asset name and tag name as an input. + """Return the asset digest. + + Args: + genome: Genome name. + asset: Asset name. + tag: Tag name. """ try: return rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][ @@ -156,8 +164,12 @@ async def get_asset_digest(genome: str, asset: str, tag: str): tags=api_version_tags, ) async def get_archive_digest(genome: str, asset: str, tag: str): - """ - Returns the archive digest. Requires genome name asset name and tag name as an input. + """Return the archive digest. + + Args: + genome: Genome name. + asset: Asset name. + tag: Tag name. """ try: return rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][ @@ -175,10 +187,12 @@ async def get_archive_digest(genome: str, asset: str, tag: str): "/asset/{genome}/{asset}/log", operation_id=API_ID_LOG, tags=api_version_tags ) async def download_asset_build_log(genome: str, asset: str, tag: str = None): - """ - Returns a build log. Requires the genome name and the asset name as an input. + """Return a build log. - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. Default tag is returned otherwise. + Args: + genome: Genome name. + asset: Asset name. + tag: Tag name (default tag used if not specified). """ tag = tag or rgc.get_default_tag( genome, asset @@ -210,10 +224,12 @@ async def download_asset_build_log(genome: str, asset: str, tag: str = None): "/asset/{genome}/{asset}/recipe", operation_id=API_ID_RECIPE, tags=api_version_tags ) async def download_asset_build_recipe(genome: str, asset: str, tag: str = None): - """ - Returns a build recipe. Requires the genome name and the asset name as an input. + """Return a build recipe. - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. Default tag is returned otherwise. + Args: + genome: Genome name. + asset: Asset name. + tag: Tag name (default tag used if not specified). """ tag = tag or rgc.get_default_tag( genome, asset @@ -247,10 +263,12 @@ async def download_asset_build_recipe(genome: str, asset: str, tag: str = None): "/asset/{genome}/{asset}", operation_id=API_ID_ASSET_ATTRS, tags=api_version_tags ) async def download_asset_attributes(genome: str, asset: str, tag: str = None): - """ - Returns a dictionary of asset attributes, like archive size, archive digest etc. - Requires the genome name and the asset name as an input. - Optionally, 'tag' query parameter can be specified to get a tagged asset attributes. + """Return a dictionary of asset attributes (archive size, digest, etc.). + + Args: + genome: Genome name. + asset: Asset name. + tag: Tag name (default tag used if not specified). """ tag = tag or rgc.get_default_tag( genome, asset @@ -286,8 +304,10 @@ async def download_asset_attributes(genome: str, asset: str, tag: str = None): @router.get("/genome/{genome}/genome_digest", tags=api_version_tags) async def download_genome_digest(genome: str): - """ - Returns the genome digest. Requires the genome name as an input + """Return the genome digest. + + Args: + genome: Genome name. """ try: digest = rgc.get_genome_alias_digest(alias=genome) @@ -301,9 +321,10 @@ async def download_genome_digest(genome: str): @router.get("/genome/{genome}", operation_id=API_ID_GENOME_ATTRS, tags=api_version_tags) async def download_genome_attributes(genome: str): - """ - Returns a dictionary of genome attributes, like archive size, archive digest etc. - Requires the genome name name as an input. + """Return a dictionary of genome attributes (archive size, digest, etc.). + + Args: + genome: Genome name. """ try: attrs = rgc.get_genome_attributes(genome) @@ -319,8 +340,10 @@ async def download_genome_attributes(genome: str): @router.get("/genomes/{asset}", tags=api_version_tags) async def list_genomes_by_asset(asset: str): - """ - Returns a list of genomes that have the requested asset defined. Requires the asset name as an input. + """Return a list of genomes that have the requested asset defined. + + Args: + asset: Asset name. """ genomes = rgc.list_genomes_by_asset(asset) _LOGGER.info("serving genomes by '{}' asset: {}".format(asset, genomes)) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index fa81811..5b315e6 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -92,10 +92,7 @@ @router.get("/", tags=api_version_tags) @router.get("/index", tags=api_version_tags) async def index(request: Request): - """ - Returns a landing page HTML with the server resources ready do download. - No inputs required. - """ + """Return a landing page HTML with the server resources ready to download.""" _LOGGER.debug(f"RefGenConf object:\n{rgc}") templ_vars = { "request": request, @@ -112,17 +109,13 @@ async def index(request: Request): "/remotes/dict", tags=api_version_tags, response_model=Dict[str, Dict[str, str]] ) async def get_remotes_dict(): - """ - Returns the remotes section of the server configuration file - """ + """Return the remotes section of the server configuration file.""" return rgc["remotes"] if "remotes" in rgc else None @router.get("/genomes/splash/{genome}", tags=api_version_tags) async def genome_splash_page(request: Request, genome: str = g): - """ - Returns a genome splash page - """ + """Return a genome splash page.""" templ_vars = { "openapi_version": get_openapi_version(app), "genome": genome, @@ -147,9 +140,7 @@ async def genome_splash_page(request: Request, genome: str = g): async def asset_splash_page( request: Request, genome: str = g, asset: str = a, tag: Optional[str] = tq ): - """ - Returns an asset splash page - """ + """Return an asset splash page.""" tag = tag or rgc.get_default_tag( genome, asset ) # returns 'default' for nonexistent genome/asset; no need to catch @@ -203,9 +194,7 @@ async def asset_splash_page( @router.get("/genomes/list", response_model=List[str], tags=api_version_tags) async def list_available_genomes(): - """ - Returns a list of **genome digests** this server serves at least one asset for. - """ + """Return a list of genome digests this server serves at least one asset for.""" _LOGGER.info("serving genomes list") return list(rgc.genomes[IK]["aliases_raw"].keys()) @@ -217,9 +206,7 @@ async def list_available_genomes(): operation_id=API_VERSION + API_ID_ALIASES_DICT, ) async def get_alias_dict(): - """ - Returns a dictionary of lists of aliases keyed by the respective genome digests. - """ + """Return a dictionary of alias lists keyed by genome digests.""" _LOGGER.info("serving genomes alias dict") return rgc.genomes[IK]["aliases_raw"] @@ -235,9 +222,7 @@ async def list_available_assets( False, description="Whether to include seek keys in the response" ), ): - """ - Returns a list of assets that can be downloaded, keyed by the respective genome digests. - """ + """Return a list of assets that can be downloaded, keyed by genome digests.""" ret_dict = ( rgc.list(include_tags=True) if includeSeekKeys else rgc.list_assets_by_genome() ) @@ -255,11 +240,10 @@ async def list_available_assets( tags=api_version_tags, ) async def download_asset(genome: str = g, asset: str = a, tag: Optional[str] = tq): - """ - Returns an archive. Requires the genome name and the asset name as an input. + """Return an asset archive. - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. - Default tag is returned otherwise. + Optionally, 'tag' query parameter can be specified to get a tagged asset + archive. Default tag is returned otherwise. """ tag = tag or rgc.get_default_tag( genome, asset @@ -298,9 +282,7 @@ async def get_asset_file_path( "http", description="Remote data provider class" ), ): - """ - Returns a path to the unarchived asset file. - Requires a genome name, an asset name and a seek_key name as an input. + """Return a path to the unarchived asset file. Optionally, query parameters can be specified: @@ -327,9 +309,7 @@ async def get_asset_file_path( tags=api_version_tags, ) async def get_asset_default_tag(genome: str = g, asset: str = a): - """ - Returns the default tag name. Requires genome name and asset name as an input. - """ + """Return the default tag name for a genome/asset pair.""" return Response(content=rgc.get_default_tag(genome, asset), media_type="text/plain") @@ -340,9 +320,7 @@ async def get_asset_default_tag(genome: str = g, asset: str = a): tags=api_version_tags, ) async def get_asset_digest(genome: str = g, asset: str = a, tag: Optional[str] = tq): - """ - Returns the asset digest. Requires genome name asset name and tag name as an input. - """ + """Return the asset digest for a genome/asset:tag combination.""" tag = tag or DEFAULT_TAG try: return Response( @@ -364,9 +342,7 @@ async def get_asset_digest(genome: str = g, asset: str = a, tag: Optional[str] = tags=api_version_tags, ) async def get_archive_digest(genome: str = g, asset: str = a, tag: Optional[str] = tq): - """ - Returns the archive digest. Requires genome name asset name and tag name as an input. - """ + """Return the archive digest for a genome/asset:tag combination.""" tag = tag or DEFAULT_TAG try: return Response( @@ -389,11 +365,10 @@ async def get_archive_digest(genome: str = g, asset: str = a, tag: Optional[str] async def download_asset_build_recipe( genome: str = g, asset: str = a, tag: Optional[str] = tq ): - """ - Returns a build recipe. Requires the genome name and the asset name as an input. + """Return a build recipe. - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. - Default tag is returned otherwise. + Optionally, 'tag' query parameter can be specified. Default tag is returned + otherwise. """ return serve_json_for_asset( rgc=rgc, @@ -412,11 +387,10 @@ async def download_asset_build_recipe( async def download_asset_build_log( genome: str = g, asset: str = a, tag: Optional[str] = tq ): - """ - Returns a build log. Requires the genome name and the asset name as an input. + """Return a build log. - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. - Default tag is returned otherwise. + Optionally, 'tag' query parameter can be specified. Default tag is returned + otherwise. """ return serve_file_for_asset( rgc=rgc, @@ -435,12 +409,10 @@ async def download_asset_build_log( async def download_asset_directory_contents( genome: str = g, asset: str = a, tag: Optional[str] = tq ): - """ - Returns a asset directory tree file. - Requires the genome name and the asset name as an input. + """Return an asset directory tree file. - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. - Default tag is returned otherwise. + Optionally, 'tag' query parameter can be specified. Default tag is returned + otherwise. """ return serve_json_for_asset( rgc=rgc, @@ -460,10 +432,10 @@ async def download_asset_directory_contents( async def download_asset_attributes( genome: str = g, asset: str = a, tag: Optional[str] = tq ): - """ - Returns a dictionary of asset attributes, like archive size, archive digest etc. - Requires the genome name and the asset name as an input. - Optionally, 'tag' query parameter can be specified to get a tagged asset attributes. + """Return a dictionary of asset attributes (archive size, digest, etc.). + + Optionally, 'tag' query parameter can be specified to get tagged asset + attributes. """ tag = tag or rgc.get_default_tag( genome, asset @@ -495,10 +467,7 @@ async def download_asset_attributes( tags=api_version_tags, ) async def download_genome_attributes(genome: str = g): - """ - Returns a dictionary of genome attributes, like archive size, archive digest etc. - Requires the genome name name as an input. - """ + """Return a dictionary of genome attributes (archive size, digest, etc.).""" try: attrs = rgc.get_genome_attributes(genome) _LOGGER.info(f"attributes returned for genome '{genome}': \n{attrs}") @@ -513,10 +482,7 @@ async def download_genome_attributes(genome: str = g): "/genomes/by_asset/{asset}", response_model=List[str], tags=api_version_tags ) async def list_genomes_by_asset(asset: str = a): - """ - Returns a list of genomes that have the requested asset defined. - Requires the asset name as an input. - """ + """Return a list of genomes that have the requested asset defined.""" genomes = rgc.list_genomes_by_asset(asset) _LOGGER.info(f"serving genomes by '{asset}' asset: {genomes}") return genomes @@ -529,9 +495,7 @@ async def list_genomes_by_asset(asset: str = a): tags=api_version_tags, ) async def get_genome_alias_digest(alias: str = al): - """ - Returns the genome digest. Requires the genome name as an input - """ + """Return the genome digest for a given alias.""" try: digest = rgc.get_genome_alias_digest(alias=alias) _LOGGER.info(f"digest returned for '{alias}': {digest}") @@ -549,9 +513,7 @@ async def get_genome_alias_digest(alias: str = al): tags=api_version_tags, ) async def get_genome_alias(genome_digest: str = g): - """ - Returns the genome digest. Requires the genome name as an input - """ + """Return the genome aliases for a given digest.""" try: alias = rgc[CFG_GENOMES_KEY][genome_digest][CFG_ALIASES_KEY] _LOGGER.info(f"alias returned for '{genome_digest}': {alias}") diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index d3c92ea..f931c73 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -22,18 +22,19 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): - """ - Takes the RefGenConf object and builds individual tar archives - that can be then served with 'refgenieserver serve'. Additionally determines their md5 checksums, file sizes and - updates the original refgenie config with these data. If the --asset and/or --genome options are used (specific - build is requested) the archiver will check for the existence of config file saved in the path provided in - `genome_server` in the original config and update it so that no archive metadata is lost - - :param RefGenConf rgc: configuration object with the data to build the servable archives for - :param list[dict] registry_paths: a collection of mappings that identifies the assets to update - :param bool force: whether to force the build of archive, regardless of its existence - :param bool remove: whether remove specified genome/asset:tag from the archive - :param str cfg_path: config file path + """Build tar archives for serving with 'refgenieserver serve'. + + Determines md5 checksums and file sizes and updates the original refgenie + config with these data. If specific assets/genomes are requested, checks + for the server config file and updates it to preserve archive metadata. + + Args: + rgc: Configuration object with data to build servable archives for. + registry_paths: Collection of mappings identifying assets to update. + force: Whether to force the build regardless of existence. + remove: Whether to remove specified genome/asset:tag from the archive. + cfg_path: Config file path. + genomes_desc: Path to CSV file with genome descriptions. """ if float(rgc[CFG_VERSION_KEY]) < float(REQ_CFG_VERSION): raise ConfigNotCompliantError( @@ -305,13 +306,14 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): def _check_tgz(path, output): - """ - Check if file exists and tar it. - If gzipping is requested, the pigz software is used if available. + """Check if file exists and tar it, using pigz if available. + + Args: + path: Path to the file to be tarred. + output: Path to the result file. - :param str path: path to the file to be tarred - :param str output: path to the result file - :raise OSError: if the file/directory meant to be archived does not exist + Raises: + OSError: If the file/directory to be archived does not exist. """ pth, tag_name = os.path.split(path) if os.path.exists(path): @@ -330,17 +332,20 @@ def _check_tgz(path, output): def _check_tgz_legacy(path, output, asset_name, genome_name, alias): - """ - NOTE: This is a duplication of the _check_tgz function, kept separate as in - the future this step will be simply removed. + """Legacy version of _check_tgz, to be removed in the future. + + Checks if file exists and tars it with alias-based naming. Uses pigz + if available. - Check if file exists and tar it. - If gzipping is requested, the availability of pigz software is checked and used. + Args: + path: Path to the file to be tarred. + output: Path to the result file. + asset_name: Name of the asset. + genome_name: Genome digest name. + alias: Genome alias or list of aliases. - :param str path: path to the file to be tarred - :param str output: path to the result file - :param str asset_name: name of the asset - :raise OSError: if the file/directory meant to be archived does not exist + Raises: + OSError: If the file/directory to be archived does not exist. """ # TODO: remove in the future if isinstance(alias, str): @@ -369,11 +374,13 @@ def _check_tgz_legacy(path, output, asset_name, genome_name, alias): def _copy_log(input_dir, target_dir, asset_name, tag_name): - """ - Copy the log file + """Copy the build log file. - :param str input_dir: path to the directory to copy the recipe from - :param str target_dir: path to the directory to copy the recipe to + Args: + input_dir: Path to the source directory. + target_dir: Path to the destination directory. + asset_name: Asset name. + tag_name: Tag name. """ log_path = f"{input_dir}/{BUILD_STATS_DIR}/{ORI_LOG_NAME}" if log_path and os.path.exists(log_path): @@ -390,11 +397,11 @@ def _copy_log(input_dir, target_dir, asset_name, tag_name): def _copy_asset_dir(input_dir, target_dir): - """ - Copy the asset directory + """Copy the asset directory via rsync. - :param str input_dir: path to the directory to copy the asset dir from - :param str target_dir: path to the directory to copy the asset dir to + Args: + input_dir: Path to the source directory. + target_dir: Path to the destination directory. """ if input_dir and os.path.exists(input_dir): run( @@ -407,12 +414,12 @@ def _copy_asset_dir(input_dir, target_dir): def _get_asset_dir_contents(asset_dir, asset_name, tag_name): - """ - Create a file tree with contents of the unarchived asset directory + """Create a JSON file listing the unarchived asset directory contents. - :param str asset_dir: path to the asset directory to get the contents of - :param str asset_name: name of the asset - :param str tag_name: name of the tag + Args: + asset_dir: Path to the asset directory. + asset_name: Name of the asset. + tag_name: Name of the tag. """ asset_dir_contents_file_path = os.path.join( os.path.dirname(asset_dir), @@ -433,13 +440,13 @@ def _get_asset_dir_contents(asset_dir, asset_name, tag_name): def _copy_recipe(input_dir, target_dir, asset_name, tag_name): - """ - Copy the recipe + """Copy the build recipe file. - :param str input_dir: path to the directory to copy the recipe from - :param str target_dir: path to the directory to copy the recipe to - :param str asset_name: asset name - :param str tag_name: tag name + Args: + input_dir: Path to the source directory. + target_dir: Path to the destination directory. + asset_name: Asset name. + tag_name: Tag name. """ recipe_path = ( f"{input_dir}/{BUILD_STATS_DIR}/" @@ -453,14 +460,15 @@ def _copy_recipe(input_dir, target_dir, asset_name, tag_name): def _remove_archive(rgc, registry_paths, cfg_archive_folder_key=CFG_ARCHIVE_KEY): - """ - Remove archives and corresponding entries from the RefGenConf object + """Remove archives and corresponding entries from the RefGenConf object. + + Args: + rgc: Configuration object to remove entries from. + registry_paths: Entries to remove. + cfg_archive_folder_key: Archive folder key in the genome config file. - :param refgenconf.RefGenConf rgc: object to remove the entries from - :param list[dict] registry_paths: entries to remove - :param str cfg_archive_folder_key: configuration archive folder key in the genome - configuration file - :return list[str]: removed file paths + Returns: + List of removed file paths. """ ret = [] for registry_path in _correct_registry_paths(registry_paths): @@ -506,20 +514,26 @@ def _remove_archive(rgc, registry_paths, cfg_archive_folder_key=CFG_ARCHIVE_KEY) def _correct_registry_paths(registry_paths): - """ - parse_registry_path function recognizes the 'item' as the central element of the asset registry path. - We require the 'namespace' to be the central one. Consequently, this function swaps them. + """Correct registry paths by swapping 'namespace' and 'item' keys. + + parse_registry_path recognizes 'item' as the central element, but we + require 'namespace' to be central. This function swaps them. + + Args: + registry_paths: Output of parse_registry_path. - :param list[dict] registry_paths: output of parse_registry_path - :return list[dict]: corrected registry paths + Returns: + Corrected registry paths. """ def _swap(rp): - """ - Swaps dict values of 'namespace' with 'item' keys + """Swap 'namespace' and 'item' values in a registry path dict. + + Args: + rp: Dict to swap values for. - :param dict rp: dict to swap values for - :return dict: dict with swapped values + Returns: + Dict with swapped values. """ rp["namespace"] = rp["item"] rp["item"] = None @@ -529,11 +543,13 @@ def _swap(rp): def _get_paths_element(registry_paths, element): - """ - Extract the specific element from a collection of registry paths + """Extract a specific element from a collection of registry paths. + + Args: + registry_paths: Output of parse_registry_path. + element: One of 'protocol', 'namespace', 'item', or 'tag'. - :param list[dict] registry_paths: output of parse_registry_path - :param str element: 'protocol', 'namespace', 'item' or 'tag' - :return list[str]: extracted elements + Returns: + List of extracted elements. """ return [x[element] for x in _correct_registry_paths(registry_paths)] From 971b944b8d68e8246a01462941c57e23a8195a49 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 9 Feb 2026 21:59:55 -0500 Subject: [PATCH 03/15] update cicd, modernize --- .github/workflows/black.yml | 10 ++++-- .github/workflows/python-publish.yml | 11 +++--- refgenieserver/__init__.py | 2 ++ refgenieserver/_version.py | 2 ++ refgenieserver/const.py | 42 ++++++++++++---------- refgenieserver/data_models.py | 2 ++ refgenieserver/helpers.py | 53 +++++++++++++++++++++------- refgenieserver/main.py | 4 ++- refgenieserver/routers/private.py | 4 ++- refgenieserver/routers/version1.py | 16 +++++---- refgenieserver/routers/version2.py | 40 +++++++++++++-------- refgenieserver/routers/version3.py | 48 ++++++++++++++----------- refgenieserver/server_builder.py | 39 ++++++++++++++------ 13 files changed, 177 insertions(+), 96 deletions(-) diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index 63e1851..7a50626 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -6,6 +6,10 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - uses: psf/black@20.8b1 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - run: pip install ruff + - run: ruff check . + - run: ruff format --check . diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 4e1ef42..eef44ed 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -1,4 +1,4 @@ -# This workflows will upload a Python Package using Twine when a release is created +# This workflow will upload a Python Package using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: Upload Python Package @@ -9,23 +9,22 @@ on: jobs: deploy: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies run: | python -m pip install --upgrade pip - pip install setuptools wheel twine + pip install build twine - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | - python setup.py sdist bdist_wheel + python -m build twine upload dist/* diff --git a/refgenieserver/__init__.py b/refgenieserver/__init__.py index b898684..87db908 100644 --- a/refgenieserver/__init__.py +++ b/refgenieserver/__init__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from .const import * from .helpers import * from .main import * diff --git a/refgenieserver/_version.py b/refgenieserver/_version.py index 9e23b5f..ed9369f 100644 --- a/refgenieserver/_version.py +++ b/refgenieserver/_version.py @@ -1,3 +1,5 @@ +from __future__ import annotations + # Version is defined in pyproject.toml from importlib.metadata import version diff --git a/refgenieserver/const.py b/refgenieserver/const.py index 10bede7..749c53a 100644 --- a/refgenieserver/const.py +++ b/refgenieserver/const.py @@ -1,5 +1,7 @@ """Package constants""" +from __future__ import annotations + import os from platform import python_version @@ -8,39 +10,41 @@ from ._version import __version__ as server_v -ALL_VERSIONS = { +ALL_VERSIONS: dict[str, str] = { "server_version": server_v, "rgc_version": rgc_v, "python_version": python_version(), } -PKG_NAME = "refgenieserver" -DEFAULT_PORT = 80 -BASE_DIR = "/genomes" +PKG_NAME: str = "refgenieserver" +DEFAULT_PORT: int = 80 +BASE_DIR: str = "/genomes" # if running outside of the Docker container 'BASE_DIR' can be replaced with rgc[CFG_ARCHIVE_KEY] -TEMPLATES_DIRNAME = "templates" -TEMPLATES_PATH = os.path.join( +TEMPLATES_DIRNAME: str = "templates" +TEMPLATES_PATH: str = os.path.join( os.path.dirname(os.path.abspath(__file__)), TEMPLATES_DIRNAME ) -STATIC_DIRNAME = "static" -STATIC_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), STATIC_DIRNAME) -LOG_FORMAT = "%(levelname)s in %(funcName)s: %(message)s" -MSG_404 = "No such {} on server" -DESC_PLACEHOLDER = "No description" -CHECKSUM_PLACEHOLDER = "No digest" +STATIC_DIRNAME: str = "static" +STATIC_PATH: str = os.path.join( + os.path.dirname(os.path.abspath(__file__)), STATIC_DIRNAME +) +LOG_FORMAT: str = "%(levelname)s in %(funcName)s: %(message)s" +MSG_404: str = "No such {} on server" +DESC_PLACEHOLDER: str = "No description" +CHECKSUM_PLACEHOLDER: str = "No digest" # Here we define the key name changes; format: {"new_key": "old_key"} # This dict is then used to pre-process the attributes dict before serving to the old versions of the client -CHANGED_KEYS = {CFG_ASSET_PATH_KEY: "path"} +CHANGED_KEYS: dict[str, str] = {CFG_ASSET_PATH_KEY: "path"} # TODO: to be removed in the future -CFG_LEGACY_ARCHIVE_CHECKSUM_KEY = "legacy_archive_digest" +CFG_LEGACY_ARCHIVE_CHECKSUM_KEY: str = "legacy_archive_digest" -API1_ID = "APIv1" -API2_ID = "APIv2" -API3_ID = "APIv3" -PRIV_API_ID = "PRIVATE_API" +API1_ID: str = "APIv1" +API2_ID: str = "APIv2" +API3_ID: str = "APIv3" +PRIV_API_ID: str = "PRIVATE_API" -TAGS_METADATA = [ +TAGS_METADATA: list[dict[str, str]] = [ { "name": API3_ID, "description": "These are the most recent API endpoints. " diff --git a/refgenieserver/data_models.py b/refgenieserver/data_models.py index 439c90a..4814314 100644 --- a/refgenieserver/data_models.py +++ b/refgenieserver/data_models.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import Dict, List from pydantic import BaseModel diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index f98d624..0838159 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -1,6 +1,10 @@ +from __future__ import annotations + +import argparse import logging from json import load from string import Formatter +from typing import TYPE_CHECKING, Any from fastapi import HTTPException from fastapi.responses import FileResponse, JSONResponse, RedirectResponse @@ -9,6 +13,11 @@ from ubiquerg import VersionInHelpParser, is_url from yacman import get_first_env_var +if TYPE_CHECKING: + from fastapi import FastAPI + from refgenconf import RefGenConf + from starlette.responses import Response + from ._version import __version__ as v from .const import * @@ -16,7 +25,7 @@ _LOGGER = logging.getLogger(PKG_NAME) -def build_parser(): +def build_parser() -> argparse.ArgumentParser: """Build the argument parser. Returns: @@ -112,7 +121,7 @@ def add_subparser(cmd, description): return parser -def preprocess_attrs(attrs): +def preprocess_attrs(attrs: dict) -> dict: """Rename keys based on the CHANGED_KEYS mapping (new_key:old_key). Args: @@ -131,7 +140,7 @@ def preprocess_attrs(attrs): return attrs_cpy -def get_openapi_version(app): +def get_openapi_version(app: FastAPI) -> str: """Get the OpenAPI version from the OpenAPI description JSON. Args: @@ -148,8 +157,11 @@ def get_openapi_version(app): def get_datapath_for_genome( - rgc, fill_dict, pth_templ="{base}/{genome}/{file_name}", remote_key=None -): + rgc: RefGenConf, + fill_dict: dict[str, str], + pth_templ: str = "{base}/{genome}/{file_name}", + remote_key: str | None = None, +) -> tuple[str, bool]: """Get the path to the data file to serve. Depending on the remote URL base being set or not, returns either a remote @@ -188,7 +200,7 @@ def get_datapath_for_genome( return pth_templ.format(**fill_dict), remote -def is_data_remote(rgc): +def is_data_remote(rgc: RefGenConf) -> bool: """Determine if the server genome config defines a remote data source. Checks for a 'remotes' key with correct structure (each remote has a @@ -214,7 +226,7 @@ def is_data_remote(rgc): ) -def purge_nonservable(rgc): +def purge_nonservable(rgc: RefGenConf) -> RefGenConf: """Remove entries not processed by the archiver that should not be served. Args: @@ -224,7 +236,7 @@ def purge_nonservable(rgc): The configuration object with only servable entries. """ - def _check_servable(rgc, genome, asset, tag): + def _check_servable(rgc: RefGenConf, genome: str, asset: str, tag: str) -> bool: tag_data = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][ CFG_ASSET_TAGS_KEY ][tag] @@ -248,7 +260,9 @@ def _check_servable(rgc, genome, asset, tag): return rgc -def safely_get_example(rgc, entity, rgc_method, default, **kwargs): +def safely_get_example( + rgc: RefGenConf, entity: str, rgc_method: str, default: str, **kwargs: Any +) -> str: """Safely get an example value from the config, falling back to a default. Args: @@ -272,7 +286,14 @@ def safely_get_example(rgc, entity, rgc_method, default, **kwargs): return default -def create_asset_file_path(rgc, genome, asset, tag, seek_key, remote_key="http"): +def create_asset_file_path( + rgc: RefGenConf, + genome: str, + asset: str, + tag: str | None, + seek_key: str, + remote_key: str = "http", +) -> str: """Construct a path to an unarchived asset file. Args: @@ -316,7 +337,9 @@ def create_asset_file_path(rgc, genome, asset, tag, seek_key, remote_key="http") return path -def serve_file_for_asset(rgc, genome, asset, tag, template): +def serve_file_for_asset( + rgc: RefGenConf, genome: str, asset: str, tag: str | None, template: str +) -> Response: """Serve a file, like a build log. Args: @@ -353,7 +376,9 @@ def serve_file_for_asset(rgc, genome, asset, tag, template): raise HTTPException(status_code=404, detail=msg) -def serve_json_for_asset(rgc, genome, asset, tag, template): +def serve_json_for_asset( + rgc: RefGenConf, genome: str, asset: str, tag: str | None, template: str +) -> Response: """Serve a JSON object, like a recipe or asset directory contents. Args: @@ -390,7 +415,9 @@ def serve_json_for_asset(rgc, genome, asset, tag, template): raise HTTPException(status_code=404, detail=msg) -def get_asset_dir_contents(rgc, genome, asset, tag): +def get_asset_dir_contents( + rgc: RefGenConf, genome: str, asset: str, tag: str | None +) -> list: """Get the asset directory contents as a list. Args: diff --git a/refgenieserver/main.py b/refgenieserver/main.py index c7323f2..034ba25 100644 --- a/refgenieserver/main.py +++ b/refgenieserver/main.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import sys import logmuse @@ -24,7 +26,7 @@ templates.env.filters["os_path_join"] = lambda paths: os.path.join(*paths) -def main(): +def main() -> None: """Entry point for the refgenieserver CLI.""" global rgc, _LOGGER parser = build_parser() diff --git a/refgenieserver/routers/private.py b/refgenieserver/routers/private.py index d6ce909..5b9223a 100644 --- a/refgenieserver/routers/private.py +++ b/refgenieserver/routers/private.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from fastapi import APIRouter from ..const import * @@ -15,7 +17,7 @@ operation_id=PRIVATE_API + API_ID_GENOMES_DICT, response_model=Dict[str, Genome], ) -async def get_genomes_dict(): +async def get_genomes_dict() -> dict: """Return the entire 'genomes' section of the config (private endpoint).""" _LOGGER.info(f"serving genomes dict: '{rgc[CFG_GENOMES_KEY]}'") return rgc[CFG_GENOMES_KEY] diff --git a/refgenieserver/routers/version1.py b/refgenieserver/routers/version1.py index a01436d..3785ca0 100644 --- a/refgenieserver/routers/version1.py +++ b/refgenieserver/routers/version1.py @@ -1,9 +1,11 @@ +from __future__ import annotations + from copy import copy from fastapi import APIRouter, HTTPException from refgenconf.helpers import replace_str_in_obj from starlette.requests import Request -from starlette.responses import FileResponse, RedirectResponse +from starlette.responses import FileResponse, RedirectResponse, Response from ..const import * from ..helpers import get_datapath_for_genome, get_openapi_version, preprocess_attrs @@ -16,7 +18,7 @@ @router.get("/", tags=api_version_tags) @router.get("/index", tags=api_version_tags) -async def index(request: Request): +async def index(request: Request) -> Response: """Return a landing page HTML with the server resources ready to download.""" _LOGGER.debug("RefGenConf object:\n{}".format(rgc)) templ_vars = { @@ -30,14 +32,14 @@ async def index(request: Request): @router.get("/genomes", tags=api_version_tags) -def list_available_genomes(): +def list_available_genomes() -> list[str]: """Return a list of genomes this server holds at least one asset for.""" _LOGGER.info("serving genomes string: '{}'".format(rgc.genomes_str())) return rgc.genomes_list() @router.get("/assets", tags=api_version_tags) -def list_available_assets(): +def list_available_assets() -> dict: """Return a list of all assets that can be downloaded.""" ret_dict = rgc.list(include_tags=True) _LOGGER.info("serving assets dict: {}".format(ret_dict)) @@ -45,7 +47,7 @@ def list_available_assets(): @router.get("/asset/{genome}/{asset}/archive", tags=api_version_tags) -async def download_asset(genome: str, asset: str, tag: str = None): +async def download_asset(genome: str, asset: str, tag: str | None = None) -> Response: """Return an asset archive. Since tags were introduced, the default tag is selected behind the scenes. @@ -83,7 +85,7 @@ async def download_asset(genome: str, asset: str, tag: str = None): @router.get("/asset/{genome}/{asset}", tags=api_version_tags) -def download_asset_attributes(genome: str, asset: str): +def download_asset_attributes(genome: str, asset: str) -> dict: """Return a dictionary of asset attributes (archive size, checksum, etc.). Since tags were introduced, the default tag is selected behind the scenes. @@ -124,7 +126,7 @@ def download_asset_attributes(genome: str, asset: str): @router.get("/genomes/{asset}", tags=api_version_tags) -def list_genomes_by_asset(asset: str): +def list_genomes_by_asset(asset: str) -> list[str]: """Return a list of genomes that have the requested asset defined. Args: diff --git a/refgenieserver/routers/version2.py b/refgenieserver/routers/version2.py index 551afad..9959309 100644 --- a/refgenieserver/routers/version2.py +++ b/refgenieserver/routers/version2.py @@ -1,10 +1,12 @@ +from __future__ import annotations + from copy import copy from fastapi import APIRouter, HTTPException from refgenconf.helpers import replace_str_in_obj from refgenconf.refgenconf import map_paths_by_id from starlette.requests import Request -from starlette.responses import FileResponse, JSONResponse, RedirectResponse +from starlette.responses import FileResponse, JSONResponse, RedirectResponse, Response from ubiquerg import parse_registry_path from ..const import * @@ -18,7 +20,7 @@ @router.get("/", tags=api_version_tags) @router.get("/index", tags=api_version_tags) -async def index(request: Request): +async def index(request: Request) -> Response: """Return a landing page HTML with the server resources ready to download.""" _LOGGER.debug("RefGenConf object:\n{}".format(rgc)) templ_vars = { @@ -32,7 +34,9 @@ async def index(request: Request): @router.get("/asset/{genome}/{asset}/splash", tags=api_version_tags) -async def asset_splash_page(request: Request, genome: str, asset: str, tag: str = None): +async def asset_splash_page( + request: Request, genome: str, asset: str, tag: str | None = None +) -> Response: """Return an asset splash page. Args: @@ -64,14 +68,14 @@ async def asset_splash_page(request: Request, genome: str, asset: str, tag: str @router.get("/genomes", tags=api_version_tags) -async def list_available_genomes(): +async def list_available_genomes() -> list[str]: """Return a list of genomes this server holds at least one asset for.""" _LOGGER.info("serving genomes string: '{}'".format(rgc.genomes_str())) return rgc.genomes_list() @router.get("/assets", operation_id=API_ID_ASSETS, tags=api_version_tags) -async def list_available_assets(): +async def list_available_assets() -> dict: """Return a list of all assets that can be downloaded.""" ret_dict = rgc.list(include_tags=True) _LOGGER.info("serving assets dict: {}".format(ret_dict)) @@ -83,7 +87,7 @@ async def list_available_assets(): operation_id=API_ID_ARCHIVE, tags=api_version_tags, ) -async def download_asset(genome: str, asset: str, tag: str = None): +async def download_asset(genome: str, asset: str, tag: str | None = None) -> Response: """Return an asset archive. Args: @@ -123,7 +127,7 @@ async def download_asset(genome: str, asset: str, tag: str = None): operation_id=API_ID_DEFAULT_TAG, tags=api_version_tags, ) -async def get_asset_default_tag(genome: str, asset: str): +async def get_asset_default_tag(genome: str, asset: str) -> str: """Return the default tag name. Args: @@ -138,7 +142,7 @@ async def get_asset_default_tag(genome: str, asset: str): operation_id=API_ID_DIGEST, tags=api_version_tags, ) -async def get_asset_digest(genome: str, asset: str, tag: str): +async def get_asset_digest(genome: str, asset: str, tag: str) -> str: """Return the asset digest. Args: @@ -163,7 +167,7 @@ async def get_asset_digest(genome: str, asset: str, tag: str): operation_id=API_ID_ARCHIVE_DIGEST, tags=api_version_tags, ) -async def get_archive_digest(genome: str, asset: str, tag: str): +async def get_archive_digest(genome: str, asset: str, tag: str) -> str: """Return the archive digest. Args: @@ -186,7 +190,9 @@ async def get_archive_digest(genome: str, asset: str, tag: str): @router.get( "/asset/{genome}/{asset}/log", operation_id=API_ID_LOG, tags=api_version_tags ) -async def download_asset_build_log(genome: str, asset: str, tag: str = None): +async def download_asset_build_log( + genome: str, asset: str, tag: str | None = None +) -> Response: """Return a build log. Args: @@ -223,7 +229,9 @@ async def download_asset_build_log(genome: str, asset: str, tag: str = None): @router.get( "/asset/{genome}/{asset}/recipe", operation_id=API_ID_RECIPE, tags=api_version_tags ) -async def download_asset_build_recipe(genome: str, asset: str, tag: str = None): +async def download_asset_build_recipe( + genome: str, asset: str, tag: str | None = None +) -> Response: """Return a build recipe. Args: @@ -262,7 +270,9 @@ async def download_asset_build_recipe(genome: str, asset: str, tag: str = None): @router.get( "/asset/{genome}/{asset}", operation_id=API_ID_ASSET_ATTRS, tags=api_version_tags ) -async def download_asset_attributes(genome: str, asset: str, tag: str = None): +async def download_asset_attributes( + genome: str, asset: str, tag: str | None = None +) -> dict: """Return a dictionary of asset attributes (archive size, digest, etc.). Args: @@ -303,7 +313,7 @@ async def download_asset_attributes(genome: str, asset: str, tag: str = None): @router.get("/genome/{genome}/genome_digest", tags=api_version_tags) -async def download_genome_digest(genome: str): +async def download_genome_digest(genome: str) -> str: """Return the genome digest. Args: @@ -320,7 +330,7 @@ async def download_genome_digest(genome: str): @router.get("/genome/{genome}", operation_id=API_ID_GENOME_ATTRS, tags=api_version_tags) -async def download_genome_attributes(genome: str): +async def download_genome_attributes(genome: str) -> dict: """Return a dictionary of genome attributes (archive size, digest, etc.). Args: @@ -339,7 +349,7 @@ async def download_genome_attributes(genome: str): @router.get("/genomes/{asset}", tags=api_version_tags) -async def list_genomes_by_asset(asset: str): +async def list_genomes_by_asset(asset: str) -> list[str]: """Return a list of genomes that have the requested asset defined. Args: diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 5b315e6..76b3932 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from copy import copy from datetime import date from enum import Enum @@ -91,7 +93,7 @@ @router.get("/", tags=api_version_tags) @router.get("/index", tags=api_version_tags) -async def index(request: Request): +async def index(request: Request) -> Response: """Return a landing page HTML with the server resources ready to download.""" _LOGGER.debug(f"RefGenConf object:\n{rgc}") templ_vars = { @@ -108,13 +110,13 @@ async def index(request: Request): @router.get( "/remotes/dict", tags=api_version_tags, response_model=Dict[str, Dict[str, str]] ) -async def get_remotes_dict(): +async def get_remotes_dict() -> dict[str, dict[str, str]] | None: """Return the remotes section of the server configuration file.""" return rgc["remotes"] if "remotes" in rgc else None @router.get("/genomes/splash/{genome}", tags=api_version_tags) -async def genome_splash_page(request: Request, genome: str = g): +async def genome_splash_page(request: Request, genome: str = g) -> Response: """Return a genome splash page.""" templ_vars = { "openapi_version": get_openapi_version(app), @@ -139,7 +141,7 @@ async def genome_splash_page(request: Request, genome: str = g): @router.get("/assets/splash/{genome}/{asset}", tags=api_version_tags) async def asset_splash_page( request: Request, genome: str = g, asset: str = a, tag: Optional[str] = tq -): +) -> Response: """Return an asset splash page.""" tag = tag or rgc.get_default_tag( genome, asset @@ -193,7 +195,7 @@ async def asset_splash_page( @router.get("/genomes/list", response_model=List[str], tags=api_version_tags) -async def list_available_genomes(): +async def list_available_genomes() -> list[str]: """Return a list of genome digests this server serves at least one asset for.""" _LOGGER.info("serving genomes list") return list(rgc.genomes[IK]["aliases_raw"].keys()) @@ -205,7 +207,7 @@ async def list_available_genomes(): tags=api_version_tags, operation_id=API_VERSION + API_ID_ALIASES_DICT, ) -async def get_alias_dict(): +async def get_alias_dict() -> dict[str, list[str]]: """Return a dictionary of alias lists keyed by genome digests.""" _LOGGER.info("serving genomes alias dict") return rgc.genomes[IK]["aliases_raw"] @@ -221,7 +223,7 @@ async def list_available_assets( includeSeekKeys: Optional[bool] = Query( False, description="Whether to include seek keys in the response" ), -): +) -> dict[str, list[str]]: """Return a list of assets that can be downloaded, keyed by genome digests.""" ret_dict = ( rgc.list(include_tags=True) if includeSeekKeys else rgc.list_assets_by_genome() @@ -239,7 +241,9 @@ async def list_available_assets( operation_id=API_VERSION + API_ID_ARCHIVE, tags=api_version_tags, ) -async def download_asset(genome: str = g, asset: str = a, tag: Optional[str] = tq): +async def download_asset( + genome: str = g, asset: str = a, tag: Optional[str] = tq +) -> Response: """Return an asset archive. Optionally, 'tag' query parameter can be specified to get a tagged asset @@ -281,7 +285,7 @@ async def get_asset_file_path( remoteClass: RemoteClassEnum = Query( "http", description="Remote data provider class" ), -): +) -> Response: """Return a path to the unarchived asset file. Optionally, query parameters can be specified: @@ -308,7 +312,7 @@ async def get_asset_file_path( response_model=str, tags=api_version_tags, ) -async def get_asset_default_tag(genome: str = g, asset: str = a): +async def get_asset_default_tag(genome: str = g, asset: str = a) -> Response: """Return the default tag name for a genome/asset pair.""" return Response(content=rgc.get_default_tag(genome, asset), media_type="text/plain") @@ -319,7 +323,9 @@ async def get_asset_default_tag(genome: str = g, asset: str = a): response_model=str, tags=api_version_tags, ) -async def get_asset_digest(genome: str = g, asset: str = a, tag: Optional[str] = tq): +async def get_asset_digest( + genome: str = g, asset: str = a, tag: Optional[str] = tq +) -> Response: """Return the asset digest for a genome/asset:tag combination.""" tag = tag or DEFAULT_TAG try: @@ -341,7 +347,9 @@ async def get_asset_digest(genome: str = g, asset: str = a, tag: Optional[str] = response_model=str, tags=api_version_tags, ) -async def get_archive_digest(genome: str = g, asset: str = a, tag: Optional[str] = tq): +async def get_archive_digest( + genome: str = g, asset: str = a, tag: Optional[str] = tq +) -> Response: """Return the archive digest for a genome/asset:tag combination.""" tag = tag or DEFAULT_TAG try: @@ -364,7 +372,7 @@ async def get_archive_digest(genome: str = g, asset: str = a, tag: Optional[str] ) async def download_asset_build_recipe( genome: str = g, asset: str = a, tag: Optional[str] = tq -): +) -> Response: """Return a build recipe. Optionally, 'tag' query parameter can be specified. Default tag is returned @@ -386,7 +394,7 @@ async def download_asset_build_recipe( ) async def download_asset_build_log( genome: str = g, asset: str = a, tag: Optional[str] = tq -): +) -> Response: """Return a build log. Optionally, 'tag' query parameter can be specified. Default tag is returned @@ -408,7 +416,7 @@ async def download_asset_build_log( ) async def download_asset_directory_contents( genome: str = g, asset: str = a, tag: Optional[str] = tq -): +) -> Response: """Return an asset directory tree file. Optionally, 'tag' query parameter can be specified. Default tag is returned @@ -431,7 +439,7 @@ async def download_asset_directory_contents( ) async def download_asset_attributes( genome: str = g, asset: str = a, tag: Optional[str] = tq -): +) -> dict: """Return a dictionary of asset attributes (archive size, digest, etc.). Optionally, 'tag' query parameter can be specified to get tagged asset @@ -466,7 +474,7 @@ async def download_asset_attributes( response_model=Dict[str, str], tags=api_version_tags, ) -async def download_genome_attributes(genome: str = g): +async def download_genome_attributes(genome: str = g) -> dict: """Return a dictionary of genome attributes (archive size, digest, etc.).""" try: attrs = rgc.get_genome_attributes(genome) @@ -481,7 +489,7 @@ async def download_genome_attributes(genome: str = g): @router.get( "/genomes/by_asset/{asset}", response_model=List[str], tags=api_version_tags ) -async def list_genomes_by_asset(asset: str = a): +async def list_genomes_by_asset(asset: str = a) -> list[str]: """Return a list of genomes that have the requested asset defined.""" genomes = rgc.list_genomes_by_asset(asset) _LOGGER.info(f"serving genomes by '{asset}' asset: {genomes}") @@ -494,7 +502,7 @@ async def list_genomes_by_asset(asset: str = a): response_model=str, tags=api_version_tags, ) -async def get_genome_alias_digest(alias: str = al): +async def get_genome_alias_digest(alias: str = al) -> Response: """Return the genome digest for a given alias.""" try: digest = rgc.get_genome_alias_digest(alias=alias) @@ -512,7 +520,7 @@ async def get_genome_alias_digest(alias: str = al): response_model=List[str], tags=api_version_tags, ) -async def get_genome_alias(genome_digest: str = g): +async def get_genome_alias(genome_digest: str = g) -> list[str]: """Return the genome aliases for a given digest.""" try: alias = rgc[CFG_GENOMES_KEY][genome_digest][CFG_ALIASES_KEY] diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index f931c73..cccfd0a 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging import sys from glob import glob @@ -21,7 +23,14 @@ _LOGGER = logging.getLogger(PKG_NAME) -def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): +def archive( + rgc: RefGenConf, + registry_paths: list[dict] | None, + force: bool, + remove: bool, + cfg_path: str, + genomes_desc: str | None, +) -> None: """Build tar archives for serving with 'refgenieserver serve'. Determines md5 checksums and file sizes and updates the original refgenie @@ -305,7 +314,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): _LOGGER.info(f"Builder finished; server config file saved: {rgc_server.file_path}") -def _check_tgz(path, output): +def _check_tgz(path: str, output: str) -> None: """Check if file exists and tar it, using pigz if available. Args: @@ -331,7 +340,9 @@ def _check_tgz(path, output): raise OSError(f"Entity '{path}' does not exist") -def _check_tgz_legacy(path, output, asset_name, genome_name, alias): +def _check_tgz_legacy( + path: str, output: str, asset_name: str, genome_name: str, alias: str | list[str] +) -> None: """Legacy version of _check_tgz, to be removed in the future. Checks if file exists and tars it with alias-based naming. Uses pigz @@ -373,7 +384,7 @@ def _check_tgz_legacy(path, output, asset_name, genome_name, alias): raise OSError(f"Entity '{path}' does not exist") -def _copy_log(input_dir, target_dir, asset_name, tag_name): +def _copy_log(input_dir: str, target_dir: str, asset_name: str, tag_name: str) -> None: """Copy the build log file. Args: @@ -396,7 +407,7 @@ def _copy_log(input_dir, target_dir, asset_name, tag_name): _LOGGER.warning(f"Log not found: {log_path}") -def _copy_asset_dir(input_dir, target_dir): +def _copy_asset_dir(input_dir: str, target_dir: str) -> None: """Copy the asset directory via rsync. Args: @@ -413,7 +424,7 @@ def _copy_asset_dir(input_dir, target_dir): _LOGGER.warning(f"Asset directory not found: {input_dir}") -def _get_asset_dir_contents(asset_dir, asset_name, tag_name): +def _get_asset_dir_contents(asset_dir: str, asset_name: str, tag_name: str) -> None: """Create a JSON file listing the unarchived asset directory contents. Args: @@ -439,7 +450,9 @@ def _get_asset_dir_contents(asset_dir, asset_name, tag_name): ) -def _copy_recipe(input_dir, target_dir, asset_name, tag_name): +def _copy_recipe( + input_dir: str, target_dir: str, asset_name: str, tag_name: str +) -> None: """Copy the build recipe file. Args: @@ -459,7 +472,11 @@ def _copy_recipe(input_dir, target_dir, asset_name, tag_name): _LOGGER.warning(f"Recipe not found: {recipe_path}") -def _remove_archive(rgc, registry_paths, cfg_archive_folder_key=CFG_ARCHIVE_KEY): +def _remove_archive( + rgc: RefGenConf, + registry_paths: list[dict], + cfg_archive_folder_key: str = CFG_ARCHIVE_KEY, +) -> list[str]: """Remove archives and corresponding entries from the RefGenConf object. Args: @@ -513,7 +530,7 @@ def _remove_archive(rgc, registry_paths, cfg_archive_folder_key=CFG_ARCHIVE_KEY) return ret -def _correct_registry_paths(registry_paths): +def _correct_registry_paths(registry_paths: list[dict]) -> list[dict]: """Correct registry paths by swapping 'namespace' and 'item' keys. parse_registry_path recognizes 'item' as the central element, but we @@ -526,7 +543,7 @@ def _correct_registry_paths(registry_paths): Corrected registry paths. """ - def _swap(rp): + def _swap(rp: dict) -> dict: """Swap 'namespace' and 'item' values in a registry path dict. Args: @@ -542,7 +559,7 @@ def _swap(rp): return [_swap(x) if x["namespace"] is None else x for x in registry_paths] -def _get_paths_element(registry_paths, element): +def _get_paths_element(registry_paths: list[dict], element: str) -> list[str | None]: """Extract a specific element from a collection of registry paths. Args: From 051840a3d9a8f2e55adc5ecbe53e363f8662e524 Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 10 Feb 2026 19:52:50 -0500 Subject: [PATCH 04/15] prep for new yacman --- refgenieserver/main.py | 2 +- refgenieserver/server_builder.py | 27 ++++++++++++++++----------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/refgenieserver/main.py b/refgenieserver/main.py index 034ba25..b756eb7 100644 --- a/refgenieserver/main.py +++ b/refgenieserver/main.py @@ -48,7 +48,7 @@ def main() -> None: ) ) # this RefGenConf object will be used in the server, so it's read-only - rgc = RefGenConf(filepath=selected_cfg, writable=False) + rgc = RefGenConf.from_yaml_file(selected_cfg) if args.command == "archive": arp = ( [parse_registry_path(x) for x in args.asset_registry_paths] diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index cccfd0a..4967074 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -6,7 +6,7 @@ from json import dump from subprocess import run -from attmap import PathExAttMap as PXAM +from yacman import write_lock from refgenconf import RefGenConf from refgenconf.exceptions import ( ConfigNotCompliantError, @@ -80,7 +80,7 @@ def archive( # make it RW compatible and point to new target path for server use or initialize a new object if os.path.exists(server_rgc_path): _LOGGER.debug(f"'{server_rgc_path}' file was found and will be updated") - rgc_server = RefGenConf(filepath=server_rgc_path) + rgc_server = RefGenConf.from_yaml_file(server_rgc_path) if remove: if not registry_paths: _LOGGER.error( @@ -88,8 +88,9 @@ def archive( "Use 'asset_registry_path' argument." ) exit(1) - with rgc_server as r: + with write_lock(rgc_server) as r: _remove_archive(r, registry_paths, CFG_ARCHIVE_KEY) + r.write() exit(0) else: if remove: @@ -98,9 +99,9 @@ def archive( ) exit(1) _LOGGER.debug(f"'{server_rgc_path}' file was not found and will be created") - rgc_server = RefGenConf(filepath=rgc.file_path) - rgc_server.make_writable(filepath=server_rgc_path) - rgc_server.make_readonly() + rgc_server = RefGenConf.from_yaml_file(rgc.file_path) + rgc_server.write_copy(server_rgc_path) + rgc_server.filepath = os.path.abspath(server_rgc_path) if registry_paths: genomes = _get_paths_element(registry_paths, "namespace") asset_list = _get_paths_element(registry_paths, "item") @@ -151,9 +152,10 @@ def archive( CFG_GENOME_DESC_KEY: genome_desc, CFG_ALIASES_KEY: genome_aliases, } - with rgc_server as r: - r[CFG_GENOMES_KEY].setdefault(genome, PXAM()) + with write_lock(rgc_server) as r: + r[CFG_GENOMES_KEY].setdefault(genome, {}) r[CFG_GENOMES_KEY][genome].update(genome_attrs) + r.write() _LOGGER.debug(f"Updating '{genome}' genome attributes...") asset = asset_list[counter] if asset_list is not None else None assets = asset or rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY].keys() @@ -174,8 +176,9 @@ def archive( CFG_ASSET_DEFAULT_TAG_KEY: default_tag, } _LOGGER.debug(f"Updating '{genome}/{asset_name}' asset attributes...") - with rgc_server as r: + with write_lock(rgc_server) as r: r.update_assets(genome, asset_name, asset_attrs) + r.write() tag = tag_list[counter] if tag_list is not None else None tags = ( @@ -264,7 +267,7 @@ def archive( {CFG_LEGACY_ARCHIVE_CHECKSUM_KEY: legacy_digest} ) _LOGGER.debug(f"attr dict: {tag_attrs}") - with rgc_server as r: + with write_lock(rgc_server) as r: for parent in parents: # here we update any pre-existing parents' children # attr with the newly added asset @@ -297,6 +300,7 @@ def archive( children=True, ) r.update_tags(genome, asset_name, tag_name, tag_attrs) + r.write() else: exists_msg = f"'{target_file}' exists." try: @@ -307,8 +311,9 @@ def archive( except KeyError: _LOGGER.debug(exists_msg + " Calculating archive digest") tag_attrs = {CFG_ARCHIVE_CHECKSUM_KEY: checksum(target_file)} - with rgc_server as r: + with write_lock(rgc_server) as r: r.update_tags(genome, asset_name, tag_name, tag_attrs) + r.write() counter += 1 _LOGGER.info(f"Builder finished; server config file saved: {rgc_server.file_path}") From f38b2b523667fcc3211d40da540c9ca6d75e59a7 Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 12 Feb 2026 14:43:58 -0500 Subject: [PATCH 05/15] update yacman req, version bump --- pyproject.toml | 3 ++- refgenieserver/helpers.py | 12 ++++++------ refgenieserver/routers/version3.py | 9 ++++++--- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a7e524a..7590e64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "refgenieserver" -version = "0.7.0" +version = "0.8.0" description = "A web interface and RESTful API for reference genome assets" readme = "README.md" license = "BSD-2-Clause" @@ -27,6 +27,7 @@ dependencies = [ "refgenconf>=0.12.2", "ubiquerg>=0.6.1", "uvicorn>=0.7.1", + "yacman>=0.9.5", ] [project.urls] diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 0838159..23e2b3f 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -11,7 +11,6 @@ from refgenconf.exceptions import RefgenconfError from refgenconf.helpers import send_data_request from ubiquerg import VersionInHelpParser, is_url -from yacman import get_first_env_var if TYPE_CHECKING: from fastapi import FastAPI @@ -31,11 +30,12 @@ def build_parser() -> argparse.ArgumentParser: Returns: The configured argument parser. """ - env_var_val = ( - get_first_env_var(CFG_ENV_VARS)[1] - if get_first_env_var(CFG_ENV_VARS) is not None - else "not set" - ) + env_var_val = "not set" + for var in CFG_ENV_VARS: + val = os.environ.get(var) + if val is not None: + env_var_val = val + break banner = "%(prog)s - refgenie web server utilities" additional_description = ( "For subcommand-specific options, type: '%(prog)s -h'" diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 76b3932..6df0386 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -10,7 +10,7 @@ from starlette.requests import Request from starlette.responses import FileResponse, RedirectResponse from ubiquerg import parse_registry_path -from yacman import IK, UndefinedAliasError +from yacman import UndefinedAliasError from ..const import * from ..data_models import Dict, List, Tag @@ -198,7 +198,7 @@ async def asset_splash_page( async def list_available_genomes() -> list[str]: """Return a list of genome digests this server serves at least one asset for.""" _LOGGER.info("serving genomes list") - return list(rgc.genomes[IK]["aliases_raw"].keys()) + return list(rgc[CFG_GENOMES_KEY].keys()) @router.get( @@ -210,7 +210,10 @@ async def list_available_genomes() -> list[str]: async def get_alias_dict() -> dict[str, list[str]]: """Return a dictionary of alias lists keyed by genome digests.""" _LOGGER.info("serving genomes alias dict") - return rgc.genomes[IK]["aliases_raw"] + return { + g: rgc[CFG_GENOMES_KEY][g].get(CFG_ALIASES_KEY, []) + for g in rgc[CFG_GENOMES_KEY].keys() + } @router.get( From 78f72b13dddd73f1a4dcf266640ed485feaa17a6 Mon Sep 17 00:00:00 2001 From: nsheff Date: Fri, 13 Feb 2026 13:19:03 -0500 Subject: [PATCH 06/15] fixes for new refgenconf updates --- refgenieserver/app_factory.py | 81 ++++++++++++++++++++++++++++++++ refgenieserver/helpers.py | 35 +++++++++----- refgenieserver/main.py | 3 +- refgenieserver/server_builder.py | 7 +-- 4 files changed, 110 insertions(+), 16 deletions(-) create mode 100644 refgenieserver/app_factory.py diff --git a/refgenieserver/app_factory.py b/refgenieserver/app_factory.py new file mode 100644 index 0000000..90d9885 --- /dev/null +++ b/refgenieserver/app_factory.py @@ -0,0 +1,81 @@ +"""Factory function to create a configured refgenieserver FastAPI app.""" + +from __future__ import annotations + +import logging +import sys + +from fastapi import FastAPI +from refgenconf import RefGenConf + +from .const import PKG_NAME, TAGS_METADATA, PRIVATE_API +from .helpers import purge_nonservable + +_LOGGER = logging.getLogger(PKG_NAME) + + +def create_app(config_path: str, archive_base_dir: str | None = None) -> FastAPI: + """Create a configured FastAPI app for refgenieserver. + + This builds a fresh FastAPI app with the real refgenieserver routers, + configured from a given YAML config file. Used both for production + (as an alternative to the CLI entry point) and for integration tests. + + Args: + config_path: Path to the refgenie server config YAML. + archive_base_dir: Override for BASE_DIR (default: /genomes). + Used in tests to point at a temp directory. + + Returns: + Configured FastAPI app ready to serve. + """ + # Use sys.modules to get the actual module objects. Using + # `import refgenieserver.main as m` can return a different object + # than what's in sys.modules (due to __init__.py's `from .main import *`), + # which means attribute modifications won't be visible to other modules. + import refgenieserver.main # ensure loaded + import refgenieserver.const # ensure loaded + import refgenieserver.helpers # ensure loaded + + main_module = sys.modules["refgenieserver.main"] + const_module = sys.modules["refgenieserver.const"] + helpers_module = sys.modules["refgenieserver.helpers"] + + # Load config and purge non-servable entries + rgc = RefGenConf.from_yaml_file(config_path) + purge_nonservable(rgc) + + # Override the module-level globals that the routers import. + # The routers do `from ..main import _LOGGER, rgc, app, templates` + # which reads from main's module dict at import time. + main_module.rgc = rgc + main_module._LOGGER = _LOGGER + + if archive_base_dir is not None: + # Must override BASE_DIR in both const and helpers modules, + # because helpers.py uses `from .const import *` which copies + # BASE_DIR into helpers' own namespace. + const_module.BASE_DIR = archive_base_dir + helpers_module.BASE_DIR = archive_base_dir + + from ._version import __version__ as server_v + + app = FastAPI( + title=PKG_NAME, + description="a web interface and RESTful API for reference genome assets", + version=server_v, + openapi_tags=TAGS_METADATA, + ) + + # Set the app on main_module so routers that import `app` from main + # can access it (needed for openapi spec introspection) + main_module.app = app + + # Import routers AFTER rgc is set (they read rgc at import time) + from .routers import private, version3 + + app.include_router(version3.router) + app.include_router(version3.router, prefix="/v3") + app.include_router(private.router, prefix=f"/{PRIVATE_API}") + + return app diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 23e2b3f..a7d6f13 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -244,19 +244,32 @@ def _check_servable(rgc: RefGenConf, genome: str, asset: str, tag: str) -> bool: [r in tag_data for r in [CFG_ARCHIVE_CHECKSUM_KEY, CFG_ARCHIVE_SIZE_KEY]] ) + # Collect items to remove (don't modify during iteration) + to_remove = [] + for genome_name, genome in rgc[CFG_GENOMES_KEY].items(): + if CFG_ASSETS_KEY not in genome: + continue for asset_name, asset in genome[CFG_ASSETS_KEY].items(): - try: - for tag_name, tag in asset[CFG_ASSET_TAGS_KEY].items(): - if not _check_servable(rgc, genome_name, asset_name, tag_name): - _LOGGER.debug( - "Removing '{}/{}:{}', it's not servable".format( - genome_name, asset_name, tag_name - ) + if CFG_ASSET_TAGS_KEY not in asset: + to_remove.append((genome_name, asset_name, None)) + continue + for tag_name in list(asset[CFG_ASSET_TAGS_KEY].keys()): + if not _check_servable(rgc, genome_name, asset_name, tag_name): + _LOGGER.debug( + "Removing '{}/{}:{}', it's not servable".format( + genome_name, asset_name, tag_name ) - rgc.cfg_remove_assets(genome_name, asset_name, tag_name) - except KeyError: - rgc.cfg_remove_assets(genome_name, asset_name) + ) + to_remove.append((genome_name, asset_name, tag_name)) + + # Remove after iteration completes + for genome_name, asset_name, tag_name in to_remove: + try: + rgc.cfg_remove_assets(genome_name, asset_name, tag_name) + except (KeyError, Exception): + _LOGGER.debug(f"Could not remove {genome_name}/{asset_name}:{tag_name}") + return rgc @@ -276,7 +289,7 @@ def safely_get_example( The first result element (if list) or the result itself, or the default. """ try: - res = rgc.__getattr__(rgc_method)(**kwargs) + res = getattr(rgc, rgc_method)(**kwargs) return res[0] if isinstance(res, list) else res except Exception as e: _LOGGER.warning( diff --git a/refgenieserver/main.py b/refgenieserver/main.py index b756eb7..85b4d32 100644 --- a/refgenieserver/main.py +++ b/refgenieserver/main.py @@ -58,8 +58,7 @@ def main() -> None: archive(rgc, arp, args.force, args.remove, selected_cfg, args.genomes_desc) elif args.command == "serve": # the router imports need to be after the RefGenConf object is declared - with rgc as r: - purge_nonservable(r) + purge_nonservable(rgc) from .routers import private, version1, version2, version3 app.include_router(version3.router) diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 4967074..6e071f6 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -102,6 +102,7 @@ def archive( rgc_server = RefGenConf.from_yaml_file(rgc.file_path) rgc_server.write_copy(server_rgc_path) rgc_server.filepath = os.path.abspath(server_rgc_path) + rgc_server.locker.set_file_path(os.path.abspath(server_rgc_path)) if registry_paths: genomes = _get_paths_element(registry_paths, "namespace") asset_list = _get_paths_element(registry_paths, "item") @@ -158,7 +159,7 @@ def archive( r.write() _LOGGER.debug(f"Updating '{genome}' genome attributes...") asset = asset_list[counter] if asset_list is not None else None - assets = asset or rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY].keys() + assets = asset or list(rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY].keys()) if not assets: _LOGGER.error("No assets found") continue @@ -183,9 +184,9 @@ def archive( tag = tag_list[counter] if tag_list is not None else None tags = ( tag - or rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name][ + or list(rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name][ CFG_ASSET_TAGS_KEY - ].keys() + ].keys()) ) for tag_name in tags if isinstance(tags, list) else [tags]: if not rgc.is_asset_complete(genome, asset_name, tag_name): From 252f893f97454e968b1bd8e37f42288b15afe938 Mon Sep 17 00:00:00 2001 From: nsheff Date: Fri, 13 Feb 2026 13:51:29 -0500 Subject: [PATCH 07/15] update fastapi conventions --- refgenieserver/main.py | 3 +++ refgenieserver/routers/version3.py | 22 +++++++++++----------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/refgenieserver/main.py b/refgenieserver/main.py index 85b4d32..e3c821c 100644 --- a/refgenieserver/main.py +++ b/refgenieserver/main.py @@ -61,6 +61,9 @@ def main() -> None: purge_nonservable(rgc) from .routers import private, version1, version2, version3 + # v3 is registered at both root (latest/default API) and /v3 (versioned). + # This intentional dual-registration causes harmless "Duplicate Operation ID" + # warnings from FastAPI. These only affect OpenAPI codegen tools, not API usage. app.include_router(version3.router) app.include_router(version1.router, prefix="/v1") app.include_router(version2.router, prefix="/v2") diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 6df0386..5c7c30f 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -50,41 +50,41 @@ g = Path( ..., description="Genome digest", - regex=r"^\w+$", + pattern=r"^\w+$", max_length=48, min_length=48, - example=ex_digest, + examples={"default": ex_digest}, ) al = Path( ..., description="Genome alias", - regex=r"^\S+$", - example=ex_alias, + pattern=r"^\S+$", + examples={"default": ex_alias}, ) a = Path( ..., description="Asset name", - regex=r"^\S+$", - example=ex_asset, + pattern=r"^\S+$", + examples={"default": ex_asset}, ) s = Path( ..., description="Seek key name", - regex=r"^\S+$", - example=ex_asset, + pattern=r"^\S+$", + examples={"default": ex_asset}, ) t = Path( ..., description="Tag name", - regex=r"^\S+$", - example=DEFAULT_TAG, + pattern=r"^\S+$", + examples={"default": DEFAULT_TAG}, ) # API query parameter definitions tq = Query( None, description="Tag name", - regex=r"^\S+$", + pattern=r"^\S+$", ) api_version_tags = [API3_ID] From ef4ecd6f7c076a588b23ba598225e4ffa7ed6f39 Mon Sep 17 00:00:00 2001 From: nsheff Date: Fri, 13 Feb 2026 14:07:57 -0500 Subject: [PATCH 08/15] format --- pyproject.toml | 2 +- refgenieserver/server_builder.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7590e64..ca23287 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "fastapi", "jinja2", "logmuse>=0.2", - "refgenconf>=0.12.2", + "refgenconf>=0.13.0", "ubiquerg>=0.6.1", "uvicorn>=0.7.1", "yacman>=0.9.5", diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 6e071f6..58b0273 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -182,11 +182,10 @@ def archive( r.write() tag = tag_list[counter] if tag_list is not None else None - tags = ( - tag - or list(rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name][ + tags = tag or list( + rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name][ CFG_ASSET_TAGS_KEY - ].keys()) + ].keys() ) for tag_name in tags if isinstance(tags, list) else [tags]: if not rgc.is_asset_complete(genome, asset_name, tag_name): From ee3f9a3409bf404c0de844ccf657b9fb890e5881 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 25 Feb 2026 07:49:19 -0500 Subject: [PATCH 09/15] changelog for release --- changelog.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/changelog.md b/changelog.md index 9602475..199609f 100644 --- a/changelog.md +++ b/changelog.md @@ -2,6 +2,22 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.8.0] -- 2026-02-25 + +### Changed +- Updated yacman and refgenconf dependency requirements +- Modernized packaging to use pyproject.toml with hatchling +- Updated FastAPI route definitions to use `pattern` instead of deprecated `regex` +- Updated GitHub Actions to modern versions + +### Added +- `app_factory.create_app()` function for programmatic server creation + +### Fixed +- Compatibility with yacman v1 (`with rgc as r:` context manager removed) +- Compatibility with refgenconf 0.13.0 +- Various modernization and small bugfix improvements + ## [0.7.0] -- 2021-04-27 ### Added - `remotes` section in the refgenieserver config, which supersedes `remote_url_base`. It can be used to define multiple remote data providers. From 589378fc7cc4eac4e270a3efa440188ac1be44e5 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 25 Feb 2026 12:16:41 -0500 Subject: [PATCH 10/15] format --- refgenieserver/app_factory.py | 8 ++++---- refgenieserver/server_builder.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/refgenieserver/app_factory.py b/refgenieserver/app_factory.py index 90d9885..51602f2 100644 --- a/refgenieserver/app_factory.py +++ b/refgenieserver/app_factory.py @@ -8,7 +8,7 @@ from fastapi import FastAPI from refgenconf import RefGenConf -from .const import PKG_NAME, TAGS_METADATA, PRIVATE_API +from .const import PKG_NAME, PRIVATE_API, TAGS_METADATA from .helpers import purge_nonservable _LOGGER = logging.getLogger(PKG_NAME) @@ -33,9 +33,9 @@ def create_app(config_path: str, archive_base_dir: str | None = None) -> FastAPI # `import refgenieserver.main as m` can return a different object # than what's in sys.modules (due to __init__.py's `from .main import *`), # which means attribute modifications won't be visible to other modules. - import refgenieserver.main # ensure loaded - import refgenieserver.const # ensure loaded - import refgenieserver.helpers # ensure loaded + import refgenieserver.const # noqa: F401 ensure loaded + import refgenieserver.helpers # noqa: F401 ensure loaded + import refgenieserver.main # noqa: F401 ensure loaded main_module = sys.modules["refgenieserver.main"] const_module = sys.modules["refgenieserver.const"] diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 58b0273..91a837f 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -6,7 +6,6 @@ from json import dump from subprocess import run -from yacman import write_lock from refgenconf import RefGenConf from refgenconf.exceptions import ( ConfigNotCompliantError, @@ -16,6 +15,7 @@ ) from refgenconf.helpers import replace_str_in_obj, swap_names_in_tree from ubiquerg import checksum, is_command_callable, parse_registry_path, size +from yacman import write_lock from .const import * From 5c09002f17acaa46fec61cfa7f0a90cbe0dccb40 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 25 Feb 2026 12:20:37 -0500 Subject: [PATCH 11/15] update python in container --- Dockerfile | 4 +++- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index eed51c2..d51f12a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,7 @@ -FROM tiangolo/uvicorn-gunicorn:python3.7-alpine3.8 +FROM python:3.12-slim LABEL authors="Nathan Sheffield, Michal Stolarczyk" COPY . /app +WORKDIR /app RUN pip install . +CMD ["uvicorn", "refgenieserver.main:app", "--host", "0.0.0.0", "--port", "80"] diff --git a/pyproject.toml b/pyproject.toml index ca23287..7a08fd9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "refgenieserver" version = "0.8.0" description = "A web interface and RESTful API for reference genome assets" readme = "README.md" -license = "BSD-2-Clause" +license = {text = "BSD-2-Clause"} requires-python = ">=3.10" authors = [ { name = "Michal Stolarczyk" }, From 323ca4f17484bde659058b0b2db37e47775a4b95 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 25 Feb 2026 12:24:58 -0500 Subject: [PATCH 12/15] and staging dockerfile --- staging.Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/staging.Dockerfile b/staging.Dockerfile index f74376b..d51f12a 100644 --- a/staging.Dockerfile +++ b/staging.Dockerfile @@ -1,6 +1,7 @@ -FROM tiangolo/uvicorn-gunicorn:python3.7-alpine3.8 +FROM python:3.12-slim LABEL authors="Nathan Sheffield, Michal Stolarczyk" COPY . /app -#RUN pip install https://github.com/refgenie/refgenconf/archive/dev.zip +WORKDIR /app RUN pip install . +CMD ["uvicorn", "refgenieserver.main:app", "--host", "0.0.0.0", "--port", "80"] From 2a9acc6ab756df26df44eb674b439e48b054d295 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 25 Feb 2026 12:36:38 -0500 Subject: [PATCH 13/15] update templates, and refgenconf update --- pyproject.toml | 2 +- refgenieserver/helpers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7a08fd9..2b4d42a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ build-backend = "setuptools.build_meta" include = ["refgenieserver*"] [tool.setuptools.package-data] -refgenieserver = ["templates/*", "static/*"] +refgenieserver = ["templates/**", "static/*"] [project.optional-dependencies] test = [ diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index a7d6f13..758633d 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -332,7 +332,7 @@ def create_asset_file_path( msg = MSG_404.format(f"asset ({genome}/{asset}:{tag})") _LOGGER.warning(msg) raise HTTPException(status_code=404, detail=msg) - tag_dict = rgc.genomes[genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][tag] + tag_dict = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][tag] if seek_key not in tag_dict[CFG_SEEK_KEYS_KEY]: msg = MSG_404.format(f"seek_key ({genome}/{asset}.{seek_key}:{tag})") _LOGGER.warning(msg) From 4b8434f9bd73b4fb4018436bf6233a6ebb9da450 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 25 Feb 2026 12:38:06 -0500 Subject: [PATCH 14/15] format --- refgenieserver/helpers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 758633d..b341cab 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -332,7 +332,9 @@ def create_asset_file_path( msg = MSG_404.format(f"asset ({genome}/{asset}:{tag})") _LOGGER.warning(msg) raise HTTPException(status_code=404, detail=msg) - tag_dict = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][tag] + tag_dict = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][ + tag + ] if seek_key not in tag_dict[CFG_SEEK_KEYS_KEY]: msg = MSG_404.format(f"seek_key ({genome}/{asset}.{seek_key}:{tag})") _LOGGER.warning(msg) From 60a2b50c3d880bfbc02135655baf2b874eb47aba Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 25 Feb 2026 12:46:41 -0500 Subject: [PATCH 15/15] update for fastapi behavior change --- refgenieserver/routers/version3.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 5c7c30f..0049d8b 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -518,18 +518,18 @@ async def get_genome_alias_digest(alias: str = al) -> Response: @router.get( - "/genomes/aliases/{genome_digest}", + "/genomes/aliases/{genome}", operation_id=API_VERSION + API_ID_ALIAS_ALIAS, response_model=List[str], tags=api_version_tags, ) -async def get_genome_alias(genome_digest: str = g) -> list[str]: +async def get_genome_alias(genome: str = g) -> list[str]: """Return the genome aliases for a given digest.""" try: - alias = rgc[CFG_GENOMES_KEY][genome_digest][CFG_ALIASES_KEY] - _LOGGER.info(f"alias returned for '{genome_digest}': {alias}") + alias = rgc[CFG_GENOMES_KEY][genome][CFG_ALIASES_KEY] + _LOGGER.info(f"alias returned for '{genome}': {alias}") return alias except (KeyError, UndefinedAliasError): - msg = MSG_404.format(f"genome ({genome_digest})") + msg = MSG_404.format(f"genome ({genome})") _LOGGER.warning(msg) raise HTTPException(status_code=404, detail=msg)