diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8166c4c5eb..6f50b9a683 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,10 @@
 - switch changelog bot trigger only on comments ([#4241](https://github.com/nf-core/tools/pull/4241))
 - fix indentation in generated api docs ([#4245](https://github.com/nf-core/tools/pull/4245))
 
+### Linting
+
+- Store `EDAM.tsv` in `NFCORE_CACHE_DIR` and fix yaml comment loss ([#4242](https://github.com/nf-core/tools/pull/4242))
+
 ### Modules
 
 - Allow task.ext.prefix2 in modules linting ([#4234](https://github.com/nf-core/tools/pull/4234))
diff --git a/nf_core/modules/lint/__init__.py b/nf_core/modules/lint/__init__.py
index f2472db0d3..2f80b9f974 100644
--- a/nf_core/modules/lint/__init__.py
+++ b/nf_core/modules/lint/__init__.py
@@ -611,13 +611,21 @@ def _add_edam_ontologies(section, edam_formats, desc):
                 section["ontologies"] = []
             log.debug(f"expected ontologies for {desc}: {expected_ontologies}")
             log.debug(f"current ontologies for {desc}: {current_ontologies}")
-            for ontology, ext in expected_ontologies:
-                if ontology not in current_ontologies:
+            for ontology_url, ext in expected_ontologies:
+                comment_text = edam_formats[ext][1]
+                if ontology_url not in current_ontologies:
                     try:
-                        section["ontologies"].append(ruamel.yaml.comments.CommentedMap({"edam": ontology}))
-                        section["ontologies"][-1].yaml_add_eol_comment(f"{edam_formats[ext][1]}", "edam")
+                        cm = ruamel.yaml.comments.CommentedMap()
+                        cm["edam"] = ontology_url
+                        cm.yaml_add_eol_comment(comment_text, key="edam")
+                        section["ontologies"].append(cm)
                     except KeyError:
                         log.warning(f"Could not add ontologies in {desc}")
+                else:
+                    for item in section["ontologies"]:
+                        if isinstance(item, ruamel.yaml.comments.CommentedMap) and item.get("edam") == ontology_url:
+                            item.yaml_add_eol_comment(comment_text, key="edam")
+                            break
 
         # EDAM ontologies
         edam_formats = nf_core.modules.modules_utils.load_edam()
@@ -681,7 +689,14 @@ def _add_edam_ontologies(section, edam_formats, desc):
 
         def _ensure_string_keys(obj):
             """Recursively ensure all dict keys are strings (e.g., convert 1.2 -> "1.2")"""
-            if isinstance(obj, dict):
+            # This first block is needed to keep the comments in the yml
+            if isinstance(obj, ruamel.yaml.comments.CommentedMap):
+                for key in list(obj.keys()):
+                    value = obj.pop(key)
+                    new_key = str(key) if not isinstance(key, str) else key
+                    obj[new_key] = _ensure_string_keys(value)
+                return obj
+            elif isinstance(obj, dict):
                 return {str(k) if not isinstance(k, str) else k: _ensure_string_keys(v) for k, v in obj.items()}
             elif isinstance(obj, list):
                 return [_ensure_string_keys(item) for item in obj]
diff --git a/nf_core/modules/modules_utils.py b/nf_core/modules/modules_utils.py
index 7b38369e8c..21496fdef3 100644
--- a/nf_core/modules/modules_utils.py
+++ b/nf_core/modules/modules_utils.py
@@ -1,13 +1,19 @@
 import logging
+import time
 from pathlib import Path
 from urllib.parse import urlparse
 
 import requests
 
+from nf_core.utils import NFCORE_CACHE_DIR
+
 from ..components.nfcore_component import NFCoreComponent
 
 log = logging.getLogger(__name__)
 
+EDAM_TSV_URL = "https://edamontology.org/EDAM.tsv"
+EDAM_CACHE_TTL = 7 * 24 * 60 * 60  # one week
+
 
 class ModuleExceptionError(Exception):
     """Exception raised when there was an error with module commands"""
@@ -96,15 +102,61 @@ def get_installed_modules(directory: Path, repo_type="modules") -> tuple[list[st
     return local_modules, nfcore_modules
 
 
+def cache_is_expired(path: Path) -> bool:
+    """Return True if the cache file is older than the configured TTL.
+
+    A file whose modification time cannot be read (for example because it was
+    removed in the meantime) is reported as expired so that it gets refreshed.
+    """
+    try:
+        age = time.time() - path.stat().st_mtime
+    except OSError:
+        return True
+    return age > EDAM_CACHE_TTL
+
+
 def load_edam():
-    """Load the EDAM ontology from the nf-core repository"""
+    """Load the EDAM format ontology, caching ``EDAM.tsv`` in ``NFCORE_CACHE_DIR``.
+
+    The table is downloaded from ``EDAM_TSV_URL`` when no cached copy exists or the
+    cached copy is older than ``EDAM_CACHE_TTL``. If the download fails, an expired
+    cached copy is used as a fallback; if there is none, an empty dict is returned.
+    """
     edam_formats = {}
-    try:
-        response = requests.get("https://edamontology.org/EDAM.tsv")
-    except requests.exceptions.RequestException as e:
-        log.warning(f"Failed to load EDAM ontology: {e}")
-        return edam_formats
-    for line in response.content.splitlines():
+    cache_path = Path(NFCORE_CACHE_DIR) / "EDAM.tsv"
+    data_bytes = None
+
+    if cache_path.exists() and not cache_is_expired(cache_path):
+        log.debug("Using EDAM.tsv file found in NFCORE_CACHE_DIR")
+    else:
+        log.debug("EDAM.tsv file missing or expired in NFCORE_CACHE_DIR; downloading")
+        try:
+            response = requests.get(EDAM_TSV_URL, timeout=15)
+            response.raise_for_status()
+            data_bytes = response.content
+        except requests.exceptions.RequestException as e:
+            log.warning(f"Failed to download EDAM ontology: {e}")
+            if not cache_path.exists():
+                return edam_formats
+            # Offline or server error: an expired copy is better than no ontology
+            log.debug("Falling back to expired EDAM.tsv file in NFCORE_CACHE_DIR")
+        else:
+            # Caching is best-effort: a missing or read-only cache directory must
+            # not prevent the freshly downloaded data from being used
+            try:
+                cache_path.parent.mkdir(parents=True, exist_ok=True)
+                cache_path.write_bytes(data_bytes)
+            except OSError as e:
+                log.debug(f"Could not write EDAM cache file {cache_path}: {e}")
+
+    if data_bytes is None:
+        try:
+            data_bytes = cache_path.read_bytes()
+        except OSError as e:
+            log.warning(f"Failed to load EDAM ontology: {e}")
+            return edam_formats
+
+    for line in data_bytes.splitlines():
         fields = line.decode("utf-8").split("\t")
         if fields[0].split("/")[-1].startswith("format") and fields[14]:  # We choose an already provided extension
             extensions = fields[14].split("|")
diff --git a/tests/modules/test_modules_utils.py b/tests/modules/test_modules_utils.py
index 8ef8d9d404..889c0ee2d4 100644
--- a/tests/modules/test_modules_utils.py
+++ b/tests/modules/test_modules_utils.py
@@ -1,3 +1,7 @@
+import tempfile
+from pathlib import Path
+from unittest.mock import patch
+
 import nf_core.modules.modules_utils
 
 from ..test_modules import TestModules
@@ -82,3 +86,35 @@ def test_filter_modules_by_name_empty_list(self):
         filtered = nf_core.modules.modules_utils.filter_modules_by_name(modules, "fastqc")
 
         assert len(filtered) == 0
+
+    @patch("nf_core.modules.modules_utils.requests.get")
+    def test_load_edam(self, mock_get):
+        """Test that the EDAM ontology is downloaded once and then read from the cache"""
+        # Minimal stand-in for EDAM.tsv: a header plus one "format" row whose
+        # 15th column lists file extensions, so no network access is needed
+        edam_row = ["http://edamontology.org/format_1930", "FASTQ"] + [""] * 12 + ["fastq|fq"]
+        edam_tsv = "Class ID\tPreferred Label\n" + "\t".join(edam_row) + "\n"
+        mock_get.return_value.content = edam_tsv.encode("utf-8")
+
+        # A temporary directory keeps the test isolated and is removed even on failure
+        with tempfile.TemporaryDirectory() as cache_dir:
+            cache_path = Path(cache_dir) / "EDAM.tsv"
+            with patch("nf_core.modules.modules_utils.NFCORE_CACHE_DIR", new=cache_dir):
+                # Cache should not exist before loading
+                assert not cache_path.exists()
+
+                edam_formats = nf_core.modules.modules_utils.load_edam()
+
+                # Cache file should now exist, filled by a single download
+                assert cache_path.exists()
+                mock_get.assert_called_once()
+
+                # A second call must be served from the cache, not the network
+                assert nf_core.modules.modules_utils.load_edam() == edam_formats
+                mock_get.assert_called_once()
+
+        first_key, first_value = next(iter(edam_formats.items()))
+
+        assert isinstance(first_key, str)
+        assert isinstance(first_value, tuple)
+        assert len(first_value) == 2