Skip to content
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
- switch changelog bot trigger only on comments ([#4241](https://github.com/nf-core/tools/pull/4241))
- fix indentation in generated api docs ([#4245](https://github.com/nf-core/tools/pull/4245))

### Linting

- Store `EDAM.tsv` in `NFCORE_CACHE_DIR` and fix loss of YAML comments ([#4242](https://github.com/nf-core/tools/pull/4242))

### Modules

- Allow task.ext.prefix2 in modules linting ([#4234](https://github.com/nf-core/tools/pull/4234))
Expand Down
25 changes: 20 additions & 5 deletions nf_core/modules/lint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,13 +611,21 @@ def _add_edam_ontologies(section, edam_formats, desc):
section["ontologies"] = []
log.debug(f"expected ontologies for {desc}: {expected_ontologies}")
log.debug(f"current ontologies for {desc}: {current_ontologies}")
for ontology, ext in expected_ontologies:
if ontology not in current_ontologies:
for ontology_url, ext in expected_ontologies:
comment_text = edam_formats[ext][1]
if ontology_url not in current_ontologies:
try:
section["ontologies"].append(ruamel.yaml.comments.CommentedMap({"edam": ontology}))
section["ontologies"][-1].yaml_add_eol_comment(f"{edam_formats[ext][1]}", "edam")
cm = ruamel.yaml.comments.CommentedMap()
cm["edam"] = ontology_url
cm.yaml_add_eol_comment(comment_text, key="edam")
section["ontologies"].append(cm)
except KeyError:
log.warning(f"Could not add ontologies in {desc}")
else:
for item in section["ontologies"]:
if isinstance(item, ruamel.yaml.comments.CommentedMap) and item.get("edam") == ontology_url:
item.yaml_add_eol_comment(comment_text, key="edam")
break

# EDAM ontologies
edam_formats = nf_core.modules.modules_utils.load_edam()
Expand Down Expand Up @@ -681,7 +689,14 @@ def _add_edam_ontologies(section, edam_formats, desc):

def _ensure_string_keys(obj):
"""Recursively ensure all dict keys are strings (e.g., convert 1.2 -> "1.2")"""
if isinstance(obj, dict):
# This first block is needed to keep the comments in the yml
if isinstance(obj, ruamel.yaml.comments.CommentedMap):
Comment thread
LouisLeNezet marked this conversation as resolved.
for key in list(obj.keys()):
value = obj.pop(key)
new_key = str(key) if not isinstance(key, str) else key
obj[new_key] = _ensure_string_keys(value)
return obj
elif isinstance(obj, dict):
return {str(k) if not isinstance(k, str) else k: _ensure_string_keys(v) for k, v in obj.items()}
elif isinstance(obj, list):
return [_ensure_string_keys(item) for item in obj]
Expand Down
44 changes: 38 additions & 6 deletions nf_core/modules/modules_utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
import logging
import time
from pathlib import Path
from urllib.parse import urlparse

import requests

from nf_core.utils import NFCORE_CACHE_DIR

from ..components.nfcore_component import NFCoreComponent

log = logging.getLogger(__name__)

# URL the EDAM ontology table is downloaded from when no usable cached copy exists.
EDAM_TSV_URL = "https://edamontology.org/EDAM.tsv"
# Maximum age of the cached EDAM.tsv, in seconds, before it is considered stale.
EDAM_CACHE_TTL = 7 * 24 * 60 * 60 # one week


class ModuleExceptionError(Exception):
"""Exception raised when there was an error with module commands"""
Expand Down Expand Up @@ -96,15 +102,41 @@ def get_installed_modules(directory: Path, repo_type="modules") -> tuple[list[st
return local_modules, nfcore_modules


def cache_is_expired(path: Path, ttl: "float | None" = None) -> bool:
    """Return True if the cache file is missing or older than the TTL.

    Args:
        path: Location of the cached file to check.
        ttl: Maximum allowed age in seconds. When omitted, the module-level
            ``EDAM_CACHE_TTL`` is used.

    Returns:
        True when the file's modification time lies more than ``ttl`` seconds
        in the past, or when the file does not exist; False otherwise.
    """
    if ttl is None:
        ttl = EDAM_CACHE_TTL
    try:
        modified_at = path.stat().st_mtime
    except FileNotFoundError:
        # The file can disappear between a caller's exists() check and this
        # stat(); a missing cache is reported as expired instead of raising.
        return True
    return time.time() - modified_at > ttl


def load_edam():
"""Load the EDAM ontology from the nf-core repository"""
edam_formats = {}
try:
response = requests.get("https://edamontology.org/EDAM.tsv")
Comment thread
LouisLeNezet marked this conversation as resolved.
except requests.exceptions.RequestException as e:
log.warning(f"Failed to load EDAM ontology: {e}")
return edam_formats
for line in response.content.splitlines():
cache_path = Path(NFCORE_CACHE_DIR) / "EDAM.tsv"

# Remove stale cache file
if cache_path.exists() and cache_is_expired(cache_path):
log.debug("Cached EDAM ontology expired; removing old cache file")
cache_path.unlink(missing_ok=True)

if not cache_path.exists():
log.debug("EDAM.tsv file not found in NFCORE_CACHE_DIR; downloading")
try:
response = requests.get(EDAM_TSV_URL, timeout=15)
response.raise_for_status()
data_bytes = response.content
cache_path.write_bytes(data_bytes)
except requests.exceptions.RequestException as e:
log.warning(f"Failed to download EDAM ontology: {e}")
return edam_formats
else:
log.debug("Using EDAM.tsv file found in NFCORE_CACHE_DIR")
try:
data_bytes = cache_path.read_bytes()
except OSError as e:
log.warning(f"Failed to load EDAM ontology: {e}")
return edam_formats

for line in data_bytes.splitlines():
fields = line.decode("utf-8").split("\t")
if fields[0].split("/")[-1].startswith("format") and fields[14]: # We choose an already provided extension
extensions = fields[14].split("|")
Expand Down
37 changes: 37 additions & 0 deletions tests/modules/test_modules_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from pathlib import Path
from unittest.mock import patch

import nf_core.modules.modules_utils

from ..test_modules import TestModules
Expand Down Expand Up @@ -82,3 +85,37 @@ def test_filter_modules_by_name_empty_list(self):

filtered = nf_core.modules.modules_utils.filter_modules_by_name(modules, "fastqc")
assert len(filtered) == 0

def test_load_edam(self):
    """Test that load_edam() populates the cache and returns parsed formats.

    The cache directory is a throwaway temporary directory that is removed
    even when an assertion fails, so nothing is left behind in the current
    working directory.

    Note: load_edam() is called unmocked and downloads the file when the
    cache is empty, so this test needs network access.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        cache_path = Path(tmp_dir) / "EDAM.tsv"

        with patch("nf_core.modules.modules_utils.NFCORE_CACHE_DIR", new=tmp_dir):
            # Cache should not exist before loading
            assert not cache_path.exists()

            edam_formats = nf_core.modules.modules_utils.load_edam()

        # Cache file should now exist
        assert cache_path.exists()

        # Fail with a clear message instead of StopIteration on an empty result
        assert edam_formats, "load_edam() returned no formats"

        first_key, first_value = next(iter(edam_formats.items()))

        assert isinstance(first_key, str)
        assert isinstance(first_value, tuple)
        assert len(first_value) == 2
Loading