27 changes: 27 additions & 0 deletions .github/workflows/run-tests.yml
@@ -41,10 +41,22 @@ jobs:
- '3.13'
mode:
- normal
vendored_dandiapi:
# Allow vendor information for the dandi-api instance to default to
# the values specified in
# dandi/tests/data/dandiarchive-docker/docker-compose.yml
- default
include:
- os: ubuntu-latest
python: '3.10'
mode: dandi-api
- os: ubuntu-latest
python: '3.10'
mode: dandi-api
vendored_dandiapi: ember-dandi
instance_name: EMBER-DANDI
instance_identifier: 'RRID:SCR_026700'
doi_prefix: '10.82754'
- os: ubuntu-latest
python: 3.13
mode: obolibrary-only
@@ -76,6 +88,21 @@ jobs:
python -m pip install --upgrade pip wheel
pip install ".[extras,test]"

# Set only if matrix.instance_name is defined
- name: Set DANDI_TESTS_INSTANCE_NAME
if: ${{ matrix.instance_name }}
run: echo "DANDI_TESTS_INSTANCE_NAME=${{ matrix.instance_name }}" >> "$GITHUB_ENV"

# Set only if matrix.instance_identifier is defined
- name: Set DANDI_TESTS_INSTANCE_IDENTIFIER
if: ${{ matrix.instance_identifier }}
run: echo "DANDI_TESTS_INSTANCE_IDENTIFIER=${{ matrix.instance_identifier }}" >> "$GITHUB_ENV"

# Set only if matrix.doi_prefix is defined
- name: Set DANDI_TESTS_DOI_PREFIX
if: ${{ matrix.doi_prefix }}
run: echo "DANDI_TESTS_DOI_PREFIX=${{ matrix.doi_prefix }}" >> "$GITHUB_ENV"

- name: Install dev versions of select dependencies
if: matrix.mode == 'dev-deps'
run: |
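The three DANDI_TESTS_* steps above only export a variable when the corresponding matrix key is defined, so the stock dandi-api job is unaffected. A minimal sketch of how a test fixture might consume these overrides; the helper name and fallback values are hypothetical and the actual consumption points are not part of this diff:

import os

# Hypothetical helper: read the vendor overrides exported by the workflow,
# falling back to placeholder defaults when they are not set.
def vendored_instance_config() -> dict[str, str | None]:
    return {
        "instance_name": os.environ.get("DANDI_TESTS_INSTANCE_NAME", "DANDI"),
        "instance_identifier": os.environ.get("DANDI_TESTS_INSTANCE_IDENTIFIER"),
        "doi_prefix": os.environ.get("DANDI_TESTS_DOI_PREFIX"),
    }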
13 changes: 11 additions & 2 deletions dandi/cli/cmd_service_scripts.py
@@ -18,6 +18,8 @@
from requests.auth import HTTPBasicAuth
from requests.exceptions import HTTPError

from dandi.consts import known_instances

from .base import ChoiceList, instance_option, map_to_click_exceptions
from .. import __version__, lgr
from ..dandiapi import DandiAPIClient, RemoteBlobAsset, RESTFullAPIClient
@@ -246,8 +248,15 @@ def update_dandiset_from_doi(
Update the metadata for the draft version of a Dandiset with information
from a given DOI record.
"""
if dandiset.startswith("DANDI:"):
dandiset = dandiset[6:]
known_instance_names = [k.upper() for k in known_instances.keys()]

# Strip instance name prefix from dandiset ID, if present
for name in known_instance_names:
dandiset_id_prefix = f"{name}:"
if dandiset.startswith(dandiset_id_prefix):
dandiset = dandiset[len(dandiset_id_prefix) :]
break

start_time = datetime.now().astimezone()
with DandiAPIClient.for_dandi_instance(dandi_instance, authenticate=True) as client:
with RESTFullAPIClient(
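A standalone sketch of the prefix stripping introduced above, assuming known_instances contains entries such as "dandi" and "ember-dandi" (the real mapping lives in dandi.consts and may differ):

KNOWN_INSTANCE_NAMES = ["DANDI", "EMBER-DANDI"]  # assumed contents, upper-cased as in the loop above

def strip_instance_prefix(dandiset: str) -> str:
    # Drop a leading "<INSTANCE>:" prefix if one of the known names matches
    for name in KNOWN_INSTANCE_NAMES:
        prefix = f"{name}:"
        if dandiset.startswith(prefix):
            return dandiset[len(prefix):]
    return dandiset

assert strip_instance_prefix("DANDI:000027") == "000027"
assert strip_instance_prefix("EMBER-DANDI:000027") == "000027"
assert strip_instance_prefix("000027") == "000027"  # no prefix, left unchanged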
5 changes: 3 additions & 2 deletions dandi/cli/tests/test_service_scripts.py
@@ -10,6 +10,7 @@
import anys
from click.testing import CliRunner
from dandischema.consts import DANDI_SCHEMA_VERSION
from dandischema.models import ID_PATTERN
import pytest

from dandi import __version__
@@ -105,13 +106,13 @@ def test_update_dandiset_from_doi(
metadata = new_dandiset.dandiset.get_raw_metadata()
with (DATA_DIR / "update_dandiset_from_doi" / f"{name}.json").open() as fp:
expected = json.load(fp)
expected["id"] = f"DANDI:{dandiset_id}/draft"
expected["id"] = anys.AnyFullmatch(rf"{ID_PATTERN}:{dandiset_id}/draft")
expected["url"] = f"{repository}/dandiset/{dandiset_id}/draft"
expected["@context"] = (
"https://raw.githubusercontent.com/dandi/schema/master/releases"
f"/{DANDI_SCHEMA_VERSION}/context.json"
)
expected["identifier"] = f"DANDI:{dandiset_id}"
expected["identifier"] = anys.AnyFullmatch(rf"{ID_PATTERN}:{dandiset_id}")
expected["repository"] = repository
expected["dateCreated"] = anys.ANY_AWARE_DATETIME_STR
expected["schemaVersion"] = DANDI_SCHEMA_VERSION
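Switching from exact f-strings to anys.AnyFullmatch lets the expected metadata accept any instance prefix allowed by dandischema's ID_PATTERN rather than only "DANDI:". A minimal illustration of the matcher's semantics, using a stand-in pattern rather than the real ID_PATTERN:

from anys import AnyFullmatch

pattern = r"[A-Z][A-Z-]*:\d{6}/draft"  # stand-in for rf"{ID_PATTERN}:{dandiset_id}/draft"
assert "DANDI:000123/draft" == AnyFullmatch(pattern)
assert "EMBER-DANDI:000123/draft" == AnyFullmatch(pattern)
assert "DANDI:000123" != AnyFullmatch(pattern)  # the whole string must match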
3 changes: 2 additions & 1 deletion dandi/dandiarchive.py
@@ -743,7 +743,8 @@ def parse(
continue
groups = match.groupdict()
if "instance_name" in groups:
# map to lower case so we could immediately map DANDI: into "dandi" instance
# map to lower case so we could immediately map an instance name to
# an instance in `dandi.consts.known_instances`
groups["instance_name"] = groups["instance_name"].lower()
lgr.log(5, "Matched %r into %s", url, groups)
rewrite = settings.get("rewrite", False)
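For context, the lowercasing lets an instance-name prefix captured from an identifier such as "EMBER-DANDI:000123" be used directly as a key into known_instances, whose keys are lower-case. A tiny sketch with assumed dictionary contents:

known_instances = {"dandi": object(), "ember-dandi": object()}  # assumed keys
groups = {"instance_name": "EMBER-DANDI"}  # as captured by the URL regex
groups["instance_name"] = groups["instance_name"].lower()
assert groups["instance_name"] in known_instances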
12 changes: 1 addition & 11 deletions dandi/dandiset.py
@@ -127,17 +127,7 @@ def _get_identifier(metadata: dict) -> str | None:
id_ = metadata["identifier"]
lgr.debug("Found identifier %s in top level 'identifier'", str(id_))

if isinstance(id_, dict):
# New formalized model, but see below DANDI: way
# TODO: add schemaVersion handling but only after we have them provided
# in all metadata records from dandi-api server
if id_.get("propertyID") != "DANDI":
raise ValueError(
f"Got following identifier record when was expecting a record "
f"with 'propertyID: DANDI': {id_}"
)
id_ = str(id_.get("value", ""))
elif id_ is not None:
if id_ is not None:
assert isinstance(id_, str)
# result of https://github.com/dandi/dandi-cli/pull/348 which ???
# TODO: RF to avoid this evil!!!
3 changes: 2 additions & 1 deletion dandi/tests/test_download.py
@@ -16,6 +16,7 @@
import time
from unittest import mock

from dandischema.models import ID_PATTERN
import numpy as np
import pytest
from pytest_mock import MockerFixture
@@ -258,7 +259,7 @@ def test_download_dandiset_yaml(text_dandiset: SampleDandiset, tmp_path: Path) -
assert list_paths(tmp_path, dirs=True) == [tmp_path / dandiset_metadata_file]
with (tmp_path / dandiset_metadata_file).open(encoding="utf-8") as fp:
metadata = yaml_load(fp)
assert metadata["id"] == f"DANDI:{dandiset_id}/draft"
assert re.match(rf"^{ID_PATTERN}:{dandiset_id}/draft$", metadata["id"])


def test_download_asset_id(text_dandiset: SampleDandiset, tmp_path: Path) -> None:
65 changes: 65 additions & 0 deletions dandi/tests/test_utils.py
@@ -29,6 +29,7 @@
get_utcnow_datetime,
is_page2_url,
is_same_time,
is_url,
on_windows,
post_upload_size_check,
under_paths,
@@ -589,3 +590,67 @@ def test_post_upload_size_check_erroring(
logging.ERROR,
f"Size of {p} was 42 at start of upload but is now 19 after upload",
) in caplog.record_tuples


class TestIsUrl:
@pytest.mark.parametrize(
"s",
[
# Standard valid HTTP/FTP URLs
"http://example.com",
"https://example.com",
"https://example.com/path",
"https://example.com/path?query=1#frag",
"https://example.com:8443/path",
"http://127.0.0.1:8000",
"ftp://example.com/path/file.txt",
"ftp://user:pass@example.com/dir",
# These pass pydantic validation but are not very useful URLs
"http:/example.com",
# Typical DANDI Archive dandiset URLs (also valid HTTP URLs)
"https://dandiarchive.org/dandiset/000027",
"https://dandiarchive.org/dandiset/000027/draft",
"https://dandiarchive.org/dandiset/000027/0.210428.2206",
# DANDI identifiers and ids
"DANDI:123456",
"DANDI:123456/draft",
"DANDI:123456/1.123456.1234",
"DANDI-SANDBOX:123456",
"DANDI-SANDBOX:123456/draft",
"DANDI-SANDBOX:123456/1.123456.1234",
# Customized DANDI URLs
"dandi://dandi/123456",
"dandi://dandi/123456/draft",
"dandi://dandi/123456/1.123456.1234",
"dandi://dandi-sandbox/123456",
"dandi://dandi-sandbox/123456/draft",
"dandi://dandi-sandbox/123456/1.123456.1234",
],
)
def test_valid_urls(self, s: str) -> None:
assert is_url(s) is True

@pytest.mark.parametrize(
"s",
[
# Clearly invalid URLs
"not a url",
"example",
"example .com",
"://example.com",
"",
" ",
# DANDI-like string that should not be treated as a valid DANDI URL
"dandi://not-a-real-dandiset",
# Invalid DANDI identifiers and ids because of unknown instance name
"FAKEDANDI:123456",
"FAKEDANDI:123456/draft",
"FAKEDANDI:123456/1.123456.1234",
# Customized DANDI URLs
"dandi://fakedandi/123456",
"dandi://fakedandi/123456/draft",
"dandi://fakedandi/123456/1.123456.1234",
],
)
def test_invalid_urls(self, s: str) -> None:
assert is_url(s) is False
35 changes: 29 additions & 6 deletions dandi/utils.py
@@ -28,7 +28,7 @@

import dateutil.parser
from multidict import MultiDict # dependency of yarl
from pydantic import BaseModel, Field
from pydantic import AnyHttpUrl, BaseModel, Field, FtpUrl, TypeAdapter, ValidationError
import requests
import ruamel.yaml
from semantic_version import Version
@@ -648,13 +648,36 @@
)


def is_url(s: str) -> bool:
    """Very primitive url detection for now

    TODO: redo
    """
    return s.lower().startswith(("http://", "https://", "dandi:", "ftp://"))
    # Slashes are not required after "dandi:" so as to support "DANDI:<id>"


# This is defined at module level because repeatedly creating a TypeAdapter
# is expensive
_url_adapter: TypeAdapter[AnyHttpUrl | FtpUrl] = TypeAdapter(AnyHttpUrl | FtpUrl)


def is_url(s: str) -> bool:
    """
    Determines whether the input string `s` is a valid URL (standard URL or DANDI URL).
    """

    # Import from within the function to avoid possible circular imports,
    # since this is a utility module
from dandi.dandiarchive import parse_dandi_url
from dandi.exceptions import UnknownURLError

try:
_url_adapter.validate_python(s)
except ValidationError:
        # `s` is not a standard URL, so try parsing it as a DANDI URL
try:
parse_dandi_url(s)
except UnknownURLError:
# `s` is neither a standard URL nor a DANDI URL, returning `False`
return False

# `s` is a DANDI URL, returning `True`
return True

# `s` is a standard URL
return True


def get_module_version(module: str | types.ModuleType) -> str | None:
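A quick usage check of the reworked is_url(), mirroring the cases exercised in TestIsUrl above (assumes a dandi installation whose known instances include the main archive):

from dandi.utils import is_url

assert is_url("https://dandiarchive.org/dandiset/000027/draft")
assert is_url("ftp://example.com/path/file.txt")
assert is_url("DANDI:123456/draft")    # accepted via parse_dandi_url()
assert is_url("dandi://dandi/123456")  # custom dandi:// URL form
assert not is_url("FAKEDANDI:123456")  # unknown instance name
assert not is_url("not a url")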