From 5051aa80a970d6acf3cc7d083ce7d95589d4de69 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Wed, 16 Apr 2025 15:37:14 +0200 Subject: [PATCH 1/6] add `tab_ingest` parameter for file adder Also added validators instead of annotation to prevent linter errors where nothing is wrong --- easyDataverse/dataset.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/easyDataverse/dataset.py b/easyDataverse/dataset.py index 3ee904a..fbb770c 100644 --- a/easyDataverse/dataset.py +++ b/easyDataverse/dataset.py @@ -6,7 +6,7 @@ import nob import xmltodict import yaml -from pydantic import BaseModel, ConfigDict, Field, HttpUrl +from pydantic import BaseModel, ConfigDict, Field from dvuploader import File, add_directory @@ -54,7 +54,7 @@ class Dataset(BaseModel): ) API_TOKEN: Optional[str] = Field(None) - DATAVERSE_URL: Optional[HttpUrl] = Field(None) + DATAVERSE_URL: Optional[str] = Field(None) # ! Adders def add_metadatablock(self, metadatablock: DataverseBase) -> None: @@ -85,6 +85,7 @@ def add_file( file_name: Optional[str] = None, categories: List[str] = ["DATA"], description: str = "", + tab_ingest: bool = True, ): """Adds a file to the dataset based on the provided path. @@ -94,6 +95,7 @@ def add_file( file_name (str, optional): Name of the file in Dataverse. Defaults to None, which will use the basename of local_path. categories (List[str], optional): List of categories to assign to the file. Defaults to ["DATA"]. description (str, optional): Description of the file. Defaults to "". + tab_ingest (bool, optional): Whether to use tab-separated ingest. Defaults to True. Raises: FileExistsError: If the file has already been added to the dataset. 
@@ -105,6 +107,7 @@ def add_file( description=description, categories=categories, file_name=file_name, # type: ignore + tab_ingest=tab_ingest, # type: ignore ) if file not in self.files: From fe4a3fffeb63ea8ed7c7a503bc84165697c383dc Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Wed, 16 Apr 2025 15:37:35 +0200 Subject: [PATCH 2/6] use validators instead of annotations --- easyDataverse/dataverse.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/easyDataverse/dataverse.py b/easyDataverse/dataverse.py index 0d167c0..4d26c16 100644 --- a/easyDataverse/dataverse.py +++ b/easyDataverse/dataverse.py @@ -1,6 +1,7 @@ import asyncio from copy import deepcopy import json +from uuid import UUID from typing import Callable, Dict, List, Optional, Tuple, IO from urllib import parse @@ -18,6 +19,7 @@ HttpUrl, PrivateAttr, computed_field, + field_validator, ) from pyDataverse.api import DataAccessApi, NativeApi import rich @@ -45,12 +47,12 @@ class Dataverse(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) - server_url: HttpUrl = Field( + server_url: str = Field( ..., description="The URL of the Dataverse installation to connect to.", ) - api_token: Optional[UUID4] = Field( + api_token: Optional[str] = Field( default=None, description="The API token to use for authentication. 
If not provided, only public data can be accessed.", ) @@ -63,6 +65,25 @@ class Dataverse(BaseModel): _dataset_gen: Callable = PrivateAttr() _connected: bool = PrivateAttr(default=False) + @field_validator("server_url") + def validate_url(cls, v): + """Validate the server URL.""" + try: + HttpUrl(v) + return v + except ValueError as e: + raise ValueError("Server URL must be a valid URL") from e + + @field_validator("api_token") + def validate_api_token(cls, v): + """Validate the API token.""" + if v is not None: + try: + UUID(v) + return v + except ValueError as e: + raise ValueError("API token must be a valid UUID") from e + def __init__( self, server_url: HttpUrl, From 0383d843ca493141d8eaea8da24d186bc6be5934 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Wed, 16 Apr 2025 15:37:52 +0200 Subject: [PATCH 3/6] update `dvuploader` and `httpx` --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bc93f58..517d2b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,9 +19,9 @@ dotted-dict = "1.1.3" rich = "^13.7.1" nob = "^0.8.2" nest-asyncio = "^1.6.0" -dvuploader = "^0.2.3" +dvuploader = "^0.3.0" email-validator = "^2.1.1" -httpx = "0.28" +httpx = "^0.28" [tool.poetry.group.test.dependencies] pytest-cov = "^5.0.0" From 08d364f4ae521fd57fe12aaa2d76c4c2fe225906 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Wed, 16 Apr 2025 15:39:21 +0200 Subject: [PATCH 4/6] tests creation and upload --- tests/integration/test_dataset_creation.py | 94 ++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/tests/integration/test_dataset_creation.py b/tests/integration/test_dataset_creation.py index 398e2a2..026e0bf 100644 --- a/tests/integration/test_dataset_creation.py +++ b/tests/integration/test_dataset_creation.py @@ -1,3 +1,4 @@ +import os import pytest from easyDataverse.dataset import Dataset @@ -40,6 
+41,59 @@ def test_creation( assert self.sort_citation(dataset) == minimal_upload + @pytest.mark.integration + def test_creation_and_upload( + self, + credentials, + minimal_upload, + ): + # Arrange + base_url, api_token = credentials + dataverse = Dataverse( + server_url=base_url, + api_token=api_token, + ) + + # Act + dataset = dataverse.create_dataset() + + dataset.citation.title = "My dataset" + dataset.citation.subject = ["Other"] + dataset.citation.add_author(name="John Doe") + dataset.citation.add_ds_description( + value="This is a description of the dataset", + date="2024", + ) + dataset.citation.add_dataset_contact( + name="John Doe", + email="john@doe.com", + ) + + dataset.add_directory( + dirpath="./tests/fixtures", + dv_dir="some/sub/dir", + ) + + pid = dataset.upload(dataverse_name="root") + + # Re-fetch the dataset + dataset = dataverse.load_dataset(pid) + + # Check the metadata + assert self.sort_citation(dataset) == minimal_upload + + # Check the files + expected_file_count = self.count_files_recursively("./tests/fixtures") + assert len(dataset.files) == expected_file_count, ( + f"The number of files should be correct: Got {len(dataset.files)}, expected {expected_file_count}" + ) + + # Check if files have uploaded in the correct directory + for file in dataset.files: + assert "some/sub/dir" in file.directory_label, ( + "File should be in the sub-directory" + ) + @pytest.mark.integration def test_creation_other_license( self, @@ -76,6 +130,39 @@ def test_creation_other_license( assert self.sort_citation(dataset) == minimal_upload_other_license + def test_tab_ingest_disabled( + self, + credentials, + ): + # Arrange + base_url, api_token = credentials + dataverse = Dataverse( + server_url=base_url, + api_token=api_token, + ) + + # Act + dataset = dataverse.create_dataset() + + dataset.citation.title = "My dataset" + dataset.citation.subject = ["Other"] + dataset.citation.add_author(name="John Doe") + dataset.citation.add_ds_description( + value="This 
is a description of the dataset", + date="2024", + ) + dataset.citation.add_dataset_contact( + name="John Doe", + email="john@doe.com", + ) + + dataset.add_file( + local_path="./tests/fixtures/tabular_file.csv", + tab_ingest=False, + ) + + assert dataset.files[0].tab_ingest is False, "Tab-ingest should be disabled" + @staticmethod def sort_citation(dataset: Dataset): dv_dict = dataset.dataverse_dict() @@ -87,3 +174,10 @@ def sort_citation(dataset: Dataset): ) return dv_dict + + @staticmethod + def count_files_recursively(dirpath: str): + count = 0 + for root, dirs, files in os.walk(dirpath): + count += len(files) + return count From e30474711a362cb8fe28211fad8c0f2ecbd56149 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Wed, 16 Apr 2025 15:39:30 +0200 Subject: [PATCH 5/6] tests validation logic --- tests/unit/test_dataverse.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/unit/test_dataverse.py diff --git a/tests/unit/test_dataverse.py b/tests/unit/test_dataverse.py new file mode 100644 index 0000000..f5f2bb0 --- /dev/null +++ b/tests/unit/test_dataverse.py @@ -0,0 +1,23 @@ +import pytest + +from easyDataverse.dataverse import Dataverse + + +class TestDataverse: + @pytest.mark.unit + def test_invalid_url(self): + """Test that an invalid URL raises a ValueError""" + with pytest.raises(ValueError): + Dataverse( + server_url="not a url", + api_token="9eb39a88-ab0d-415d-80c2-32cbafdb5f6f", + ) + + @pytest.mark.unit + def test_invalid_api_token(self): + """Test that an invalid API token raises a ValueError""" + with pytest.raises(ValueError): + Dataverse( + server_url="http://localhost:8080", + api_token="not a uuid", + ) From 33e258de4216a7dea775bfd862c6955291012941 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Wed, 16 Apr 2025 15:52:28 +0200 Subject: [PATCH 6/6] ditch `3.8` support --- .github/workflows/integration-tests.yaml | 2 +- 
.github/workflows/unit-tests.yaml | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integration-tests.yaml b/.github/workflows/integration-tests.yaml index 6da92df..88f7e35 100644 --- a/.github/workflows/integration-tests.yaml +++ b/.github/workflows/integration-tests.yaml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] env: PORT: 8080 diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index b5de450..c4cdb67 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] steps: - name: Checkout diff --git a/pyproject.toml b/pyproject.toml index 517d2b1..f8bb9ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ readme = "Readme.md" packages = [{ include = "easyDataverse" }] [tool.poetry.dependencies] -python = "^3.8" +python = "^3.9" pydantic = "^2.7.1" pydataverse = "^0.3.1" pyaml = "^24.4.0"