Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/integration-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
strategy:
max-parallel: 4
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']

env:
PORT: 8080
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unit-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
strategy:
max-parallel: 4
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']

steps:
- name: Checkout
Expand Down
7 changes: 5 additions & 2 deletions easyDataverse/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import nob
import xmltodict
import yaml
from pydantic import BaseModel, ConfigDict, Field, HttpUrl
from pydantic import BaseModel, ConfigDict, Field

from dvuploader import File, add_directory

Expand Down Expand Up @@ -54,7 +54,7 @@ class Dataset(BaseModel):
)

API_TOKEN: Optional[str] = Field(None)
DATAVERSE_URL: Optional[HttpUrl] = Field(None)
DATAVERSE_URL: Optional[str] = Field(None)

# ! Adders
def add_metadatablock(self, metadatablock: DataverseBase) -> None:
Expand Down Expand Up @@ -85,6 +85,7 @@ def add_file(
file_name: Optional[str] = None,
categories: List[str] = ["DATA"],
description: str = "",
tab_ingest: bool = True,
):
"""Adds a file to the dataset based on the provided path.

Expand All @@ -94,6 +95,7 @@ def add_file(
file_name (str, optional): Name of the file in Dataverse. Defaults to None, which will use the basename of local_path.
categories (List[str], optional): List of categories to assign to the file. Defaults to ["DATA"].
description (str, optional): Description of the file. Defaults to "".
tab_ingest (bool, optional): Whether to use tab-separated ingest. Defaults to True.

Raises:
FileExistsError: If the file has already been added to the dataset.
Expand All @@ -105,6 +107,7 @@ def add_file(
description=description,
categories=categories,
file_name=file_name, # type: ignore
tab_ingest=tab_ingest, # type: ignore
)

if file not in self.files:
Expand Down
25 changes: 23 additions & 2 deletions easyDataverse/dataverse.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
from copy import deepcopy
import json
from uuid import UUID
from typing import Callable, Dict, List, Optional, Tuple, IO
from urllib import parse

Expand All @@ -18,6 +19,7 @@
HttpUrl,
PrivateAttr,
computed_field,
field_validator,
)
from pyDataverse.api import DataAccessApi, NativeApi
import rich
Expand Down Expand Up @@ -45,12 +47,12 @@ class Dataverse(BaseModel):

model_config = ConfigDict(arbitrary_types_allowed=True)

server_url: HttpUrl = Field(
server_url: str = Field(
...,
description="The URL of the Dataverse installation to connect to.",
)

api_token: Optional[UUID4] = Field(
api_token: Optional[str] = Field(
default=None,
description="The API token to use for authentication. If not provided, only public data can be accessed.",
)
Expand All @@ -63,6 +65,25 @@ class Dataverse(BaseModel):
_dataset_gen: Callable = PrivateAttr()
_connected: bool = PrivateAttr(default=False)

@field_validator("server_url")
@classmethod
def validate_url(cls, v):
    """Validate the server URL.

    Args:
        v: Candidate value for ``server_url``.

    Returns:
        The input value, unchanged, when it parses as an HTTP(S) URL. The
        field is declared as ``str``, so the string is kept as-is rather
        than being converted to a URL object.

    Raises:
        ValueError: If ``v`` is not a valid HTTP(S) URL.
    """
    # Local import: ``TypeAdapter`` is only needed by this validator.
    from pydantic import TypeAdapter

    try:
        # Validate through a TypeAdapter instead of calling ``HttpUrl(v)``.
        # Depending on the installed pydantic 2.x release, ``HttpUrl`` is
        # either an ``Annotated`` alias or a real class; only the latter
        # applies the http/https constraints when called directly, whereas
        # the adapter applies them in both cases.
        TypeAdapter(HttpUrl).validate_python(v)
    except ValueError as e:
        # pydantic's ValidationError is a ValueError subclass.
        raise ValueError("Server URL must be a valid URL") from e
    return v

@field_validator("api_token")
@classmethod
def validate_api_token(cls, v):
    """Validate the API token.

    Args:
        v: Candidate value for ``api_token``; ``None`` means no token.

    Returns:
        ``None`` when no token was given, otherwise the input string
        unchanged once it has been confirmed to parse as a UUID.

    Raises:
        ValueError: If ``v`` is not ``None`` and cannot be parsed as a UUID.
    """
    # A missing token is allowed, so pass it through.
    if v is None:
        return None

    try:
        # Parsed only to check the format; the original string is what
        # gets stored on the model.
        UUID(v)
    except ValueError as e:
        raise ValueError("API token must be a valid UUID") from e
    return v

def __init__(
self,
server_url: HttpUrl,
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ readme = "Readme.md"
packages = [{ include = "easyDataverse" }]

[tool.poetry.dependencies]
python = "^3.8"
python = "^3.9"
pydantic = "^2.7.1"
pydataverse = "^0.3.1"
pyaml = "^24.4.0"
Expand All @@ -19,9 +19,9 @@ dotted-dict = "1.1.3"
rich = "^13.7.1"
nob = "^0.8.2"
nest-asyncio = "^1.6.0"
dvuploader = "^0.2.3"
dvuploader = "^0.3.0"
email-validator = "^2.1.1"
httpx = "0.28"
httpx = "^0.28"

[tool.poetry.group.test.dependencies]
pytest-cov = "^5.0.0"
Expand Down
94 changes: 94 additions & 0 deletions tests/integration/test_dataset_creation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import pytest
from easyDataverse.dataset import Dataset

Expand Down Expand Up @@ -40,6 +41,59 @@ def test_creation(

assert self.sort_citation(dataset) == minimal_upload

@pytest.mark.integration
def test_creation_and_upload(
    self,
    credentials,
    minimal_upload,
):
    """Upload a dataset with a directory of files and verify the round trip.

    Builds a minimal dataset, attaches the local fixtures directory under a
    nested Dataverse directory, uploads it, loads it again by its returned
    identifier and checks metadata, file count and directory labels.
    """
    fixtures_dir = "./tests/fixtures"
    remote_dir = "some/sub/dir"

    # Arrange
    server, token = credentials
    client = Dataverse(server_url=server, api_token=token)

    # Act
    draft = client.create_dataset()

    draft.citation.title = "My dataset"
    draft.citation.subject = ["Other"]
    draft.citation.add_author(name="John Doe")
    draft.citation.add_ds_description(
        value="This is a description of the dataset",
        date="2024",
    )
    draft.citation.add_dataset_contact(
        name="John Doe",
        email="john@doe.com",
    )

    draft.add_directory(dirpath=fixtures_dir, dv_dir=remote_dir)

    persistent_id = draft.upload(dataverse_name="root")

    # Load the uploaded dataset again so the assertions run against it
    reloaded = client.load_dataset(persistent_id)

    # Metadata must match the expected minimal upload
    assert self.sort_citation(reloaded) == minimal_upload

    # Every local fixture file must be present in the reloaded dataset
    expected_file_count = self.count_files_recursively(fixtures_dir)
    assert len(reloaded.files) == expected_file_count, (
        f"The number of files should be correct: Got {len(reloaded.files)}, expected {expected_file_count}"
    )

    # Every file must carry the requested directory label
    for uploaded in reloaded.files:
        assert remote_dir in uploaded.directory_label, (
            "File should be in the sub-directory"
        )

@pytest.mark.integration
def test_creation_other_license(
self,
Expand Down Expand Up @@ -76,6 +130,39 @@ def test_creation_other_license(

assert self.sort_citation(dataset) == minimal_upload_other_license

# Marked like the other tests in this suite that use the ``credentials``
# fixture, so marker-based selection (``-m integration``) picks this test up.
@pytest.mark.integration
def test_tab_ingest_disabled(
    self,
    credentials,
):
    """Check that ``tab_ingest=False`` is kept on a file added to a dataset."""
    # Arrange
    base_url, api_token = credentials
    dataverse = Dataverse(
        server_url=base_url,
        api_token=api_token,
    )

    # Act
    dataset = dataverse.create_dataset()

    dataset.citation.title = "My dataset"
    dataset.citation.subject = ["Other"]
    dataset.citation.add_author(name="John Doe")
    dataset.citation.add_ds_description(
        value="This is a description of the dataset",
        date="2024",
    )
    dataset.citation.add_dataset_contact(
        name="John Doe",
        email="john@doe.com",
    )

    dataset.add_file(
        local_path="./tests/fixtures/tabular_file.csv",
        tab_ingest=False,
    )

    # Assert: the flag passed to ``add_file`` is stored on the file entry
    assert dataset.files[0].tab_ingest is False, "Tab-ingest should be disabled"

@staticmethod
def sort_citation(dataset: Dataset):
dv_dict = dataset.dataverse_dict()
Expand All @@ -87,3 +174,10 @@ def sort_citation(dataset: Dataset):
)

return dv_dict

@staticmethod
def count_files_recursively(dirpath: str):
count = 0
for root, dirs, files in os.walk(dirpath):
count += len(files)
return count
23 changes: 23 additions & 0 deletions tests/unit/test_dataverse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pytest

from easyDataverse.dataverse import Dataverse


class TestDataverse:
    """Unit tests for argument validation when constructing ``Dataverse``."""

    @pytest.mark.unit
    def test_invalid_url(self):
        """A malformed server URL must raise a ValueError."""
        arguments = {
            "server_url": "not a url",
            "api_token": "9eb39a88-ab0d-415d-80c2-32cbafdb5f6f",
        }

        with pytest.raises(ValueError):
            Dataverse(**arguments)

    @pytest.mark.unit
    def test_invalid_api_token(self):
        """An API token that is not a UUID must raise a ValueError."""
        arguments = {
            "server_url": "http://localhost:8080",
            "api_token": "not a uuid",
        }

        with pytest.raises(ValueError):
            Dataverse(**arguments)