Skip to content

Commit 57520c3

Browse files
authored
feat(cli): support rsync-style syntax using / in dataset add (#3362)
1 parent 818a89d commit 57520c3

File tree

3 files changed

+37
-4
lines changed

3 files changed

+37
-4
lines changed

renku/core/dataset/providers/local.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,13 +130,17 @@ def add(
130130
elif copy:
131131
prompt_action = False
132132

133+
ends_with_slash = False
133134
u = urllib.parse.urlparse(uri)
134135
path = u.path
135136

136137
action = DatasetAddAction.SYMLINK if external else default_action
137138
source_root = Path(get_absolute_path(path))
138139
warnings: List[str] = []
139140

141+
if source_root.is_dir() and uri.endswith("/"):
142+
ends_with_slash = True
143+
140144
def check_recursive_addition(src: Path):
141145
if is_subpath(destination, src):
142146
raise errors.ParameterError(f"Cannot recursively add path containing dataset's data directory: {path}")
@@ -155,7 +159,12 @@ def get_destination_root():
155159
if source_root.is_dir() and destination_exists and not destination_is_dir:
156160
raise errors.ParameterError(f"Cannot copy directory '{path}' to non-directory '{destination}'")
157161

158-
return destination / source_root.name if destination_exists and destination_is_dir else destination
162+
if destination_exists and destination_is_dir:
163+
if ends_with_slash:
164+
return destination
165+
166+
return destination / source_root.name
167+
return destination
159168

160169
def get_metadata(src: Path) -> DatasetAddMetadata:
161170
is_tracked = repository.contains(src)

renku/ui/cli/dataset.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,14 @@
137137
This will copy the contents of ``data-url`` to the dataset and add it
138138
to the dataset metadata.
139139
140+
.. note::
141+
142+
If the URL refers to a local directory, data is added differently depending
143+
on whether there is a trailing slash (``/``). If the URL ends in a slash,
144+
files inside the directory are added to the target directory. If it does
145+
not end in a slash, then the directory itself will be added inside the
146+
target directory.
147+
140148
You can create a dataset when you add data to it for the first time by passing
141149
``--create`` flag to add command:
142150

tests/core/commands/test_dataset.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
from renku.core.util.contexts import chdir
4343
from renku.core.util.git import get_git_user
4444
from renku.core.util.urls import get_slug
45-
from renku.domain_model.dataset import Dataset, is_dataset_name_valid
45+
from renku.domain_model.dataset import Dataset, Url, is_dataset_name_valid
4646
from renku.domain_model.project_context import project_context
4747
from renku.domain_model.provenance.agent import Person
4848
from renku.infrastructure.gateway.dataset_gateway import DatasetGateway
@@ -88,6 +88,22 @@ def test_data_add(scheme, path, overwrite, error, project_with_injection, direct
8888
assert os.path.exists(target_path)
8989

9090

91+
@pytest.mark.parametrize(
92+
"slash, target",
93+
[
94+
(False, "data/dataset/dir1/file2"),
95+
(True, "data/dataset/file2"),
96+
],
97+
)
98+
def test_data_add_trailing_slash(slash, target, directory_tree, project_with_injection):
99+
"""Test that a trailing slash adds a directory's contents instead of the directory itself."""
100+
101+
dataset = add_to_dataset("dataset", [str(directory_tree / "dir1") + ("/" if slash else "")], create=True)
102+
103+
file = next(f for f in dataset.files if f.entity.path.endswith("file2"))
104+
assert file.entity.path == target
105+
106+
91107
def test_data_add_recursive(directory_tree, project_with_injection):
92108
"""Test recursive data imports."""
93109
dataset = add_to_dataset("dataset", [str(directory_tree / "dir1")], create=True)
@@ -178,7 +194,7 @@ def test_mutate(project):
178194
name="my-dataset",
179195
creators=[Person.from_string("John Doe <john.doe@mail.com>")],
180196
date_published=datetime.datetime.now(datetime.timezone.utc),
181-
same_as="http://some-url",
197+
same_as=Url(url_str="http://some-url"),
182198
)
183199
old_dataset = copy.deepcopy(dataset)
184200

@@ -197,7 +213,7 @@ def test_mutator_is_added_once(project):
197213
name="my-dataset",
198214
creators=[mutator],
199215
date_published=datetime.datetime.now(datetime.timezone.utc),
200-
same_as="http://some-url",
216+
same_as=Url(url_str="http://some-url"),
201217
)
202218
old_dataset = copy.deepcopy(dataset)
203219

0 commit comments

Comments
 (0)