Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 6 additions & 29 deletions solvebio/cli/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from solvebio import DatasetImport
from solvebio import DatasetTemplate
from solvebio import GlobalSearch
from solvebio.utils.files import check_gzip_path
from solvebio.utils.files import check_gzip_path, edp_path_join, edp_path
from solvebio.utils.md5sum import md5sum
from solvebio.errors import SolveError
from solvebio.errors import NotFoundError
Expand Down Expand Up @@ -85,37 +85,14 @@ def _check_uploaded_folders(base_remote_path, local_start, all_folders, follow_s
not already exist.

"""

def _folder_exists(folder_full_path, remote_folders_existing, follow_shortcuts):
if follow_shortcuts:
# When following shortcuts we cannot determine which folders exist based on global search.
# Each folder has to be checked separately.
try:
Object.get_by_full_path(full_path=folder_full_path, follow_shortcuts=True)
except NotFoundError:
return False
return True
else:
return folder_full_path in remote_folders_existing

upload_root_path, _ = Object.validate_full_path(
os.path.join(base_remote_path, local_start)
edp_path_join(base_remote_path, local_start)
)
results = GlobalSearch().filter(path__prefix=upload_root_path, type="folder")
remote_folders_existing = set([x.full_path for x in results])
all_folder_parts = set()
for vault, remote_folder_path in all_folders:
subfolders = remote_folder_path.lstrip("/").split("/")
parent_folder_path = subfolders[0]
# Split each folder into parts and check if these exist
# Skip root folder as we don't need to create this
for folder in subfolders[1:]:
folder_full_path = os.path.join(parent_folder_path, folder)
if not _folder_exists(folder_full_path, remote_folders_existing, follow_shortcuts):
all_folder_parts.add(folder_full_path)
parent_folder_path = folder_full_path

return all_folder_parts
remote_folders_existing = {edp_path(x.full_path) for x in results}
all_folders = {edp_path(f) for vault, f in all_folders}

return {f for f in all_folders if f not in remote_folders_existing}


def _upload_folder(
Expand Down
19 changes: 19 additions & 0 deletions solvebio/utils/files.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import mimetypes
import os
import pathlib

COMPRESSIONS = ('.gz', '.gzip', '.bz2', '.z', '.zip', '.bgz')

Expand Down Expand Up @@ -43,3 +44,21 @@ def get_home_dir():
except:
from os.path import expanduser
return expanduser("~")


def edp_path_join(*edp_paths):
    """Join path components with POSIX rules and end the result with "/".

    The components are combined by ``pathlib.PurePosixPath``, so an
    absolute component discards everything given before it and
    redundant separators are collapsed.  A trailing "/" is appended
    unless the joined path already ends with one (the bare root "/").
    """
    joined = pathlib.PurePosixPath(*edp_paths).as_posix()
    return joined if joined.endswith("/") else joined + "/"


def edp_path(path: str) -> str:
    """Normalize an OS path to an EDP remote path.

    The path is parsed with Windows rules, so both "\\" and "/" are
    treated as separators, and is re-joined with "/" only.  The result
    never starts with "/".

    :param path: local or remote path using either separator style.
    :return: the "/"-separated path without a leading slash; an empty
        string for an empty or "." path.
    """
    win_path = pathlib.PureWindowsPath(path)

    # The anchor part of a rooted path ("\\", "C:\\", a UNC share) carries
    # backslashes.  Handing it to PurePosixPath unchanged would keep them as
    # literal filename characters (e.g. "/a/b" -> "\\/a/b"), so strip the
    # separators from every part and drop parts that end up empty.
    cleaned = (part.replace("\\", "/").strip("/") for part in win_path.parts)
    return "/".join(part for part in cleaned if part)

Loading