From 9b62b88877ec0445efd60f75fb1e7c7cf36f87ab Mon Sep 17 00:00:00 2001 From: Nikola Rasulic Date: Thu, 17 Apr 2025 08:14:29 +0000 Subject: [PATCH 1/2] various bugfixes --- solvebio/cli/data.py | 11 +++++++++-- solvebio/global_search.py | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/solvebio/cli/data.py b/solvebio/cli/data.py index d46850f6..3d2ad692 100644 --- a/solvebio/cli/data.py +++ b/solvebio/cli/data.py @@ -197,7 +197,8 @@ def _upload_folder( for folder in all_folder_parts: print("{}Creating folder {}".format( "[Dry Run] " if dry_run else "", folder)) - Object.create_folder(vault, folder) + if not dry_run: + Object.create_folder(vault, folder) # Create files in parallel # Signal handling allows for graceful exit upon KeyboardInterrupt @@ -752,9 +753,11 @@ def _download_recursive( min_depth = min([x.depth for x in remote_objects]) num_at_min_depth = len([x for x in remote_objects if x.depth == min_depth]) - if num_at_min_depth == 1: + if num_at_min_depth == 1 and not _is_single_file(remote_objects): + # when downloading from folder base_folder_depth = min_depth else: + # when downloading from vault root or singular file base_folder_depth = min_depth - 1 downloaded_files = set() @@ -995,6 +998,10 @@ def _ls(full_path, recursive=False, follow_shortcuts=False): return files +def _is_single_file(objects): + return len(objects) == 1 and objects[0].get("object_type") == "file" + + def should_tag_by_object_type(args, object_): """Returns True if object matches object type requirements""" valid = True diff --git a/solvebio/global_search.py b/solvebio/global_search.py index ed79b51e..35299849 100644 --- a/solvebio/global_search.py +++ b/solvebio/global_search.py @@ -202,7 +202,7 @@ def subjects(self): # Executes a query to get a full API response which contains subjects list gs = self.limit(0) - gs.execute(include_subjects=True) + gs.execute(include_subjects=True, include_all_subjects=True) return gs._response.get('subjects') From bea854e6ebe49912834c7ccb96f4bdfab650e230 Mon Sep 17 00:00:00 2001 From: Nikola Rasulic Date: Thu, 24 Apr 2025 14:03:33 +0000 Subject: [PATCH 2/2] check for file/folder before uploading: --- solvebio/cli/data.py | 67 ++++++++++++++++++++++++++++++------- solvebio/resource/object.py | 1 + 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/solvebio/cli/data.py b/solvebio/cli/data.py index 3d2ad692..b729eca5 100644 --- a/solvebio/cli/data.py +++ b/solvebio/cli/data.py @@ -195,9 +195,16 @@ def _upload_folder( # be populated all_folder_parts = sorted(all_folder_parts, key=lambda x: len(x.split("/"))) for folder in all_folder_parts: - print("{}Creating folder {}".format( - "[Dry Run] " if dry_run else "", folder)) - if not dry_run: + if dry_run: + try: + f = Object.get_by_full_path(folder) + if not f.is_folder: + print("[Dry Run] {} is not a folder - this will cause an error on upload.".format(folder)) + else: + print("[Dry Run] Folder {} already exists - skipping creation".format(folder)) + except NotFoundError: + print("[Dry Run] Creating folder {}".format(folder)) + else: Object.create_folder(vault, folder) # Create files in parallel @@ -235,18 +242,32 @@ def _create_file_job(args): try: local_file_path, remote_folder_full_path, vault_path, dry_run, archive_folder, client_auth, follow_shortcuts \ = args - if dry_run: - print("[Dry Run] Uploading {} to {}".format( - local_file_path, remote_folder_full_path)) - return + # Provides the global host, token, token_type client = SolveClient(*client_auth) - remote_parent = Object.get_by_full_path( - remote_folder_full_path, - assert_type="folder", - follow_shortcuts=follow_shortcuts, - client=client - ) + + remote_parent = None + try: + remote_parent = Object.get_by_full_path( + remote_folder_full_path, + assert_type="folder", + follow_shortcuts=follow_shortcuts, + client=client + ) + except NotFoundError as e: + if not dry_run: + raise e + + if dry_run: + if not _object_exists(remote_parent, local_file_path, client): + print("[Dry Run] Uploading {} to {}".format( + local_file_path, remote_folder_full_path)) + return + else: + print("[Dry Run] File {} already exists at {} - skipping upload".format( + local_file_path, remote_folder_full_path)) + return + Object.upload_file( local_file_path, remote_parent.path, @@ -261,6 +282,26 @@ def _create_file_job(args): except Exception as e: return e +def _object_exists(remote_parent, local_path, _client): + if remote_parent is None: + return False + full_path, path_dict = Object.validate_full_path( + os.path.join('{}:{}'.format(remote_parent.vault.full_path, remote_parent.path), + os.path.basename(local_path)), client=_client) + try: + obj = Object.get_by_full_path(full_path, client=_client) + if not obj.is_file: + return False + else: + # Check if the md5sum matches + local_md5 = md5sum(local_path)[0] + remote_md5 = obj.get("md5") + if remote_md5 and remote_md5 == local_md5: + return True + else: + return False + except NotFoundError: + return False def _create_template_from_file(template_file, dry_run=False): mode = "r" diff --git a/solvebio/resource/object.py b/solvebio/resource/object.py index 8e69bc93..3b75f0b8 100644 --- a/solvebio/resource/object.py +++ b/solvebio/resource/object.py @@ -370,6 +370,7 @@ def create_folder(cls, vault, full_path, tags=None, **kwargs): parent_object_id = parent.id # Make the API call + print("Creating folder {}".format(full_path)) new_obj = Object.create( vault_id=vault.id, parent_object_id=parent_object_id,