From 879a370ca0e8cb4b78c939364c3827678e5d83c5 Mon Sep 17 00:00:00 2001 From: Lars Vilhuber Date: Mon, 26 May 2025 22:42:44 -0400 Subject: [PATCH 1/5] Re #25 correcting incorrect reference --- dataverse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataverse.py b/dataverse.py index 7fac891..58450b4 100644 --- a/dataverse.py +++ b/dataverse.py @@ -72,11 +72,11 @@ def check_dataset_lock(num): auth = (token , "")) # check if there is a list of dirs to upload - paths = ['repo'] + paths = [args.repo] if args.dir: dirs = args.dir.strip().replace(",", " ") dirs = dirs.split() - paths = [join('repo', d) for d in dirs] + paths = [join(args.repo, d) for d in dirs] # the following adds all files from the repository to Dataverse for path in paths: From 7c5eb06cb9a36fb6d1483fe4fdb29593832ed5ad Mon Sep 17 00:00:00 2001 From: Lars Vilhuber Date: Mon, 26 May 2025 23:25:35 -0400 Subject: [PATCH 2/5] Making it more verbose --- dataverse.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dataverse.py b/dataverse.py index 58450b4..85826f9 100644 --- a/dataverse.py +++ b/dataverse.py @@ -80,12 +80,16 @@ def check_dataset_lock(num): # the following adds all files from the repository to Dataverse for path in paths: + print(f"Uploading files from {path} to Dataverse...") for root, subdirs, files in walk(path): + print(f"Current directory: {root}") + print(f"Subdirectories: {subdirs}") if '.git' in subdirs: subdirs.remove('.git') if '.github' in subdirs: subdirs.remove('.github') for f in files: + print(f"Uploading {f} to Dataverse...") df = Datafile() df.set({ "pid" : args.doi, From 37760a51995f537fea44415dda63b9fa120bcceb Mon Sep 17 00:00:00 2001 From: Lars Vilhuber Date: Mon, 26 May 2025 23:31:57 -0400 Subject: [PATCH 3/5] Correcting the working directory --- action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/action.yml b/action.yml index 151c42f..4484d81 100644 --- a/action.yml +++ b/action.yml @@ -52,7 +52,7 @@ runs: 
"${{inputs.DATAVERSE_TOKEN}}" "${{inputs.DATAVERSE_SERVER}}" "${{inputs.DATAVERSE_DATASET_DOI}}" - $GITHUB_REPOSITORY + . -d "${{inputs.GITHUB_DIR}}" -r "${{inputs.DELETE}}" -p "${{inputs.PUBLISH}}" From e193fddaf01d8039b99080818e329935e5d39207 Mon Sep 17 00:00:00 2001 From: Lars Vilhuber Date: Tue, 13 Jan 2026 20:48:15 -0500 Subject: [PATCH 4/5] Different verbosity --- dataverse.py | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/dataverse.py b/dataverse.py index 85826f9..dabccb3 100644 --- a/dataverse.py +++ b/dataverse.py @@ -57,52 +57,84 @@ def check_dataset_lock(num): dataverse_server = args.server.strip("/") api = NativeApi(dataverse_server , token) + print(f"Connecting to Dataverse server: {dataverse_server}") + print(f"Dataset DOI: {args.doi}") + dataset = api.get_dataset(args.doi) files_list = dataset.json()['data']['latestVersion']['files'] dataset_dbid = dataset.json()['data']['id'] + print(f"Dataset ID: {dataset_dbid}") + print(f"Found {len(files_list)} existing files in dataset") + if args.remove.lower() == 'true': # the following deletes all the files in the dataset + print(f"Deleting {len(files_list)} existing files...") delete_api = dataverse_server + \ '/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/' for f in files_list: fileid = f["dataFile"]["id"] + filename = f["dataFile"].get("filename", "unknown") + print(f" Deleting file: {filename} (ID: {fileid})") resp = requests.delete( delete_api + str(fileid), \ auth = (token , "")) + print(f" Response status: {resp.status_code}") - # check if there is a list of dirs to upload + # check if there is a list of dirs to upload paths = [args.repo] if args.dir: dirs = args.dir.strip().replace(",", " ") dirs = dirs.split() paths = [join(args.repo, d) for d in dirs] + print(f"\nPaths to upload: {paths}") + # the following adds all files from the repository to Dataverse + upload_count = 0 for path in paths: - print(f"Uploading files from {path} 
to Dataverse...") + print(f"\nScanning path: {path}") + if not next(walk(path), None): + print(f" Warning: Path does not exist or is empty") for root, subdirs, files in walk(path): - print(f"Current directory: {root}") - print(f"Subdirectories: {subdirs}") if '.git' in subdirs: subdirs.remove('.git') if '.github' in subdirs: subdirs.remove('.github') + print(f" Directory: {root} (contains {len(files)} files)") for f in files: - print(f"Uploading {f} to Dataverse...") + upload_count += 1 + full_path = join(root, f) + # Remove the repo prefix from the directory label + repo_prefix_len = len(args.repo) + 1 if args.repo != '.' else 0 + directory_label = root[repo_prefix_len:] if len(root) > repo_prefix_len else "" + print(f" Uploading [{upload_count}]: {full_path}") + print(f" Filename: {f}") + print(f" Directory label: '{directory_label}'") df = Datafile() df.set({ "pid" : args.doi, "filename" : f, - "directoryLabel": root[5:], + "directoryLabel": directory_label, "description" : \ "Uploaded with GitHub Action from {}.".format( args.repo), }) resp = api.upload_datafile( args.doi, join(root,f), df.json()) + print(f" Response status: {resp.status_code}") + if resp.status_code not in [200, 201]: + print(f" ERROR: {resp.text}") check_dataset_lock(5) + print(f"\nTotal files uploaded: {upload_count}") + if args.publish.lower() == 'true': # publish updated dataset + print("\nPublishing dataset...") resp = api.publish_dataset(args.doi, release_type="major") + print(f"Publish response status: {resp.status_code}") + if resp.status_code not in [200, 201]: + print(f"ERROR: {resp.text}") + + print("\nDone!") From c6e1eada484a6d54f1db3fdfeae74f57aba6c322 Mon Sep 17 00:00:00 2001 From: Lars Vilhuber Date: Tue, 13 Jan 2026 20:49:31 -0500 Subject: [PATCH 5/5] Fixing Python version --- action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/action.yml b/action.yml index 4484d81..5cbe89c 100644 --- a/action.yml +++ b/action.yml @@ -38,7 +38,7 @@ runs: - name: Install Python 
uses: actions/setup-python@v5 with: - python-version: '3.x' + python-version: '3.12' - name: Install dependencies shell: bash run: |