diff --git a/action.yml b/action.yml
index 151c42f..5cbe89c 100644
--- a/action.yml
+++ b/action.yml
@@ -38,7 +38,7 @@ runs:
     - name: Install Python
       uses: actions/setup-python@v5
       with:
-        python-version: '3.x'
+        python-version: '3.12'
     - name: Install dependencies
       shell: bash
       run: |
@@ -52,7 +52,7 @@ runs:
           "${{inputs.DATAVERSE_TOKEN}}" \
           "${{inputs.DATAVERSE_SERVER}}" \
           "${{inputs.DATAVERSE_DATASET_DOI}}" \
-          $GITHUB_REPOSITORY \
+          . \
           -d "${{inputs.GITHUB_DIR}}" \
           -r "${{inputs.DELETE}}" \
           -p "${{inputs.PUBLISH}}"
diff --git a/dataverse.py b/dataverse.py
index 7fac891..dabccb3 100644
--- a/dataverse.py
+++ b/dataverse.py
@@ -57,48 +57,84 @@ def check_dataset_lock(num):
 dataverse_server = args.server.strip("/")
 api = NativeApi(dataverse_server , token)
 
+print(f"Connecting to Dataverse server: {dataverse_server}")
+print(f"Dataset DOI: {args.doi}")
+
 dataset = api.get_dataset(args.doi)
 files_list = dataset.json()['data']['latestVersion']['files']
 dataset_dbid = dataset.json()['data']['id']
 
+print(f"Dataset ID: {dataset_dbid}")
+print(f"Found {len(files_list)} existing files in dataset")
+
 if args.remove.lower() == 'true':
     # the following deletes all the files in the dataset
+    print(f"Deleting {len(files_list)} existing files...")
     delete_api = dataverse_server + \
         '/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/'
     for f in files_list:
         fileid = f["dataFile"]["id"]
+        filename = f["dataFile"].get("filename", "unknown")
+        print(f"  Deleting file: {filename} (ID: {fileid})")
         resp = requests.delete(
             delete_api + str(fileid), \
             auth = (token , ""))
+        print(f"  Response status: {resp.status_code}")
 
-# check if there is a list of dirs to upload
-paths = ['repo']
+# check if there is a list of dirs to upload
+paths = [args.repo]
 if args.dir:
     dirs = args.dir.strip().replace(",", " ")
     dirs = dirs.split()
-    paths = [join('repo', d) for d in dirs]
+    paths = [join(args.repo, d) for d in dirs]
+
+print(f"\nPaths to upload: {paths}")
 
 # the following adds all files from the repository to Dataverse
+upload_count = 0
 for path in paths:
+    print(f"\nScanning path: {path}")
+    if next(walk(path), None) is None:
+        print("  Warning: path does not exist or is empty")
     for root, subdirs, files in walk(path):
         if '.git' in subdirs:
             subdirs.remove('.git')
         if '.github' in subdirs:
             subdirs.remove('.github')
+        print(f"  Directory: {root} (contains {len(files)} files)")
         for f in files:
+            upload_count += 1
+            full_path = join(root, f)
+            # Remove the repo prefix from the directory label
+            repo_prefix_len = len(args.repo) + 1 if args.repo != '.' else 0
+            directory_label = root[repo_prefix_len:] if len(root) > repo_prefix_len else ""
+            print(f"  Uploading [{upload_count}]: {full_path}")
+            print(f"    Filename: {f}")
+            print(f"    Directory label: '{directory_label}'")
             df = Datafile()
             df.set({
                 "pid"           : args.doi,
                 "filename"      : f,
-                "directoryLabel": root[5:],
+                "directoryLabel": directory_label,
                 "description"   : \
                     "Uploaded with GitHub Action from {}.".format(
                         args.repo),
                 })
             resp = api.upload_datafile(
                 args.doi, join(root,f), df.json())
+            print(f"    Response status: {resp.status_code}")
+            if resp.status_code not in [200, 201]:
+                print(f"    ERROR: {resp.text}")
             check_dataset_lock(5)
 
+print(f"\nTotal files uploaded: {upload_count}")
+
 if args.publish.lower() == 'true':
     # publish updated dataset
+    print("\nPublishing dataset...")
     resp = api.publish_dataset(args.doi, release_type="major")
+    print(f"Publish response status: {resp.status_code}")
+    if resp.status_code not in [200, 201]:
+        print(f"ERROR: {resp.text}")
+
+print("\nDone!")
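
For reference, a minimal standalone sketch of the new directory-label computation introduced in the dataverse.py hunk above; the `directory_label_for` helper name and the sample paths are illustrative only, not part of the patch:

    from os.path import join

    def directory_label_for(repo, root):
        # Mirrors the patched logic: drop the "<repo>/" prefix from the
        # walked root; when repo is '.', the root passes through as-is.
        repo_prefix_len = len(repo) + 1 if repo != '.' else 0
        return root[repo_prefix_len:] if len(root) > repo_prefix_len else ""

    # Illustrative values only:
    assert directory_label_for('repo', join('repo', 'data')) == 'data'
    assert directory_label_for('repo', 'repo') == ''  # top level -> empty label

Note that with the action now passing '.' as the repo path, roots yielded by walk('.') are not trimmed, so nested labels keep the './' prefix (e.g. './data'); only the top-level case avoids the old hard-coded root[5:] slicing.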