action.yml: 4 changes (2 additions, 2 deletions)
@@ -38,7 +38,7 @@ runs:
     - name: Install Python
       uses: actions/setup-python@v5
       with:
-        python-version: '3.x'
+        python-version: '3.12'
     - name: Install dependencies
       shell: bash
       run: |
@@ -52,7 +52,7 @@ runs:
           "${{inputs.DATAVERSE_TOKEN}}"
           "${{inputs.DATAVERSE_SERVER}}"
           "${{inputs.DATAVERSE_DATASET_DOI}}"
-          $GITHUB_REPOSITORY
+          .
           -d "${{inputs.GITHUB_DIR}}"
           -r "${{inputs.DELETE}}"
           -p "${{inputs.PUBLISH}}"
dataverse.py: 44 changes (40 additions, 4 deletions)
@@ -57,48 +57,84 @@ def check_dataset_lock(num):
 dataverse_server = args.server.strip("/")
 api = NativeApi(dataverse_server, token)
 
+print(f"Connecting to Dataverse server: {dataverse_server}")
+print(f"Dataset DOI: {args.doi}")
+
 dataset = api.get_dataset(args.doi)
 files_list = dataset.json()['data']['latestVersion']['files']
 dataset_dbid = dataset.json()['data']['id']
 
+print(f"Dataset ID: {dataset_dbid}")
+print(f"Found {len(files_list)} existing files in dataset")
+
 if args.remove.lower() == 'true':
     # the following deletes all the files in the dataset
+    print(f"Deleting {len(files_list)} existing files...")
     delete_api = dataverse_server + \
         '/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/'
     for f in files_list:
         fileid = f["dataFile"]["id"]
+        filename = f["dataFile"].get("filename", "unknown")
+        print(f"  Deleting file: {filename} (ID: {fileid})")
         resp = requests.delete(
             delete_api + str(fileid),
             auth=(token, ""))
+        print(f"  Response status: {resp.status_code}")
 
-# check if there is a list of dirs to upload
-paths = ['repo']
+# check if there is a list of dirs to upload
+paths = [args.repo]
 if args.dir:
     dirs = args.dir.strip().replace(",", " ")
     dirs = dirs.split()
-    paths = [join('repo', d) for d in dirs]
+    paths = [join(args.repo, d) for d in dirs]
 
+print(f"\nPaths to upload: {paths}")
+
 # the following adds all files from the repository to Dataverse
+upload_count = 0
 for path in paths:
+    print(f"\nScanning path: {path}")
+    if not next(walk(path), None):
+        print(f"  Warning: Path {path} does not exist or is empty")
     for root, subdirs, files in walk(path):
         if '.git' in subdirs:
             subdirs.remove('.git')
         if '.github' in subdirs:
             subdirs.remove('.github')
+        print(f"  Directory: {root} (contains {len(files)} files)")
         for f in files:
+            upload_count += 1
+            full_path = join(root, f)
+            # Remove the repo prefix from the directory label
+            repo_prefix_len = len(args.repo) + 1 if args.repo != '.' else 0
+            directory_label = root[repo_prefix_len:] if len(root) > repo_prefix_len else ""
+            print(f"  Uploading [{upload_count}]: {full_path}")
+            print(f"    Filename: {f}")
+            print(f"    Directory label: '{directory_label}'")
             df = Datafile()
             df.set({
                 "pid": args.doi,
                 "filename": f,
-                "directoryLabel": root[5:],
+                "directoryLabel": directory_label,
                 "description": "Uploaded with GitHub Action from {}.".format(
                     args.repo),
             })
             resp = api.upload_datafile(
                 args.doi, join(root, f), df.json())
+            print(f"    Response status: {resp.status_code}")
+            if resp.status_code not in [200, 201]:
+                print(f"    ERROR: {resp.text}")
             check_dataset_lock(5)
 
+print(f"\nTotal files uploaded: {upload_count}")
+
 if args.publish.lower() == 'true':
     # publish updated dataset
+    print("\nPublishing dataset...")
     resp = api.publish_dataset(args.doi, release_type="major")
+    print(f"Publish response status: {resp.status_code}")
+    if resp.status_code not in [200, 201]:
+        print(f"ERROR: {resp.text}")
+
+print("\nDone!")