-
Notifications
You must be signed in to change notification settings - Fork 14
Award Numbers function #9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,77 @@ | ||
| ###Produce a list of award numbers in our metadata and their information | ||
|
|
||
| #####First use download_data_objects from datamgmt | ||
#' Download DataONE objects to local files
#'
#' Fetches each pid in `data_pids` from the member node `mn` with
#' `dataone::getObject()` and writes the returned bytes to the matching
#' entry of `out_paths`. A destination that already exists is left untouched
#' and reported with a warning. A download that still fails after `n_max`
#' attempts is reported with a warning and no file is written for it.
#'
#' @param mn An `MNode` object to download from.
#' @param data_pids Character vector of object pids.
#' @param out_paths Character vector of destination file paths, one per pid.
#' @param n_max Maximum number of download attempts per object.
#' @return `NULL`, invisibly.
download_data_objects <- function(mn, data_pids, out_paths, n_max = 3) {
  stopifnot(methods::is(mn, "MNode"))
  stopifnot(is.character(data_pids))
  stopifnot(is.character(out_paths))
  # Each pid needs exactly one destination; a mismatch is a caller error.
  stopifnot(length(data_pids) == length(out_paths))

  for (i in seq_along(out_paths)) {
    if (file.exists(out_paths[i])) {
      warning(call. = FALSE,
              paste0("The file ", out_paths[i], " already exists. Skipping download."))
      next
    }

    # NULL marks "not downloaded yet". A sentinel string would be coerced and
    # compared against the downloaded bytes, and could end up written to disk.
    data_obj <- NULL
    n_tries <- 0
    while (is.null(data_obj) && n_tries < n_max) {
      data_obj <- tryCatch(
        dataone::getObject(mn, data_pids[i]),
        error = function(e) NULL
      )
      n_tries <- n_tries + 1
    }

    if (is.null(data_obj)) {
      # Never write a placeholder in place of the real object.
      warning(call. = FALSE,
              paste0("Failed to download ", data_pids[i], " after ", n_tries,
                     " attempt(s). No file was written to ", out_paths[i], "."))
      next
    }

    writeBin(data_obj, out_paths[i])
  }

  invisible(NULL)
}
| ###### | ||
|
|
||
| #Function with arguments: mn, metapid | ||
#' Extract award numbers and basic information from an EML metadata record
#'
#' Downloads the metadata object `metapid` from `mn` into `out_dir`, reads it
#' with `read_eml()`, and collects the funding paragraphs, the digit runs
#' found in them, and the dataset title.
#'
#' `getSystemMetadata()`, `read_eml()`, `as_list()` and `str_extract_all()`
#' are called unqualified, so the packages providing them must be attached by
#' the caller (presumably dataone, EML and stringr -- TODO confirm which
#' package `as_list()` is expected to come from).
#'
#' If the destination file already exists, `download_data_objects()` skips
#' the download with a warning and the existing file is read instead.
#'
#' @param mn An `MNode` object to download from.
#' @param metapid Character scalar; pid of the metadata object.
#' @param out_dir Existing directory the metadata file is downloaded into.
#'   Defaults to the session temporary directory so the function works for
#'   any user.
#' @return A named list with elements `filename` (path of the downloaded
#'   file), `metapid`, `raw_award_string` (one entry per funding paragraph),
#'   `award_number` (all digit runs found in the raw strings) and `title`.
getAwardNumbers <- function(mn, metapid, out_dir = tempdir()) {
  stopifnot(methods::is(mn, "MNode"))
  stopifnot(is.character(metapid), length(metapid) == 1L)
  stopifnot(is.character(out_dir), length(out_dir) == 1L, dir.exists(out_dir))

  # Derive the local file name from the system metadata.
  sysmeta <- getSystemMetadata(mn, metapid)
  file_name <- sysmeta@fileName
  if (length(file_name) != 1L || is.na(file_name) || !nzchar(file_name)) {
    # No usable fileName recorded: fall back to a file-system-safe form of
    # the pid so the path is still well defined.
    file_name <- paste0(gsub("[^A-Za-z0-9._-]", "_", metapid), ".xml")
  }
  # basename() keeps the download inside out_dir even if fileName has slashes.
  eml_file <- file.path(out_dir, basename(file_name))

  # Download the metadata document and parse it.
  download_data_objects(mn, metapid, eml_file)
  eml <- read_eml(eml_file)

  output <- list()
  output[["filename"]] <- eml_file
  output[["metapid"]] <- metapid

  # Raw award strings: the first node of each funding paragraph.
  funding <- eml@dataset@project@funding@para
  raw_award_string <- character(length(funding))
  # seq_along() yields no iterations when there are no funding paragraphs,
  # whereas 1:length(funding) would iterate over c(1, 0).
  for (i in seq_along(funding)) {
    award <- funding@.Data[[i]]@.Data[[1]]
    raw_award_string[i] <- as_list(award)[[1]]
  }
  output[["raw_award_string"]] <- raw_award_string

  # Award numbers: every run of digits in the raw strings.
  award_number <- str_extract_all(string = raw_award_string, pattern = "[0-9]+")
  output[["award_number"]] <- as.character(unlist(award_number))

  output[["title"]] <- eml@dataset@title@.Data[[1]]@.Data

  output
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,77 @@ | ||
| ###Produce a list of award numbers in our metadata and their information | ||
|
|
||
| #####First use download_data_objects from datamgmt | ||
#' Download DataONE objects to local files
#'
#' Fetches each pid in `data_pids` from the member node `mn` with
#' `dataone::getObject()` and writes the returned bytes to the matching
#' entry of `out_paths`. A destination that already exists is left untouched
#' and reported with a warning. A download that still fails after `n_max`
#' attempts is reported with a warning and no file is written for it.
#'
#' @param mn An `MNode` object to download from.
#' @param data_pids Character vector of object pids.
#' @param out_paths Character vector of destination file paths, one per pid.
#' @param n_max Maximum number of download attempts per object.
#' @return `NULL`, invisibly.
download_data_objects <- function(mn, data_pids, out_paths, n_max = 3) {
  stopifnot(methods::is(mn, "MNode"))
  stopifnot(is.character(data_pids))
  stopifnot(is.character(out_paths))
  # Each pid needs exactly one destination; a mismatch is a caller error.
  stopifnot(length(data_pids) == length(out_paths))

  for (i in seq_along(out_paths)) {
    if (file.exists(out_paths[i])) {
      warning(call. = FALSE,
              paste0("The file ", out_paths[i], " already exists. Skipping download."))
      next
    }

    # NULL marks "not downloaded yet". A sentinel string would be coerced and
    # compared against the downloaded bytes, and could end up written to disk.
    data_obj <- NULL
    n_tries <- 0
    while (is.null(data_obj) && n_tries < n_max) {
      data_obj <- tryCatch(
        dataone::getObject(mn, data_pids[i]),
        error = function(e) NULL
      )
      n_tries <- n_tries + 1
    }

    if (is.null(data_obj)) {
      # Never write a placeholder in place of the real object.
      warning(call. = FALSE,
              paste0("Failed to download ", data_pids[i], " after ", n_tries,
                     " attempt(s). No file was written to ", out_paths[i], "."))
      next
    }

    writeBin(data_obj, out_paths[i])
  }

  invisible(NULL)
}
| ###### | ||
|
|
||
| #Function with arguments: mn, metapid | ||
| getAwardNumbers <- function(mn, metapid){ | ||
| #outputs list of pid, raw_award_string, award_number, title of xml file | ||
|
|
||
| stopifnot(methods::is(mn, "MNode")) | ||
| stopifnot(is.character(metapid)) | ||
|
|
||
| #get outpath name from metapid | ||
| systema <- getSystemMetadata(mn, metapid) | ||
| systema_name <- systema@fileName | ||
| outpath <- paste("/home/mnguyen/", systema_name, sep = "") | ||
|
|
||
| #download data from metapid | ||
| download_data_objects(mn, c(metapid), c(outpath)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is pretty useful: you can download DataONE objects directly like this [code snippet not captured], so you won't need to use [function name not captured; presumably `download_data_objects`] |
||
| eml_file <- outpath | ||
|
|
||
| #get xml from metapid | ||
| eml <- read_eml(eml_file) #read in eml_file | ||
| list <- list() #will be output list | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You generally don't want to name your objects using already existing names. Since `list` is already the name of a function, you will want to rename your list to something like "output". |
||
|
|
||
| #filename | ||
| filename <- eml_file | ||
| list[["filename"]] <- filename | ||
|
|
||
| #pid | ||
| list[["metapid"]] <- metapid | ||
|
|
||
| #the raw award string numbers | ||
| raw_award_string <- c() | ||
| funding <- eml@dataset@project@funding@para | ||
| for(i in 1:length(funding)){ | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you change this to [replacement code not captured; presumably `seq_along(funding)`] [rest of comment not captured] |
||
| award <- funding@.Data[[i]]@.Data[[1]] | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You might be able to replace some of these with the `eml_get` function. Look up the documentation with `?eml_get` and see if you can use it to simplify some of the code. |
||
| raw_award_string[i] <- as_list(award)[[1]] | ||
| } | ||
| list[["raw_award_string"]] <- raw_award_string | ||
|
|
||
| #award_number | ||
| award_number <- str_extract_all(string = raw_award_string, pattern ="[0-9]+") | ||
| list[["award_number"]] <- unlist(award_number) | ||
|
|
||
| #title | ||
| title <- eml@dataset@title@.Data[[1]]@.Data | ||
| list[["title"]] <- title | ||
|
|
||
| return(list) | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a path to my own user directory; I'm not sure how else to create an outpath that would work for any user.