From d00d034ef27e440f31e36823ca0bdbdf650f8a50 Mon Sep 17 00:00:00 2001
From: Megan Nguyen
Date: Thu, 7 Jun 2018 10:40:46 -0700
Subject: [PATCH 1/2] This function produces a list of award numbers in a package metadata and some of their information #182

---
Review note: the R code in this patch was revised during review; the blob id
on the "index" line below was not recomputed for the revised content.

 R/function.R | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 R/function.R

diff --git a/R/function.R b/R/function.R
new file mode 100644
index 0000000..9ea9eb9
--- /dev/null
+++ b/R/function.R
@@ -0,0 +1,111 @@
+### Produce a list of award numbers in our metadata and their information
+
+##### First use download_data_objects from datamgmt
+
+# Download objects with dataone::getObject() and write them to local files.
+#
+# Arguments:
+#   mn        Object of class "MNode"; passed to dataone::getObject().
+#   data_pids Character vector of identifiers of the objects to download.
+#   out_paths Character vector of destination paths, one per identifier.
+#   n_max     Maximum number of download attempts per object.
+#
+# A destination that already exists is skipped with a warning. When every
+# attempt for an object fails, a warning is raised and nothing is written,
+# so a failed download can never be mistaken for a real file later on.
+#
+# Returns NULL, invisibly.
+download_data_objects <- function(mn, data_pids, out_paths, n_max = 3) {
+  stopifnot(methods::is(mn, "MNode"))
+  stopifnot(is.character(data_pids))
+  stopifnot(is.character(out_paths))
+  stopifnot(length(data_pids) == length(out_paths))
+
+  for (i in seq_along(out_paths)) {
+    if (file.exists(out_paths[i])) {
+      warning(
+        "The file ", out_paths[i], " already exists. Skipping download.",
+        call. = FALSE
+      )
+      next
+    }
+
+    # NULL means "nothing downloaded yet"; a failed attempt leaves it NULL.
+    data_obj <- NULL
+    n_tries <- 0
+    while (is.null(data_obj) && n_tries < n_max) {
+      data_obj <- tryCatch(
+        dataone::getObject(mn, data_pids[i]),
+        error = function(e) NULL
+      )
+      n_tries <- n_tries + 1
+    }
+
+    if (is.null(data_obj)) {
+      warning(
+        "Could not download ", data_pids[i], " after ", n_tries,
+        " attempt(s). No file was written.",
+        call. = FALSE
+      )
+    } else {
+      writeBin(data_obj, out_paths[i])
+    }
+  }
+
+  invisible(NULL)
+}
+######
+
+# Collect the award numbers recorded in a package's EML metadata, together
+# with some information about the metadata file they came from.
+#
+# Arguments:
+#   mn      Object of class "MNode".
+#   metapid Identifier of the metadata object, as a single string.
+#   out_dir Directory the metadata file is downloaded into.
+#           NOTE(review): the default is one user's home directory; pass a
+#           directory that exists on your machine (e.g. tempdir()).
+#
+# Returns a named list:
+#   filename         path of the downloaded metadata file
+#   metapid          the identifier that was passed in
+#   raw_award_string one entry per funding paragraph
+#   award_number     every run of digits found in raw_award_string
+#   title            first title of the dataset
+#
+# read_eml(), as_list() and str_extract_all() are called unqualified and must
+# be attached by the caller (presumably EML, xml2 and stringr; TODO confirm).
+getAwardNumbers <- function(mn, metapid, out_dir = "/home/mnguyen") {
+  stopifnot(methods::is(mn, "MNode"))
+  stopifnot(is.character(metapid), length(metapid) == 1)
+
+  # The local copy is named after the fileName in the system metadata.
+  sysmeta <- dataone::getSystemMetadata(mn, metapid)
+  eml_file <- file.path(out_dir, sysmeta@fileName)
+
+  # A file already present at eml_file is reused instead of downloaded again.
+  download_data_objects(mn, metapid, eml_file)
+  eml <- read_eml(eml_file)
+
+  # seq_along() (not 1:length()) so that metadata without any funding
+  # paragraph yields an empty result instead of an out-of-range index.
+  funding <- eml@dataset@project@funding@para
+  raw_award_string <- character(length(funding))
+  for (i in seq_along(funding)) {
+    award <- funding@.Data[[i]]@.Data[[1]]
+    raw_award_string[i] <- as_list(award)[[1]]
+  }
+
+  award_number <- str_extract_all(
+    string = raw_award_string,
+    pattern = "[0-9]+"
+  )
+
+  list(
+    filename = eml_file,
+    metapid = metapid,
+    raw_award_string = raw_award_string,
+    award_number = unlist(award_number),
+    title = eml@dataset@title@.Data[[1]]@.Data
+  )
+}

From 0e41cb3a4dc2684c64bab144e65ad79936a48442 Mon Sep 17 00:00:00 2001
From: Megan Nguyen
Date: Thu, 7 Jun 2018 10:44:45 -0700
Subject: [PATCH 2/2] Produce a list of award numbers in our metadata and their information #182

---
Review note: the R code in this patch was revised during review; the blob id
on the "index" line below was not recomputed for the revised content.

 R/award_script.R | 111 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 R/award_script.R

diff --git a/R/award_script.R b/R/award_script.R
new file mode 100644
index 0000000..9ea9eb9
--- /dev/null
+++ b/R/award_script.R
@@ -0,0 +1,111 @@
+### Produce a list of award numbers in our metadata and their information
+
+##### First use download_data_objects from datamgmt
+
+# Download objects with dataone::getObject() and write them to local files.
+#
+# Arguments:
+#   mn        Object of class "MNode"; passed to dataone::getObject().
+#   data_pids Character vector of identifiers of the objects to download.
+#   out_paths Character vector of destination paths, one per identifier.
+#   n_max     Maximum number of download attempts per object.
+#
+# A destination that already exists is skipped with a warning. When every
+# attempt for an object fails, a warning is raised and nothing is written,
+# so a failed download can never be mistaken for a real file later on.
+#
+# Returns NULL, invisibly.
+download_data_objects <- function(mn, data_pids, out_paths, n_max = 3) {
+  stopifnot(methods::is(mn, "MNode"))
+  stopifnot(is.character(data_pids))
+  stopifnot(is.character(out_paths))
+  stopifnot(length(data_pids) == length(out_paths))
+
+  for (i in seq_along(out_paths)) {
+    if (file.exists(out_paths[i])) {
+      warning(
+        "The file ", out_paths[i], " already exists. Skipping download.",
+        call. = FALSE
+      )
+      next
+    }
+
+    # NULL means "nothing downloaded yet"; a failed attempt leaves it NULL.
+    data_obj <- NULL
+    n_tries <- 0
+    while (is.null(data_obj) && n_tries < n_max) {
+      data_obj <- tryCatch(
+        dataone::getObject(mn, data_pids[i]),
+        error = function(e) NULL
+      )
+      n_tries <- n_tries + 1
+    }
+
+    if (is.null(data_obj)) {
+      warning(
+        "Could not download ", data_pids[i], " after ", n_tries,
+        " attempt(s). No file was written.",
+        call. = FALSE
+      )
+    } else {
+      writeBin(data_obj, out_paths[i])
+    }
+  }
+
+  invisible(NULL)
+}
+######
+
+# Collect the award numbers recorded in a package's EML metadata, together
+# with some information about the metadata file they came from.
+#
+# Arguments:
+#   mn      Object of class "MNode".
+#   metapid Identifier of the metadata object, as a single string.
+#   out_dir Directory the metadata file is downloaded into.
+#           NOTE(review): the default is one user's home directory; pass a
+#           directory that exists on your machine (e.g. tempdir()).
+#
+# Returns a named list:
+#   filename         path of the downloaded metadata file
+#   metapid          the identifier that was passed in
+#   raw_award_string one entry per funding paragraph
+#   award_number     every run of digits found in raw_award_string
+#   title            first title of the dataset
+#
+# read_eml(), as_list() and str_extract_all() are called unqualified and must
+# be attached by the caller (presumably EML, xml2 and stringr; TODO confirm).
+getAwardNumbers <- function(mn, metapid, out_dir = "/home/mnguyen") {
+  stopifnot(methods::is(mn, "MNode"))
+  stopifnot(is.character(metapid), length(metapid) == 1)
+
+  # The local copy is named after the fileName in the system metadata.
+  sysmeta <- dataone::getSystemMetadata(mn, metapid)
+  eml_file <- file.path(out_dir, sysmeta@fileName)
+
+  # A file already present at eml_file is reused instead of downloaded again.
+  download_data_objects(mn, metapid, eml_file)
+  eml <- read_eml(eml_file)
+
+  # seq_along() (not 1:length()) so that metadata without any funding
+  # paragraph yields an empty result instead of an out-of-range index.
+  funding <- eml@dataset@project@funding@para
+  raw_award_string <- character(length(funding))
+  for (i in seq_along(funding)) {
+    award <- funding@.Data[[i]]@.Data[[1]]
+    raw_award_string[i] <- as_list(award)[[1]]
+  }
+
+  award_number <- str_extract_all(
+    string = raw_award_string,
+    pattern = "[0-9]+"
+  )
+
+  list(
+    filename = eml_file,
+    metapid = metapid,
+    raw_award_string = raw_award_string,
+    award_number = unlist(award_number),
+    title = eml@dataset@title@.Data[[1]]@.Data
+  )
+}