Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions R/award_script.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
###Produce a list of award numbers in our metadata and their information

#####First use download_data_objects from datamgmt
#' Download DataONE objects to local files
#'
#' For each entry of `out_paths`, fetches the object identified by the
#' matching entry of `data_pids` with `dataone::getObject()` and writes the
#' returned bytes to that path with `writeBin()`. Paths that already exist are
#' skipped with a warning. Each download is attempted up to `n_max` times; if
#' every attempt fails, a warning is raised and no file is written for that
#' object.
#'
#' @param mn An object for which `methods::is(mn, "MNode")` is `TRUE`.
#' @param data_pids Character vector of object identifiers.
#' @param out_paths Character vector of destination file paths, one per
#'   identifier in `data_pids`.
#' @param n_max Maximum number of download attempts per object.
#' @return `NULL`, invisibly.
download_data_objects <- function(mn, data_pids, out_paths, n_max = 3) {
  stopifnot(methods::is(mn, "MNode"))
  stopifnot(is.character(data_pids))
  stopifnot(is.character(out_paths))
  # Identifiers and paths are paired by position, so they must line up.
  stopifnot(length(data_pids) == length(out_paths))

  for (i in seq_along(out_paths)) {
    if (file.exists(out_paths[i])) {
      warning(call. = FALSE,
              paste0("The file ", out_paths[i],
                     " already exists. Skipping download."))
      next
    }

    # NULL marks "not downloaded yet"; a failed attempt leaves it NULL so the
    # loop retries until n_max attempts have been made.
    data_obj <- NULL
    n_tries <- 0
    while (is.null(data_obj) && n_tries < n_max) {
      data_obj <- tryCatch(
        dataone::getObject(mn, data_pids[i]),
        error = function(e) NULL
      )
      n_tries <- n_tries + 1
    }

    # Never write a placeholder to disk: a file that exists but does not hold
    # the object would be mistaken for a successful download later on.
    if (is.null(data_obj)) {
      warning(call. = FALSE,
              paste0("Failed to download ", data_pids[i], " after ", n_tries,
                     " attempt(s). No file was written to ", out_paths[i],
                     "."))
      next
    }

    writeBin(data_obj, out_paths[i])
  }

  invisible(NULL)
}
######

#' Extract award numbers and basic information from an EML metadata record
#'
#' Looks up the system metadata for `metapid`, downloads the metadata object
#' into `out_dir` with `download_data_objects()`, reads it with `read_eml()`,
#' and collects the funding paragraphs, the digit runs found in them, and the
#' dataset title.
#'
#' @param mn An object for which `methods::is(mn, "MNode")` is `TRUE`.
#' @param metapid A single character identifier of the metadata object.
#' @param out_dir Directory the metadata file is downloaded into. Defaults to
#'   the session temporary directory so the function works for any user.
#' @return A named list with elements `filename` (path of the downloaded
#'   file), `metapid`, `raw_award_string` (one entry per funding paragraph),
#'   `award_number` (every run of digits found in the funding paragraphs) and
#'   `title`.
getAwardNumbers <- function(mn, metapid, out_dir = tempdir()) {
  stopifnot(methods::is(mn, "MNode"))
  stopifnot(is.character(metapid), length(metapid) == 1)
  stopifnot(is.character(out_dir), length(out_dir) == 1)

  # Derive the local file name from the object's system metadata.
  sysmeta <- getSystemMetadata(mn, metapid)
  file_name <- sysmeta@fileName
  if (length(file_name) != 1 || is.na(file_name) || !nzchar(file_name)) {
    # NOTE(review): fallback for records without a usable fileName; the
    # sanitised identifier is used instead -- confirm this naming is acceptable.
    file_name <- paste0(gsub("[^A-Za-z0-9._-]", "_", metapid), ".xml")
  }

  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
  }
  eml_file <- file.path(out_dir, file_name)

  # Download the metadata (skipped with a warning if the file is already
  # there) and parse it.
  download_data_objects(mn, metapid, eml_file)
  eml <- read_eml(eml_file)

  # One raw string per funding paragraph. seq_along() makes the loop a no-op
  # when there are no paragraphs instead of indexing out of range.
  # NOTE(review): the slot path below assumes the S4 object layout returned by
  # read_eml() -- confirm against the installed EML version.
  funding <- eml@dataset@project@funding@para
  raw_award_string <- character(length(funding))
  for (i in seq_along(funding)) {
    award <- funding@.Data[[i]]@.Data[[1]]
    raw_award_string[i] <- as_list(award)[[1]]
  }

  # Every run of digits in the funding text is treated as an award number.
  award_number <- str_extract_all(string = raw_award_string,
                                  pattern = "[0-9]+")

  list(
    filename = eml_file,
    metapid = metapid,
    raw_award_string = raw_award_string,
    # as.character() keeps this a character vector even when nothing matched.
    award_number = as.character(unlist(award_number)),
    title = eml@dataset@title@.Data[[1]]@.Data
  )
}
77 changes: 77 additions & 0 deletions R/function.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
###Produce a list of award numbers in our metadata and their information

#####First use download_data_objects from datamgmt
#' Download DataONE objects to local files
#'
#' For each entry of `out_paths`, fetches the object identified by the
#' matching entry of `data_pids` with `dataone::getObject()` and writes the
#' returned bytes to that path with `writeBin()`. Paths that already exist are
#' skipped with a warning. Each download is attempted up to `n_max` times; if
#' every attempt fails, a warning is raised and no file is written for that
#' object.
#'
#' @param mn An object for which `methods::is(mn, "MNode")` is `TRUE`.
#' @param data_pids Character vector of object identifiers.
#' @param out_paths Character vector of destination file paths, one per
#'   identifier in `data_pids`.
#' @param n_max Maximum number of download attempts per object.
#' @return `NULL`, invisibly.
download_data_objects <- function(mn, data_pids, out_paths, n_max = 3) {
  stopifnot(methods::is(mn, "MNode"))
  stopifnot(is.character(data_pids))
  stopifnot(is.character(out_paths))
  # Identifiers and paths are paired by position, so they must line up.
  stopifnot(length(data_pids) == length(out_paths))

  for (i in seq_along(out_paths)) {
    if (file.exists(out_paths[i])) {
      warning(call. = FALSE,
              paste0("The file ", out_paths[i],
                     " already exists. Skipping download."))
      next
    }

    # NULL marks "not downloaded yet"; a failed attempt leaves it NULL so the
    # loop retries until n_max attempts have been made.
    data_obj <- NULL
    n_tries <- 0
    while (is.null(data_obj) && n_tries < n_max) {
      data_obj <- tryCatch(
        dataone::getObject(mn, data_pids[i]),
        error = function(e) NULL
      )
      n_tries <- n_tries + 1
    }

    # Never write a placeholder to disk: a file that exists but does not hold
    # the object would be mistaken for a successful download later on.
    if (is.null(data_obj)) {
      warning(call. = FALSE,
              paste0("Failed to download ", data_pids[i], " after ", n_tries,
                     " attempt(s). No file was written to ", out_paths[i],
                     "."))
      next
    }

    writeBin(data_obj, out_paths[i])
  }

  invisible(NULL)
}
######

#Function with arguments: mn, metapid
getAwardNumbers <- function(mn, metapid){
#outputs list of pid, raw_award_string, award_number, title of xml file

stopifnot(methods::is(mn, "MNode"))
stopifnot(is.character(metapid))

#get outpath name from metapid
systema <- getSystemMetadata(mn, metapid)
systema_name <- systema@fileName
outpath <- paste("/home/mnguyen/", systema_name, sep = "")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a path under my own user's home directory. I'm not sure how else to build an outpath that would work for any user.


#download data from metapid
download_data_objects(mn, c(metapid), c(outpath))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty useful: you can read a DataONE object directly, without saving it to disk first, like this: `eml <- read_eml(rawToChar(dataone::getObject(mn, metapid)))`

So you won't need to use download_data_objects

eml_file <- outpath

#get xml from metapid
eml <- read_eml(eml_file) #read in eml_file
list <- list() #will be output list
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You generally don't want to give your objects names that already exist. Since `list` is already the name of a function, rename your list to something like `output`.


#filename
filename <- eml_file
list[["filename"]] <- filename

#pid
list[["metapid"]] <- metapid

#the raw award string numbers
raw_award_string <- c()
funding <- eml@dataset@project@funding@para
for(i in 1:length(funding)){
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

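If you change this to `seq_along(funding)`, it handles the case where the length is 0: the loop body is simply skipped. With `1:length(funding)` you get `1:0`, which iterates over 1 and then 0, so the indexing inside the loop fails with an error.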

award <- funding@.Data[[i]]@.Data[[1]]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might be able to replace some of these slot accesses with the `eml_get` function. Look up the documentation with `?eml_get` and see whether you can use it to simplify some of the code.

raw_award_string[i] <- as_list(award)[[1]]
}
list[["raw_award_string"]] <- raw_award_string

#award_number
award_number <- str_extract_all(string = raw_award_string, pattern ="[0-9]+")
list[["award_number"]] <- unlist(award_number)

#title
title <- eml@dataset@title@.Data[[1]]@.Data
list[["title"]] <- title

return(list)
}