diff --git a/R/Sharis/Ameriflux_functions.R b/R/Sharis/Ameriflux_functions.R
new file mode 100644
index 0000000..c62d6f9
--- /dev/null
+++ b/R/Sharis/Ameriflux_functions.R
@@ -0,0 +1,105 @@
+#' Generate Attributes table
+#'
+#' @description This function creates a complete attributes table by combining information
+#' from the CSV data file itself with a CSV of preliminary attribute information. It was written
+#' specifically to work with AmeriFlux data and the documentation provided along with it.
+#'
+#' @param csv_file_path (character) Path to the CSV file of data
+#' @param attributes_file_path (character) Path to the CSV file of preliminary attribute information
+#'
+#' @return (data.frame) An attribute table with one row per column of the data file
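+#'
+#' @examples
+#' # A minimal usage sketch; both file paths below are hypothetical placeholders
+#' \dontrun{
+#' att_table <- generate_attributes_table(
+#'   csv_file_path = "AMF_site_halfhourly_data.csv",
+#'   attributes_file_path = "ameriflux_attribute_definitions.csv"
+#' )
+#' }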
", qual_def) + + # check if it is a time variable + if (grepl("TIME", main_label)){ + att_table$measurementScale[i] <- "dateTime" + att_table$domain[i] <- "dateTimeDomain" + att_table$formatString[i] <- "YYYYMMDDHHMM" + att_table$unit[i] <- "NA" + } else { + att_table$measurementScale[i] <- "ratio" + att_table$domain[i] <- "numericDomain" + att_table$numberType[i] <- "real" + att_table$unit[i] <- attributes$SI_unit[attributes$label == main_label] + att_table$missingValueCode[i] <- "-9999" + att_table$missingValueCodeExplanation[i] <- "Missing values are represented as -9999" + } + + # case if there is no qualifier + } else { + att_table$attributeDefinition[i] <- attributes$definition[attributes$label == col_names[i]] + # check if it is a time variable + if (grepl("TIME", col_names[i])){ + att_table$measurementScale[i] <- "dateTime" + att_table$domain[i] <- "dateTimeDomain" + att_table$formatString[i] <- "YYYYMMDDHHMM" + att_table$unit[i] <- "NA" + } else { + att_table$measurementScale[i] <- "ratio" + att_table$domain[i] <- "numericDomain" + att_table$numberType[i] <- "real" + att_table$unit[i] <- attributes$SI_unit[attributes$label == col_names[i]] + att_table$missingValueCode[i] <- "-9999" + att_table$missingValueCodeExplanation[i] <- "Missing values are represented as -9999" + } + } + + } + + return(att_table) + +} diff --git a/R/Sharis/code_chunks.Rmd b/R/Sharis/code_chunks.Rmd new file mode 100644 index 0000000..6599988 --- /dev/null +++ b/R/Sharis/code_chunks.Rmd @@ -0,0 +1,170 @@ +--- +title: "Code Chunks" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +## Updating Coverage + +If you want to add to the current coverage without having to restate all the other coverage elements using `set_coverage`, these chunks allow you to just add taxonomic coverage and single date temporal coverage. + +### Adding taxonomic coverage + +```{r, eval = FALSE} +# add each new element as a tax object +tax1 <- new("taxonomicClassification", + taxonRankName = new("taxonRankName", "Species"), + taxonRankValue = new("taxonRankValue", "Calamagrostis deschampsioides")) + +tax2 <- new("taxonomicClassification", + taxonRankName = new("taxonRankName", "Species"), + taxonRankValue = new("taxonRankValue", "Carex aquatilis")) + +# combine all tax elements into taxonomic coverage object +taxcov <- new("taxonomicCoverage", + taxonomicClassification = c(tax1, tax2)) + +eml@dataset@coverage@taxonomicCoverage <- c(taxcov) + +``` + +### Adding single date temporal coverage + +```{r, eval = FALSE} +date <- new("singleDateTime", + calendarDate = "2011-09-15") + +tempcov1 <- new("temporalCoverage", + singleDateTime = date) + +eml@dataset@coverage@temporalCoverage <- c(tempcov1) +``` + +## Data Objects +### Adding data tables for a whole folder of files with the same attributes + +```{r, eval = FALSE} +# set path to data folder +data_path <- '/home/visitor/…”' + +# list the files. recursive = TRUE will get all files even if they are in folders in folders +paths <- list.files(data_path, full.names=TRUE, recursive = TRUE) + +# generate pids. 
+
+## System Metadata
+
+### Obsolescence chain
+
+This chunk obsoletes one dataset. If there are more versions to add to the chain, more steps can be added, carefully making sure to fill in the `obsoletes` and `obsoletedBy` slots for each one (see the sketch after this chunk).
+
+```{r, eval = FALSE}
+# get the oldest version of the file you want to be visible: use get_all_versions and take the first
+# urn:uuid:...
+
+# PID for the data set to be hidden: doi:10…
+
+# point the first visible version back at the data set being hidden
+gsmOG <- getSystemMetadata(mn, "urn:uuid:...")
+gsmOG@obsoletes <- "doi:10…"
+updateSystemMetadata(mn, "urn:uuid:...", gsmOG)
+
+# point the hidden version forward at the first visible version
+gsmObs1 <- getSystemMetadata(mn, "doi:10…")
+gsmObs1@obsoletedBy <- "urn:uuid:..."
+updateSystemMetadata(mn, "doi:10…", gsmObs1)
+```
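+
+As a sketch of a longer chain, a version in the middle needs both slots filled. The PIDs below are hypothetical placeholders for a three-version chain (oldest → middle → newest):
+
+```{r, eval = FALSE}
+# middle_pid, oldest_pid, and newest_pid are hypothetical placeholder PIDs
+gsmMiddle <- getSystemMetadata(mn, "middle_pid")
+gsmMiddle@obsoletes <- "oldest_pid"
+gsmMiddle@obsoletedBy <- "newest_pid"
+updateSystemMetadata(mn, "middle_pid", gsmMiddle)
+```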
+
+### Set rights and access
+
+This chunk sets rights and access for the metadata, resource map, and all data objects in a package.
+
+```{r, eval = FALSE}
+## Fix rights holder and access
+PI_name <- "http://orcid.org/…."
+project_pid <- "resource_map_doi:10…."
+project_package <- get_package(mn, project_pid)
+set_rights_and_access(mn, c(project_package$metadata, project_package$resource_map, project_package$data), PI_name, c("read", "write", "changePermission"))
+```
+
+## Miscellaneous
+
+### Adding sampling info to methods section
+
+```{r, eval = FALSE}
+step1 <- new('methodStep',
+             description = "methods text")
+
+stEx <- new("studyExtent",
+            description = "study extent description")
+
+samp <- new("sampling",
+            studyExtent = stEx,
+            samplingDescription = "sampling description text")
+
+methods1 <- new("methods",
+                methodStep = c(step1),
+                sampling = samp)
+eml@dataset@methods <- methods1
+```
+
+### Adding a pre-generated identifier to the EML
+
+When you pre-generate a UUID or DOI, the change is not automatically reflected in the "Identifier" section of the EML, so this chunk makes sure that the EML lines up with the identifier being used.
+
+```{r, eval = FALSE}
+## Generate DOI and add to EML
+doiPid <- generateIdentifier(mn, "DOI")
+eml@packageId <- new("xml_attribute",
+                     .Data = doiPid)
+```
+
+### Dealing with netCDFs
+
+This section is for dealing with netCDF (.nc) files. These files still require data tables, but since they cannot simply be opened in a text editor or Excel, you can explore them with Panoply or with these R commands:
+
+```{r, eval = FALSE}
+library(arcticdatautils)
+library(ncdf4)
+filepath <- '/home/sharisnochs/Petterson/ICECAPS_precip_product_2010_2015.nc'
+
+# gets attribute info
+atts <- get_ncdf4_attributes(filepath)
+# preview of View(atts)
+atts[1:5, ]
+
+# returns the actual values for a specified variable
+nc <- nc_open(filepath)
+test <- ncvar_get(nc, 't2m')
+# preview of View(test)
+test[1:10]
+nc_close(nc)
+```
+
+The `formatId` in the sysmeta will most likely be `netCDF-4`.
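+
+If the object was published with a different `formatId`, a minimal sketch like the following could correct it. Here `nc_pid` is a hypothetical placeholder for the PID of the netCDF data object:
+
+```{r, eval = FALSE}
+# nc_pid is a hypothetical placeholder for the PID of the .nc data object
+nc_sysmeta <- getSystemMetadata(mn, nc_pid)
+nc_sysmeta@formatId <- "netCDF-4"
+updateSystemMetadata(mn, nc_pid, nc_sysmeta)
+```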