# Scripts for review #2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
**Open** — sharisochs wants to merge 4 commits into NCEAS:master from sharisochs:master
```r
#' Generate attributes table
#'
#' @description This function creates a complete attributes table by combining
#' information gathered from the CSV data file itself and a CSV of preliminary
#' attribute information. This function was written specifically to work with
#' AmeriFlux data and the information that was provided along with it.
#'
#' @param csv_file_path (character) Path to the CSV file of data
#' @param attributes_file_path (character) Path to CSV file of preliminary attribute information
#'
#' @return (data.frame) The completed attributes table
generate_attributes_table <- function(csv_file_path,
                                      attributes_file_path) {
  # Check that files exist
  stopifnot(file.exists(csv_file_path))
  stopifnot(file.exists(attributes_file_path))

  # Read in files (skip = 2 drops the lines above the column headers)
  data <- read.csv(csv_file_path, stringsAsFactors = FALSE, skip = 2)
  n <- ncol(data)
  attributes <- read.csv(attributes_file_path, stringsAsFactors = FALSE)
  colnames(attributes) <- c("category", "label", "definition", "unit", "SI_unit")

  # Initialize the attributes table with "NA" placeholders
  att_table <- data.frame(attributeName = rep("NA", n),
                          attributeDefinition = rep("NA", n),
                          measurementScale = rep("NA", n),
                          domain = rep("NA", n),
                          formatString = rep("NA", n),
                          definition = rep("NA", n),
                          unit = rep("NA", n),
                          numberType = rep("NA", n),
                          missingValueCode = rep("NA", n),
                          missingValueCodeExplanation = rep("NA", n),
                          stringsAsFactors = FALSE)

  # Suffixes that AmeriFlux appends to variable names
  qualifiers <- c("_PI", "_QC", "_F", "_IU", "_H_V_R", "_H_V_A", "_1", "_2",
                  "_3", "_4", "_5", "_6", "_7", "_8", "_9", "_SD", "_N")
  num_qualifiers <- c("_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9")

  # Add attribute names
  att_table$attributeName <- colnames(data)
  col_names <- colnames(data)

  for (i in seq_len(n)) {
    # Check if the name has a qualifier at the end
    if (any(endsWith(col_names[i], suffix = qualifiers))) {
      # Identify the qualifier and strip it off to recover the main label
      current_qual <- which(endsWith(col_names[i], suffix = qualifiers))
      qualifier <- qualifiers[current_qual]
      len <- nchar(qualifier)
      main_label <- substr(col_names[i], 1, nchar(col_names[i]) - len)

      # Get definition for the main label
      main_def <- attributes$definition[attributes$label == main_label]

      # Get definition for the qualifier; numbered qualifiers share the "_#" entry
      if (qualifier %in% num_qualifiers) {
        qual_def <- attributes$definition[attributes$label == "_#"]
      } else {
        qual_def <- attributes$definition[attributes$label == qualifier]
      }

      # Concatenate the definitions
      att_table$attributeDefinition[i] <- paste0(main_def, ". ", qual_def)

      # Check if it is a time variable
      if (grepl("TIME", main_label)) {
        att_table$measurementScale[i] <- "dateTime"
        att_table$domain[i] <- "dateTimeDomain"
        att_table$formatString[i] <- "YYYYMMDDHHMM"
        att_table$unit[i] <- "NA"
      } else {
        att_table$measurementScale[i] <- "ratio"
        att_table$domain[i] <- "numericDomain"
        att_table$numberType[i] <- "real"
        att_table$unit[i] <- attributes$SI_unit[attributes$label == main_label]
        att_table$missingValueCode[i] <- "-9999"
        att_table$missingValueCodeExplanation[i] <- "Missing values are represented as -9999"
      }

    # Case where there is no qualifier
    } else {
      att_table$attributeDefinition[i] <- attributes$definition[attributes$label == col_names[i]]
      # Check if it is a time variable
      if (grepl("TIME", col_names[i])) {
        att_table$measurementScale[i] <- "dateTime"
        att_table$domain[i] <- "dateTimeDomain"
        att_table$formatString[i] <- "YYYYMMDDHHMM"
        att_table$unit[i] <- "NA"
      } else {
        att_table$measurementScale[i] <- "ratio"
        att_table$domain[i] <- "numericDomain"
        att_table$numberType[i] <- "real"
        att_table$unit[i] <- attributes$SI_unit[attributes$label == col_names[i]]
        att_table$missingValueCode[i] <- "-9999"
        att_table$missingValueCodeExplanation[i] <- "Missing values are represented as -9999"
      }
    }
  }

  return(att_table)
}
```
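For reference, a hypothetical call might look like this; the file names below are illustrative placeholders, not files from this PR, and the attributes CSV is assumed to carry the five columns the function renames (`category`, `label`, `definition`, `unit`, `SI_unit`):

```r
# Hypothetical usage; both paths are placeholders
atts <- generate_attributes_table(
  csv_file_path = "AMF_site_halfhourly.csv",
  attributes_file_path = "ameriflux_attribute_definitions.csv"
)
head(atts)
```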
---
title: "Code Chunks"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## Updating Coverage

If you want to add to the current coverage without having to restate all the other coverage elements using `set_coverage`, these chunks allow you to add just taxonomic coverage and single-date temporal coverage.

### Adding taxonomic coverage

```{r, eval = FALSE}
# add each new element as a taxonomicClassification object
tax1 <- new("taxonomicClassification",
            taxonRankName = new("taxonRankName", "Species"),
            taxonRankValue = new("taxonRankValue", "Calamagrostis deschampsioides"))

tax2 <- new("taxonomicClassification",
            taxonRankName = new("taxonRankName", "Species"),
            taxonRankValue = new("taxonRankValue", "Carex aquatilis"))

# combine all elements into a taxonomicCoverage object
taxcov <- new("taxonomicCoverage",
              taxonomicClassification = c(tax1, tax2))

eml@dataset@coverage@taxonomicCoverage <- c(taxcov)
```

### Adding single date temporal coverage

```{r, eval = FALSE}
date <- new("singleDateTime",
            calendarDate = "2011-09-15")

tempcov1 <- new("temporalCoverage",
                singleDateTime = date)

eml@dataset@coverage@temporalCoverage <- c(tempcov1)
```

## Data Objects
### Adding data tables for a whole folder of files with the same attributes
> **Collaborator:** This section is so awesome! I just had to do this but forgot you wrote it up until I mostly finished. In any case, interesting to see slightly different ways of doing the same thing!
```{r, eval = FALSE}
# set path to data folder
data_path <- '/home/visitor/…'

# list the files; recursive = TRUE also returns files nested inside subfolders
paths <- list.files(data_path, full.names = TRUE, recursive = TRUE)

# generate pids; replace format_id with the one that applies
new_pids1 <- sapply(paths, function(path) {
  publish_object(mn, path, format_id = "text/plain")
})

attributes1 <- read.csv("/home/me/file_attributes.csv",
                        stringsAsFactors = FALSE, na.strings = c(''))
attributeList1 <- set_attributes(attributes1)

# assign the shared attribute list to the first data table and give it an id
eml <- eml_add_entities(eml, data.frame(type = "dataTable",
                                        path = paths,
                                        pid = new_pids1,
                                        format_id = "text/plain"))
eml@dataset@dataTable[[1]]@attributeList <- attributeList1
eml@dataset@dataTable[[1]]@attributeList@id <- new("xml_attribute", "shared_attributes1")

# loop over the remaining data tables, referencing the shared attribute list
# and adding an entity description to each
for (i in 2:length(eml@dataset@dataTable)) {
  eml@dataset@dataTable[[i]]@attributeList@references <- new("references", "shared_attributes1")
  eml@dataset@dataTable[[i]]@entityDescription <- new("entityDescription", .Data = "entity description")
}
```
## System Metadata
### Obsolescence chain

This chunk obsoletes one dataset. If there are more datasets to add to the chain, add more steps, taking care to fill in the `obsoletes` and `obsoletedBy` slots for each one.

```{r, eval = FALSE}
# get the oldest version of the file you want to be visible:
# use get_all_versions() and look at the first entry
# urn:uuid:...

# PID for the data set to be hidden: doi:10…

# add the data set to hide in the slot before the first version of the visible data set
gsmOG <- getSystemMetadata(mn, "urn:uuid:...")
gsmOG@obsoletes <- "doi:10…"
updateSystemMetadata(mn, "urn:uuid:...", gsmOG)

# add the first visible version to the obsolescence chain after the hidden version
gsmObs1 <- getSystemMetadata(mn, "doi:10…")
gsmObs1@obsoletedBy <- "urn:uuid:..."
updateSystemMetadata(mn, "doi:10…", gsmObs1)
```
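For longer chains the same pattern generalizes: with the PIDs ordered oldest to newest, point each version's `obsoletedBy` at the next and the next version's `obsoletes` back at it. A minimal sketch using the same calls as above, assuming a placeholder `pids` vector:

```{r, eval = FALSE}
# hypothetical PIDs, ordered oldest to newest (placeholders, not real identifiers)
pids <- c("doi:10…", "urn:uuid:...", "urn:uuid:...")

for (i in seq_len(length(pids) - 1)) {
  # point the older version forward in the chain
  sm_old <- getSystemMetadata(mn, pids[i])
  sm_old@obsoletedBy <- pids[i + 1]
  updateSystemMetadata(mn, pids[i], sm_old)

  # point the newer version back at its predecessor
  sm_new <- getSystemMetadata(mn, pids[i + 1])
  sm_new@obsoletes <- pids[i]
  updateSystemMetadata(mn, pids[i + 1], sm_new)
}
```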
### Set rights and access

This chunk sets rights and access for the metadata, resource map, and all data objects in a package.

```{r, eval = FALSE}
## Fix rights holder and access
PI_name <- "http://orcid.org/…."
project_pid <- "resource_map_doi:10…."
project_package <- get_package(mn, project_pid)
set_rights_and_access(mn,
                      c(project_package$metadata,
                        project_package$resource_map,
                        project_package$data),
                      PI_name,
                      c("read", "write", "changePermission"))
```
## Miscellaneous

### Adding sampling info to methods section

```{r, eval = FALSE}
step1 <- new("methodStep",
             description = "methods text")

stEx <- new("studyExtent",
            description = "study extent description")

samp <- new("sampling",
            studyExtent = stEx,
            samplingDescription = "sampling description text")

methods1 <- new("methods",
                methodStep = c(step1),
                sampling = samp)
eml@dataset@methods <- methods1
```
### Adding a pre-generated identifier to the EML

When you pre-generate a UUID or DOI, the change is not automatically reflected in the "Identifier" section of the EML, so this chunk makes sure the EML lines up with the identifier being used.

```{r, eval = FALSE}
## Generate DOI and add to EML
doiPid <- generateIdentifier(mn, "DOI")
eml@packageId <- new("xml_attribute",
                     .Data = doiPid)
```
### Dealing with netCDFs

This section is for dealing with netCDF (.nc) files. These files require data tables, but since they cannot simply be opened with a text editor or Excel, you can explore them with Panoply or with these R commands:

```{r}
library(arcticdatautils)
library(ncdf4)
filepath <- '/home/sharisnochs/Petterson/ICECAPS_precip_product_2010_2015.nc'

# gets attribute info
atts <- get_ncdf4_attributes(filepath)
# preview of View(atts)
atts[1:5, ]

# returns the actual values for a specified attribute
t <- nc_open(filepath)
test <- ncvar_get(t, 't2m')
# preview of View(test)
test[1:10]
```

The `formatId` in the sysmeta will most likely be `netCDF-4`.
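If you need to check or correct it, here is a minimal sketch using the same system-metadata calls as in the obsolescence chunk (the PID is a placeholder):

```{r, eval = FALSE}
# inspect and, if needed, fix the formatId; "urn:uuid:..." is a placeholder PID
sm <- getSystemMetadata(mn, "urn:uuid:...")
sm@formatId
sm@formatId <- "netCDF-4"
updateSystemMetadata(mn, "urn:uuid:...", sm)
```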
> **Collaborator:** If you'd like you can document what this function does. Check out the multiline comments (`#'` is the roxygen comment prefix in R) at the top of this function: https://github.com/NCEAS/datamgmt/blob/master/R/guess_member_node.R