From 20e6b873d3532eae58160db4fde55f235db620f7 Mon Sep 17 00:00:00 2001
From: Sharis Ochs <sharisnochs@gmail.com>
Date: Fri, 16 Mar 2018 14:12:55 -0700
Subject: [PATCH 1/4] Scripts for review

---
 R/Sharis/Ameriflux_functions.R |  92 ++++++++++++++++++
 R/Sharis/code_chunks.Rmd       | 170 +++++++++++++++++++++++++++++++++
 2 files changed, 262 insertions(+)
 create mode 100644 R/Sharis/Ameriflux_functions.R
 create mode 100644 R/Sharis/code_chunks.Rmd

diff --git a/R/Sharis/Ameriflux_functions.R b/R/Sharis/Ameriflux_functions.R
new file mode 100644
index 0000000..05076ca
--- /dev/null
+++ b/R/Sharis/Ameriflux_functions.R
@@ -0,0 +1,92 @@
+generate_attributes_table <- function(csv_file_path,
+                                      attributes_file_path) {
+  # Check that files exist 
+  stopifnot(file.exists(csv_file_path))
+  stopifnot(file.exists(attributes_file_path))
+  
+  # Read in files 
+  data <- read.csv(csv_file_path, stringsAsFactors = FALSE, skip = 2)
+  n <- dim(data)[2]
+  attributes <- try(read.csv(attributes_file_path, stringsAsFactors = FALSE))
+
+  
+  # Initialize data frame
+  table <- data.frame(attributeName = rep("NA", n),
+                      attributeDefinition = rep("NA", n),
+                      measurementScale = rep("NA", n),
+                      domain = rep("NA", n),
+                      formatString = rep("NA", n),
+                      definition = rep("NA", n),
+                      unit = rep("NA", n),
+                      numberType = rep("NA", n),
+                      missingValueCode = rep("NA", n),
+                      missingValueCodeExplanation = rep("NA", n),
+                      stringsAsFactors = F)
+  
+  qualifiers<- c("_PI", "_QC", "_F", "_IU", "_H_V_R", "_H_V_A", "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9", "_SD", "_N")
+  num_qualifiers<- c("_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9")
+  
+  for (i in seq_len(n)) {
+    # add attribute name
+    table$attributeName[i] = colnames(data)[i]
+    
+      ## check if the name has a qualifier at the end
+      if (any(endsWith(colnames(data)[i], suffix = qualifiers))) {
+        # identify the qualifier
+        current_qual <- which(endsWith(colnames(data)[i], suffix = qualifiers))
+        qualifier<- qualifiers[current_qual]
+        len<- nchar(qualifier)
+        main_label<- substr(colnames(data)[i], 1, nchar(colnames(data)[i])-len)
+        
+        # get definition for main label
+        main_def <- attributes$uniqueAttributeDefinition[attributes$uniqueAttributeLabel == main_label]
+        
+        # get definition for qualifier label, special case if it is a number
+        if (qualifier %in% num_qualifiers){
+          qual_def <- attributes$uniqueAttributeDefinition[attributes$uniqueAttributeLabel == "_#"]
+        } else{
+          qual_def <- attributes$uniqueAttributeDefinition[attributes$uniqueAttributeLabel == qualifier]
+        }
+        
+        # concatenate the definitions
+        table$attributeDefinition[i] = paste(main_def, ". ", qual_def)
+        
+        # check if it is a time variable
+        if (grepl("TIME", main_label)){
+          table$measurementScale[i] = "dateTime"
+          table$domain[i] = "dateTimeDomain"
+          table$formatString[i] = "YYYYMMDDHHMM"
+          table$unit[i] = "NA"
+        } else {
+          table$measurementScale[i] = "ratio"
+          table$domain[i] = "numericDomain"
+          table$numberType[i] <- "real"
+          table$unit[i] = attributes$SI_unit[attributes$uniqueAttributeLabel == main_label]
+          table$missingValueCode[i] = "-9999"
+          table$missingValueCodeExplanation[i] = "Missing values are represented as -9999"
+        } 
+        
+        # case if there is no qualifier
+      } else {
+        table$attributeDefinition[i] = attributes$uniqueAttributeDefinition[attributes$uniqueAttributeLabel ==            colnames(data)[i]]
+        # check if it is a time variable
+        if (grepl("TIME", colnames(data)[i])){
+          table$measurementScale[i] = "dateTime"
+          table$domain[i] = "dateTimeDomain"
+          table$formatString[i] = "YYYYMMDDHHMM"
+          table$unit[i] = "NA"
+        } else {
+          table$measurementScale[i] = "ratio"
+          table$domain[i] = "numericDomain"
+          table$numberType[i] <- "real"
+          table$unit[i] = attributes$SI_unit[attributes$uniqueAttributeLabel == colnames(data)[i]]
+          table$missingValueCode[i] = "-9999"
+          table$missingValueCodeExplanation[i] = "Missing values are represented as -9999"
+        } 
+     }
+    
+  }
+  
+  return(table)
+  
+}
diff --git a/R/Sharis/code_chunks.Rmd b/R/Sharis/code_chunks.Rmd
new file mode 100644
index 0000000..6599988
--- /dev/null
+++ b/R/Sharis/code_chunks.Rmd
@@ -0,0 +1,170 @@
+---
+title: "Code Chunks"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+## Updating Coverage
+
+If you want to add to the current coverage without having to restate all the other coverage elements using `set_coverage`, these chunks allow you to just add taxonomic coverage and single date temporal coverage. 
+
+### Adding taxonomic coverage
+
+```{r, eval = FALSE}
+# add each new element as a tax object
+tax1 <- new("taxonomicClassification",
+            taxonRankName = new("taxonRankName", "Species"),
+            taxonRankValue = new("taxonRankValue", "Calamagrostis deschampsioides"))
+
+tax2 <- new("taxonomicClassification", 
+            taxonRankName = new("taxonRankName", "Species"),
+            taxonRankValue = new("taxonRankValue", "Carex aquatilis"))
+
+# combine all tax elements into taxonomic coverage object
+taxcov <- new("taxonomicCoverage",
+               taxonomicClassification = c(tax1, tax2))
+
+eml@dataset@coverage@taxonomicCoverage <- c(taxcov)
+
+```
+
+### Adding single date temporal coverage
+
+```{r, eval = FALSE}
+date <- new("singleDateTime",
+           calendarDate = "2011-09-15")
+
+tempcov1 <- new("temporalCoverage",
+               singleDateTime = date)
+
+eml@dataset@coverage@temporalCoverage <- c(tempcov1)
+```
+
+## Data Objects
+### Adding data tables for a whole folder of files with the same attributes
+
+```{r, eval = FALSE}
+# set path to data folder
+data_path <- '/home/visitor/…”' 
+
+# list the files. recursive = TRUE will get all files even if they are in folders in folders
+paths <- list.files(data_path, full.names=TRUE, recursive = TRUE) 
+
+# generate pids. replace format_id with the one that applies
+new_pids1 <- sapply(paths, function(path) {publish_object(mn, path, format_id = "text/plain")}) 
+
+attributes1 <- read.csv("/home/me/file_attributes.csv", stringsAsFactors = FALSE, na.strings = c(''))
+attributeList1 <- set_attributes(attributes1)
+
+# assign attributes to data tables
+eml <- eml_add_entities(eml, data.frame(type = "dataTable",
+                                        path = paths,
+                                        pid = new_pids1,
+                                        format_id = "text/plain"))
+eml@dataset@dataTable[[1]]@attributeList <- attributeList1
+eml@dataset@dataTable[[1]]@attributeList@id <- new("xml_attribute", "shared_attributes1")
+
+#run a loop over all the data pids to add data tables with descriptions
+for (i in 2:length(eml@dataset@dataTable)) {
+  eml@dataset@dataTable[[i]]@attributeList@references <- new("references", "shared_attributes1")
+  eml@dataset@dataTable[[i]]@entityDescription <- new("entityDescription", .Data = "entity description")
+}
+
+```
+
+## System Metadata
+### Obsolescence chain
+
+This chunk is to obsolete one dataset. If there are more to add to the chain, more steps can be added, carefully making sure to fill in `obsoletes` and `obsoletedBy` slots for each one. 
+
+```{r, eval = FALSE}
+# get oldest version of the file you want to be visible. Use get_all_versions and look at the first.
+# urn:uuid:...
+
+# PID for data set to be hidden: doi:10…
+
+# adding data set to hide in the slot before the first version of the visible data set
+gsmOG <- getSystemMetadata(mn, "urn:uuid:...")
+gsmOG@obsoletes <- "doi:10…"
+updateSystemMetadata(mn, "urn:uuid:...", gsmOG)
+
+# adding first version to obsolescence chain after hidden version
+gsmObs1 <- getSystemMetadata(mn, "doi:10…")
+gsmObs1@obsoletedBy <- "urn:uuid:..."
+updateSystemMetadata(mn, "doi:10…", gsmObs1)
+
+```
+
+### Set rights and access
+
+This chunk sets rights and access for metadata, resource map, and all data objects in a package
+
+```{r, eval = FALSE}
+## Fix rights holder and access
+PI_name <- "http://orcid.org/…."
+project_pid <- "resource_map_doi:10…."
+project_package <- get_package(mn, project_pid)
+set_rights_and_access(mn, c(project_package$metadata, project_package$resource_map, project_package$data), PI_name, c("read", "write", "changePermission"))
+
+```
+
+## Miscellaneous 
+
+### Adding sampling info to methods section
+
+```{r, eval = FALSE}
+step1 <- new('methodStep',
+             description = "methods text")
+
+stEx <- new("studyExtent", 
+           description = "study extent description")
+
+samp <- new("sampling", 
+           studyExtent = stEx,
+           samplingDescription = "sampling description text")
+
+methods1 <- new("methods",
+               methodStep = c(step1),
+               sampling = samp)
+eml@dataset@methods <- methods1
+```
+
+### Adding a pre generated identifier to the eml
+
+When you pre generate a UUID or DOI, the change is not automatically reflected in the "Identifier" section of the eml so this makes sure that the eml lines up with the identifier being used. 
+
+```{r, eval = FALSE}
+## Generate DOI and add to EML
+doiPid <- generateIdentifier(mn, "DOI")
+eml@packageId <- new("xml_attribute",
+                      .Data = doiPid)
+
+```
+
+### Dealing with netCDFs
+
+This section is for dealing with netCDF (.nc) files. These files require data tables but since they can not be simply opened on the computer using a text editor or Excel, you can use Panoply to explore them or these R commands:
+
+
+```{r}
+library(arcticdatautils)
+library(ncdf4)
+filepath <- '/home/sharisnochs/Petterson/ICECAPS_precip_product_2010_2015.nc' 
+
+# gets attribute info
+atts <- get_ncdf4_attributes(filepath)
+# preview of View(atts)
+atts[1:5,]
+
+
+# returns the actual values for a specified attribute
+t <- nc_open(filepath)
+test <- ncvar_get(t, 't2m')
+# preview of View(test)
+test[1:10]
+```
+
+The `formatId` in the sysmeta will most likely be `netCDF-4`.
\ No newline at end of file

From 413a36f3430108c093f806cb614f06c94881e189 Mon Sep 17 00:00:00 2001
From: sharisochs <36999120+sharisochs@users.noreply.github.com>
Date: Wed, 11 Apr 2018 11:12:16 -0700
Subject: [PATCH 2/4] Update Ameriflux_functions.R

---
 R/Sharis/Ameriflux_functions.R | 81 ++++++++++++++++++++--------------
 1 file changed, 47 insertions(+), 34 deletions(-)

diff --git a/R/Sharis/Ameriflux_functions.R b/R/Sharis/Ameriflux_functions.R
index 05076ca..c62d6f9 100644
--- a/R/Sharis/Ameriflux_functions.R
+++ b/R/Sharis/Ameriflux_functions.R
@@ -1,3 +1,14 @@
+#' Generate Attributes table 
+#'
+#' @description This function creates a complete attributes table by combining information gathered from 
+#' the csv data file itself and a csv of preliminary attribute information. This function was written 
+#' specifically to work with Ameriflux data and the information that was provided along with it.
+#'  
+#'
+#' @param csv_file_path (character) Path to the CSV file of data
+#' @param attributes_file_path (character) Path to CSV file of preliminary attribute information
+
+
 generate_attributes_table <- function(csv_file_path,
                                       attributes_file_path) {
   # Check that files exist 
@@ -8,10 +19,10 @@ generate_attributes_table <- function(csv_file_path,
   data <- read.csv(csv_file_path, stringsAsFactors = FALSE, skip = 2)
   n <- dim(data)[2]
   attributes <- try(read.csv(attributes_file_path, stringsAsFactors = FALSE))
-
+  colnames(attributes) <- ("category", "label", "definition", "unit", "SI_unit")
   
   # Initialize data frame
-  table <- data.frame(attributeName = rep("NA", n),
+  att_table <- data.frame(attributeName = rep("NA", n),
                       attributeDefinition = rep("NA", n),
                       measurementScale = rep("NA", n),
                       domain = rep("NA", n),
@@ -26,67 +37,69 @@ generate_attributes_table <- function(csv_file_path,
   qualifiers<- c("_PI", "_QC", "_F", "_IU", "_H_V_R", "_H_V_A", "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9", "_SD", "_N")
   num_qualifiers<- c("_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9")
   
+  # add attribute name
+  att_table$attributeName <- colnames(data)
+  col_names <- colnames(data)
+  
   for (i in seq_len(n)) {
-    # add attribute name
-    table$attributeName[i] = colnames(data)[i]
     
       ## check if the name has a qualifier at the end
-      if (any(endsWith(colnames(data)[i], suffix = qualifiers))) {
+      if (any(endsWith(col_names[i], suffix = qualifiers))) {
         # identify the qualifier
-        current_qual <- which(endsWith(colnames(data)[i], suffix = qualifiers))
+        current_qual <- which(endsWith(col_names[i], suffix = qualifiers))
         qualifier<- qualifiers[current_qual]
         len<- nchar(qualifier)
-        main_label<- substr(colnames(data)[i], 1, nchar(colnames(data)[i])-len)
+        main_label<- substr(col_names[i], 1, nchar(col_names[i])-len)
         
         # get definition for main label
-        main_def <- attributes$uniqueAttributeDefinition[attributes$uniqueAttributeLabel == main_label]
+        main_def <- attributes$definition[attributes$label == main_label]
         
         # get definition for qualifier label, special case if it is a number
         if (qualifier %in% num_qualifiers){
-          qual_def <- attributes$uniqueAttributeDefinition[attributes$uniqueAttributeLabel == "_#"]
+          qual_def <- attributes$definition[attributes$label == "_#"]
         } else{
-          qual_def <- attributes$uniqueAttributeDefinition[attributes$uniqueAttributeLabel == qualifier]
+          qual_def <- attributes$definition[attributes$label == qualifier]
         }
         
         # concatenate the definitions
-        table$attributeDefinition[i] = paste(main_def, ". ", qual_def)
+        att_table$attributeDefinition[i] <- paste(main_def, ". ", qual_def)
         
         # check if it is a time variable
         if (grepl("TIME", main_label)){
-          table$measurementScale[i] = "dateTime"
-          table$domain[i] = "dateTimeDomain"
-          table$formatString[i] = "YYYYMMDDHHMM"
-          table$unit[i] = "NA"
+          att_table$measurementScale[i] <- "dateTime"
+          att_table$domain[i] <- "dateTimeDomain"
+          att_table$formatString[i] <- "YYYYMMDDHHMM"
+          att_table$unit[i] <- "NA"
         } else {
-          table$measurementScale[i] = "ratio"
-          table$domain[i] = "numericDomain"
-          table$numberType[i] <- "real"
-          table$unit[i] = attributes$SI_unit[attributes$uniqueAttributeLabel == main_label]
-          table$missingValueCode[i] = "-9999"
-          table$missingValueCodeExplanation[i] = "Missing values are represented as -9999"
+          att_table$measurementScale[i] <- "ratio"
+          att_table$domain[i] <- "numericDomain"
+          att_table$numberType[i] <- "real"
+          att_table$unit[i] <- attributes$SI_unit[attributes$label == main_label]
+          att_table$missingValueCode[i] <- "-9999"
+          att_table$missingValueCodeExplanation[i] <- "Missing values are represented as -9999"
         } 
         
         # case if there is no qualifier
       } else {
-        table$attributeDefinition[i] = attributes$uniqueAttributeDefinition[attributes$uniqueAttributeLabel ==            colnames(data)[i]]
+        att_table$attributeDefinition[i] <- attributes$definition[attributes$label == col_names[i]]
         # check if it is a time variable
-        if (grepl("TIME", colnames(data)[i])){
-          table$measurementScale[i] = "dateTime"
-          table$domain[i] = "dateTimeDomain"
-          table$formatString[i] = "YYYYMMDDHHMM"
-          table$unit[i] = "NA"
+        if (grepl("TIME", col_names[i])){
+          att_table$measurementScale[i] <- "dateTime"
+          att_table$domain[i] <- "dateTimeDomain"
+          att_table$formatString[i] <- "YYYYMMDDHHMM"
+          att_table$unit[i] <- "NA"
         } else {
-          table$measurementScale[i] = "ratio"
-          table$domain[i] = "numericDomain"
-          table$numberType[i] <- "real"
-          table$unit[i] = attributes$SI_unit[attributes$uniqueAttributeLabel == colnames(data)[i]]
-          table$missingValueCode[i] = "-9999"
-          table$missingValueCodeExplanation[i] = "Missing values are represented as -9999"
+          att_table$measurementScale[i] <- "ratio"
+          att_table$domain[i] <- "numericDomain"
+          att_table$numberType[i] <- "real"
+          att_table$unit[i] <- attributes$SI_unit[attributes$label == col_names[i]]
+          att_table$missingValueCode[i] <- "-9999"
+          att_table$missingValueCodeExplanation[i] <- "Missing values are represented as -9999"
         } 
      }
     
   }
   
-  return(table)
+  return(att_table)
   
 }

From 84687f0976ad51cc7b3f27e1517131ee612ecc55 Mon Sep 17 00:00:00 2001
From: Sharis Ochs <sharisnochs@gmail.com>
Date: Mon, 4 Jun 2018 10:42:18 -0700
Subject: [PATCH 3/4] Function to combine get_all_versions, getSystemMetadata,
 and solr query

---
 R/Sharis/getSysmetaAll.R | 157 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)
 create mode 100644 R/Sharis/getSysmetaAll.R

diff --git a/R/Sharis/getSysmetaAll.R b/R/Sharis/getSysmetaAll.R
new file mode 100644
index 0000000..b81a00f
--- /dev/null
+++ b/R/Sharis/getSysmetaAll.R
@@ -0,0 +1,157 @@
+#' Get System Metadata for all versions
+#'
+#' @description This function combines getSystemMetadata and solr query to produce 
+#' a data frame with any combination of getSystemMetadata and query fields for all 
+#' versions of the given PID. If no fields are specified, the resulting data frame
+#' will contain all fields possible from getSystemMetadata and solr query.
+#'
+#' @param node (character) Specify the node where the object should be searched for.
+#' @param object_pid (character) PID for the object that you want to return information about.
+#' @param list_of_fields (list) List of fields that you want returned in the data frame. 
+#' "Id" is added automatically.
+#'
+#'
+#' @author Sharis Ochs
+
+getSystemMetadataAll <- function(node, object_pid, list_of_fields = "all") {
+    
+    ## Checks =========================
+    # Check that object exist 
+    if (!(arcticdatautils::object_exists(node, object_pid))) {
+        stop('Object does not exist on specified node ')
+    }
+      
+    # get all solr fields
+    adc_solr <- httr::GET("https://arcticdata.io/metacat/d1/mn/v2/query/solr")
+    suppressMessages(suppressWarnings(adc_solr<- adc_solr %>% 
+        stringr::str_extract_all("name>.*<") %>% 
+        unlist() %>% 
+        stringr::str_replace_all("name>|<", "")))
+    
+    # fields in getSystemMetadata but not solr query
+    gsm_fields <- c("serialVersion", 'accessPolicy', 'preferredNodes', 'blockedNodes', 'archived', 'dateSysMetadataModified', 'originMemberNode', 'authoritativeMemberNode')
+    
+    
+    suppressWarnings(if (list_of_fields != "all"){
+        ## Check that all specified fields are valid
+        for (j in 1:length(list_of_fields)){
+            if (!(list_of_fields[j] %in% gsm_fields | list_of_fields[j] %in% adc_solr)){
+                stop('Please enter a valid field ')
+            }
+        }
+    })
+    
+    ## Initialize ===========================
+    # Check how many versions there are
+    versions<- arcticdatautils::get_all_versions(node, object_pid)
+    n<- length(versions)
+    
+    # Initialize data frame of fields only in getSystemMetadata
+    sysmeta_table <- data.frame(id = rep("NA", n),
+                                serialVersion = rep("NA", n),
+                                accessPolicy = rep("NA", n),
+                                preferredNodes = rep("NA", n),
+                                blockedNodes = rep("NA", n),
+                                archived = rep("NA", n),
+                                dateSysMetadataModified = rep("NA", n),
+                                originMemberNode = rep("NA", n),
+                                authoritativeMemberNode = rep("NA", n),
+                                stringsAsFactors = F)
+    
+    # Case 1 ========================
+    # Case: if no fields are specified
+    suppressWarnings(if (list_of_fields == "all"){
+        
+        # Query all fields
+        inside_q <- paste(versions, collapse="\"+OR+\"")
+        q <- paste0("documents:\"", inside_q, "\"")
+        df_query <- dataone::query(node, list(q=q,
+                                     fl= "*",
+                                     rows="100"),
+                                     as = "data.frame")
+        
+        # Then getSystemMetadata
+        for (i in 1:n){
+            sysmeta<- dataone::getSystemMetadata(node, versions[i])
+            sysmeta_table$id[i] <- sysmeta@identifier
+            sysmeta_table$serialVersion[i] <- sysmeta@serialVersion
+            sysmeta_table$accessPolicy[i] <- I(list((sysmeta@accessPolicy)))
+            if (length(sysmeta@preferredNodes)>0){
+                sysmeta_table$preferredNodes[i] <- list(sysmeta@preferredNodes)
+            }
+            if (length(sysmeta@blockedNodes)>0){
+                sysmeta_table$blockedNodes[i] <- list(sysmeta@blockedNodes)
+            }
+            sysmeta_table$archived[i] <- sysmeta@archived
+            sysmeta_table$dateSysMetadataModified[i] <- sysmeta@dateSysMetadataModified
+            sysmeta_table$originMemberNode[i] <- sysmeta@originMemberNode
+            sysmeta_table$authoritativeMemberNode[i] <- sysmeta@authoritativeMemberNode
+        }
+        
+        # Merge data frames
+        final_df <- merge(df_query, sysmeta_table, by = "id")
+    })
+    
+    
+    # Case 2 ========================
+    # Else: Fields are specified
+    suppressWarnings(if (list_of_fields != "all") {
+        # split fields into those in solr query and those in getSystemMetadata
+        query_fields <- list_of_fields[which(list_of_fields %in% adc_solr)]
+        gsm_fields <- list_of_fields[which(list_of_fields %in% gsm_fields)]
+        
+        # If query fields are specified, run query
+        if (length(query_fields)>0){
+            inside_q <- paste(versions, collapse="\"+OR+\"")
+            q <- paste0("documents:\"", inside_q, "\"")
+            fl <- paste(list_of_fields, collapse=", ")
+            fl_add_id <- paste(c("id", list_of_fields), collapse=", ")
+            df_query <- dataone::query(node, list(q=q,
+                                       fl= fl_add_id,
+                                       rows="100"),
+                                       as = "data.frame")
+        }
+        
+        # If getSystemMetadata fields are specified, getSystemMetadata
+        if (length(gsm_fields)>0){
+            for (i in 1:n){
+                sysmeta<- dataone::getSystemMetadata(node, versions[i])
+                sysmeta_table$id[i] <- sysmeta@identifier
+                sysmeta_table$serialVersion[i] <- sysmeta@serialVersion
+                sysmeta_table$accessPolicy[i] <- I(list((sysmeta@accessPolicy)))
+                if (length(sysmeta@preferredNodes)>0){
+                    sysmeta_table$preferredNodes[i] <- list(sysmeta@preferredNodes)
+                }
+                if (length(sysmeta@blockedNodes)>0){
+                    sysmeta_table$blockedNodes[i] <- list(sysmeta@blockedNodes)
+                }
+                sysmeta_table$archived[i] <- sysmeta@archived
+                sysmeta_table$dateSysMetadataModified[i] <- sysmeta@dateSysMetadataModified
+                sysmeta_table$originMemberNode[i] <- sysmeta@originMemberNode
+                sysmeta_table$authoritativeMemberNode[i] <- sysmeta@authoritativeMemberNode
+            }
+        }
+        
+        
+        # Make new data frame with only the desired fields
+        sysmeta_table_2 <- sysmeta_table[c("id", gsm_fields)]
+        
+        if (length(query_fields)>0 & length(gsm_fields)>0){
+            # Merge data frames
+            final_df <- merge(df_query, sysmeta_table_2, by = "id")
+        }
+        
+        if (length(query_fields)>0 & length(gsm_fields)==0){
+            final_df <- df_query
+        }
+        
+        if (length(query_fields)==0 & length(gsm_fields)>0){
+            final_df <- sysmeta_table_2
+        }
+    })
+    
+    return(final_df)
+}
+
+#example
+df<- getSystemMetadataAll(mn, "doi:10.18739/A27D2Q670", c("origin", "submitter", "title", 'accessPolicy'))

From d8f4e5e880d85c06bd3d88b3e1f64df80af7317f Mon Sep 17 00:00:00 2001
From: Sharis Ochs <sharisnochs@gmail.com>
Date: Mon, 4 Jun 2018 10:46:58 -0700
Subject: [PATCH 4/4] clean up branches

---
 R/Sharis/getSysmetaAll.R | 157 ---------------------------------------
 1 file changed, 157 deletions(-)
 delete mode 100644 R/Sharis/getSysmetaAll.R

diff --git a/R/Sharis/getSysmetaAll.R b/R/Sharis/getSysmetaAll.R
deleted file mode 100644
index b81a00f..0000000
--- a/R/Sharis/getSysmetaAll.R
+++ /dev/null
@@ -1,157 +0,0 @@
-#' Get System Metadata for all versions
-#'
-#' @description This function combines getSystemMetadata and solr query to produce 
-#' a data frame with any combination of getSystemMetadata and query fields for all 
-#' versions of the given PID. If no fields are specified, the resulting data frame
-#' will contain all fields possible from getSystemMetadata and solr query.
-#'
-#' @param node (character) Specify the node where the object should be searched for.
-#' @param object_pid (character) PID for the object that you want to return information about.
-#' @param list_of_fields (list) List of fields that you want returned in the data frame. 
-#' "Id" is added automatically.
-#'
-#'
-#' @author Sharis Ochs
-
-getSystemMetadataAll <- function(node, object_pid, list_of_fields = "all") {
-    
-    ## Checks =========================
-    # Check that object exist 
-    if (!(arcticdatautils::object_exists(node, object_pid))) {
-        stop('Object does not exist on specified node ')
-    }
-      
-    # get all solr fields
-    adc_solr <- httr::GET("https://arcticdata.io/metacat/d1/mn/v2/query/solr")
-    suppressMessages(suppressWarnings(adc_solr<- adc_solr %>% 
-        stringr::str_extract_all("name>.*<") %>% 
-        unlist() %>% 
-        stringr::str_replace_all("name>|<", "")))
-    
-    # fields in getSystemMetadata but not solr query
-    gsm_fields <- c("serialVersion", 'accessPolicy', 'preferredNodes', 'blockedNodes', 'archived', 'dateSysMetadataModified', 'originMemberNode', 'authoritativeMemberNode')
-    
-    
-    suppressWarnings(if (list_of_fields != "all"){
-        ## Check that all specified fields are valid
-        for (j in 1:length(list_of_fields)){
-            if (!(list_of_fields[j] %in% gsm_fields | list_of_fields[j] %in% adc_solr)){
-                stop('Please enter a valid field ')
-            }
-        }
-    })
-    
-    ## Initialize ===========================
-    # Check how many versions there are
-    versions<- arcticdatautils::get_all_versions(node, object_pid)
-    n<- length(versions)
-    
-    # Initialize data frame of fields only in getSystemMetadata
-    sysmeta_table <- data.frame(id = rep("NA", n),
-                                serialVersion = rep("NA", n),
-                                accessPolicy = rep("NA", n),
-                                preferredNodes = rep("NA", n),
-                                blockedNodes = rep("NA", n),
-                                archived = rep("NA", n),
-                                dateSysMetadataModified = rep("NA", n),
-                                originMemberNode = rep("NA", n),
-                                authoritativeMemberNode = rep("NA", n),
-                                stringsAsFactors = F)
-    
-    # Case 1 ========================
-    # Case: if no fields are specified
-    suppressWarnings(if (list_of_fields == "all"){
-        
-        # Query all fields
-        inside_q <- paste(versions, collapse="\"+OR+\"")
-        q <- paste0("documents:\"", inside_q, "\"")
-        df_query <- dataone::query(node, list(q=q,
-                                     fl= "*",
-                                     rows="100"),
-                                     as = "data.frame")
-        
-        # Then getSystemMetadata
-        for (i in 1:n){
-            sysmeta<- dataone::getSystemMetadata(node, versions[i])
-            sysmeta_table$id[i] <- sysmeta@identifier
-            sysmeta_table$serialVersion[i] <- sysmeta@serialVersion
-            sysmeta_table$accessPolicy[i] <- I(list((sysmeta@accessPolicy)))
-            if (length(sysmeta@preferredNodes)>0){
-                sysmeta_table$preferredNodes[i] <- list(sysmeta@preferredNodes)
-            }
-            if (length(sysmeta@blockedNodes)>0){
-                sysmeta_table$blockedNodes[i] <- list(sysmeta@blockedNodes)
-            }
-            sysmeta_table$archived[i] <- sysmeta@archived
-            sysmeta_table$dateSysMetadataModified[i] <- sysmeta@dateSysMetadataModified
-            sysmeta_table$originMemberNode[i] <- sysmeta@originMemberNode
-            sysmeta_table$authoritativeMemberNode[i] <- sysmeta@authoritativeMemberNode
-        }
-        
-        # Merge data frames
-        final_df <- merge(df_query, sysmeta_table, by = "id")
-    })
-    
-    
-    # Case 2 ========================
-    # Else: Fields are specified
-    suppressWarnings(if (list_of_fields != "all") {
-        # split fields into those in solr query and those in getSystemMetadata
-        query_fields <- list_of_fields[which(list_of_fields %in% adc_solr)]
-        gsm_fields <- list_of_fields[which(list_of_fields %in% gsm_fields)]
-        
-        # If query fields are specified, run query
-        if (length(query_fields)>0){
-            inside_q <- paste(versions, collapse="\"+OR+\"")
-            q <- paste0("documents:\"", inside_q, "\"")
-            fl <- paste(list_of_fields, collapse=", ")
-            fl_add_id <- paste(c("id", list_of_fields), collapse=", ")
-            df_query <- dataone::query(node, list(q=q,
-                                       fl= fl_add_id,
-                                       rows="100"),
-                                       as = "data.frame")
-        }
-        
-        # If getSystemMetadata fields are specified, getSystemMetadata
-        if (length(gsm_fields)>0){
-            for (i in 1:n){
-                sysmeta<- dataone::getSystemMetadata(node, versions[i])
-                sysmeta_table$id[i] <- sysmeta@identifier
-                sysmeta_table$serialVersion[i] <- sysmeta@serialVersion
-                sysmeta_table$accessPolicy[i] <- I(list((sysmeta@accessPolicy)))
-                if (length(sysmeta@preferredNodes)>0){
-                    sysmeta_table$preferredNodes[i] <- list(sysmeta@preferredNodes)
-                }
-                if (length(sysmeta@blockedNodes)>0){
-                    sysmeta_table$blockedNodes[i] <- list(sysmeta@blockedNodes)
-                }
-                sysmeta_table$archived[i] <- sysmeta@archived
-                sysmeta_table$dateSysMetadataModified[i] <- sysmeta@dateSysMetadataModified
-                sysmeta_table$originMemberNode[i] <- sysmeta@originMemberNode
-                sysmeta_table$authoritativeMemberNode[i] <- sysmeta@authoritativeMemberNode
-            }
-        }
-        
-        
-        # Make new data frame with only the desired fields
-        sysmeta_table_2 <- sysmeta_table[c("id", gsm_fields)]
-        
-        if (length(query_fields)>0 & length(gsm_fields)>0){
-            # Merge data frames
-            final_df <- merge(df_query, sysmeta_table_2, by = "id")
-        }
-        
-        if (length(query_fields)>0 & length(gsm_fields)==0){
-            final_df <- df_query
-        }
-        
-        if (length(query_fields)==0 & length(gsm_fields)>0){
-            final_df <- sysmeta_table_2
-        }
-    })
-    
-    return(final_df)
-}
-
-#example
-df<- getSystemMetadataAll(mn, "doi:10.18739/A27D2Q670", c("origin", "submitter", "title", 'accessPolicy'))