From 202cc66abc85257b7c56a7273300011b9a02eb02 Mon Sep 17 00:00:00 2001
From: Angel Chen <angelchen@ucsb.edu>
Date: Mon, 2 Mar 2020 10:38:33 -0800
Subject: [PATCH 1/2] adding processing scripts for ticket #19667 Russell

---
 R/Angel Chen/ticket #19667 Russell.R | 56 ++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 R/Angel Chen/ticket #19667 Russell.R

diff --git a/R/Angel Chen/ticket #19667 Russell.R b/R/Angel Chen/ticket #19667 Russell.R
new file mode 100644
index 0000000..dbb4374
--- /dev/null
+++ b/R/Angel Chen/ticket #19667 Russell.R	
@@ -0,0 +1,56 @@
+#ticket #19667: https://arcticdata.io/catalog/view/urn:uuid:6a64c7ad-aa08-424a-9cf9-8a5747436813
+#formatted like this https://www-air.larc.nasa.gov/missions/etc/IcarttDataFormat.htm
+#spending time to do https://learning.nceas.ucsb.edu/2020-02-RRCourse/data-cleaning-and-manipulation.html
+
+#making attributes for Barrow_Spectra_Barrow_20080302_R1_thru20100305.ict
+#grab the names of the columns using names() and make that a data frame (using data.frame()
+#split the names into wavelength and frame number columns using separate()
+#create descriptions of the attributes using a combination of mutate() and paste(), descriptions can match a format like "absorbance at x nm, frame y"
+#add units, etc using mutate (unit can be dimensionless
+
+#1878 total variables
+
+
+pkg <- get_package(adc, 'resource_map_urn:uuid:6b36ffd1-98f3-415d-a1b7-822a67745a1b', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+
+dat <- read.csv("~/ticket #19667 Russell/Barrow_Spectra_Barrow_20080302_R1_thru20100305.ict", skip = 1906)
+
+names_df <- data.frame(names(dat))
+names_vector <- as.vector(names_df[[1]])
+
+descriptions1 <- c("number of seconds elapsed since 03/02/2008 in Coordinated Universal Time (UTC), recorded when a day began", "number of seconds elapsed since 03/02/2008 in Coordinated Universal Time (UTC), recorded when a day ended")
+
+wavenumbers <- seq(400, 4000, by=1.92)
+descriptions2 <- paste("absorbance at wavenumber", wavenumbers)
+
+scales1 <- c("ratio", "ratio")
+scales2 <- rep("ratio", 1876)
+
+domains <- rep("numericDomain", 1878)
+
+format_strings <- rep(NA, 1878)
+
+definitions <- rep(NA, 1878)
+
+units1 <- c("second", "second")
+units2 <- rep("dimensionless" ,1876)
+
+number_types <- rep("real", 1878)
+
+missing_codes <- rep(NA, 1878)
+missing_explanations <- rep(NA, 1878)
+
+attributes <- data.frame(
+  attributeName = names_vector,
+  attributeDefinition = c(descriptions1, descriptions2),
+  measurementScale = c(scales1, scales2),
+  domain = domains,
+  formatString = format_strings,
+  definition = definitions,
+  unit = c(units1, units2),
+  numberType = number_types,
+  missingValueCode = missing_codes,
+  missingValueCodeExplanation = missing_explanations,
+  
+  stringsAsFactors = FALSE)

From a13e5f04d2a8b974bad33e177e41c1d58793ec36 Mon Sep 17 00:00:00 2001
From: Angel Chen <angelchen@ucsb.edu>
Date: Wed, 5 Aug 2020 11:29:44 -0700
Subject: [PATCH 2/2] adding another processing script

---
 R/Angel Chen/ticket #19667 Russell.R |  56 -----------
 R/Angel Chen/ticket #20492 Norman.R  | 144 +++++++++++++++++++++++++++
 2 files changed, 144 insertions(+), 56 deletions(-)
 delete mode 100644 R/Angel Chen/ticket #19667 Russell.R
 create mode 100644 R/Angel Chen/ticket #20492 Norman.R

diff --git a/R/Angel Chen/ticket #19667 Russell.R b/R/Angel Chen/ticket #19667 Russell.R
deleted file mode 100644
index dbb4374..0000000
--- a/R/Angel Chen/ticket #19667 Russell.R	
+++ /dev/null
@@ -1,56 +0,0 @@
-#ticket #19667: https://arcticdata.io/catalog/view/urn:uuid:6a64c7ad-aa08-424a-9cf9-8a5747436813
-#formatted like this https://www-air.larc.nasa.gov/missions/etc/IcarttDataFormat.htm
-#spending time to do https://learning.nceas.ucsb.edu/2020-02-RRCourse/data-cleaning-and-manipulation.html
-
-#making attributes for Barrow_Spectra_Barrow_20080302_R1_thru20100305.ict
-#grab the names of the columns using names() and make that a data frame (using data.frame()
-#split the names into wavelength and frame number columns using separate()
-#create descriptions of the attributes using a combination of mutate() and paste(), descriptions can match a format like "absorbance at x nm, frame y"
-#add units, etc using mutate (unit can be dimensionless
-
-#1878 total variables
-
-
-pkg <- get_package(adc, 'resource_map_urn:uuid:6b36ffd1-98f3-415d-a1b7-822a67745a1b', file_names = TRUE)
-doc <- read_eml(getObject(adc, pkg$metadata))
-
-dat <- read.csv("~/ticket #19667 Russell/Barrow_Spectra_Barrow_20080302_R1_thru20100305.ict", skip = 1906)
-
-names_df <- data.frame(names(dat))
-names_vector <- as.vector(names_df[[1]])
-
-descriptions1 <- c("number of seconds elapsed since 03/02/2008 in Coordinated Universal Time (UTC), recorded when a day began", "number of seconds elapsed since 03/02/2008 in Coordinated Universal Time (UTC), recorded when a day ended")
-
-wavenumbers <- seq(400, 4000, by=1.92)
-descriptions2 <- paste("absorbance at wavenumber", wavenumbers)
-
-scales1 <- c("ratio", "ratio")
-scales2 <- rep("ratio", 1876)
-
-domains <- rep("numericDomain", 1878)
-
-format_strings <- rep(NA, 1878)
-
-definitions <- rep(NA, 1878)
-
-units1 <- c("second", "second")
-units2 <- rep("dimensionless" ,1876)
-
-number_types <- rep("real", 1878)
-
-missing_codes <- rep(NA, 1878)
-missing_explanations <- rep(NA, 1878)
-
-attributes <- data.frame(
-  attributeName = names_vector,
-  attributeDefinition = c(descriptions1, descriptions2),
-  measurementScale = c(scales1, scales2),
-  domain = domains,
-  formatString = format_strings,
-  definition = definitions,
-  unit = c(units1, units2),
-  numberType = number_types,
-  missingValueCode = missing_codes,
-  missingValueCodeExplanation = missing_explanations,
-  
-  stringsAsFactors = FALSE)
diff --git a/R/Angel Chen/ticket #20492 Norman.R b/R/Angel Chen/ticket #20492 Norman.R
new file mode 100644
index 0000000..9039c29
--- /dev/null
+++ b/R/Angel Chen/ticket #20492 Norman.R	
@@ -0,0 +1,144 @@
+# Ticket #20492 Norman: processing log. `adc` is used below but never defined here, and no library() calls appear, so both must already exist in the session.
+#Human dataset:
+#https://arcticdata.io/catalog/#view/urn:uuid:c715fe12-6c05-4c27-8d00-b9c0c536c54b
+# Load the package by its resource map id and read its metadata object into `doc`.
+pkg <- get_package(adc, 'resource_map_urn:uuid:2a957ad3-1c0f-44e9-b79d-5ea2a2ed76bf', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+emld::eml_version("eml-2.1.1")
+# Publish the summary CSV as a new non-public object; the returned value is kept in `id_new`.
+id_new <- publish_object(
+  adc,
+  path = "~/ticket #20492 Norman/CEBP_HumanGeneticData_Summary.csv",
+  format_id = "text/csv",
+  public = FALSE
+)
+# Update the package, passing `id_new` as data_pids; no metadata_path is given in this call.
+update <- publish_update(adc, 
+                         metadata_pid = pkg$metadata,
+                         resource_map_pid = pkg$resource_map,
+                         data_pids = id_new,
+                         public = FALSE)
+#https://arcticdata.io/catalog/view/urn%3Auuid%3Ab7b1a184-4f25-4c7f-b66d-240e2dfcd348
+
+pkg <- get_package(adc, 'resource_map_urn:uuid:b7b1a184-4f25-4c7f-b66d-240e2dfcd348', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+emld::eml_version("eml-2.1.1")
+# Run `doc` through the eml_add_publisher and eml_add_entity_system helpers.
+doc <- eml_add_publisher(doc)
+doc <- eml_add_entity_system(doc)
+# Read the data file and pass it to shiny_attributes; `out` is not referenced again in this script.
+CEBP_HumanGeneticData_Summary <- read_csv("~/ticket #20492 Norman/CEBP_HumanGeneticData_Summary.csv")
+out <- shiny_attributes(CEBP_HumanGeneticData_Summary, NULL)
+# Remove the dataset's otherEntity element; a dataTable is stored further down instead.
+doc$dataset$otherEntity <- NULL
+# Build the physical section from the pid of an existing object.
+physical <- pid_to_eml_physical(adc, "urn:uuid:c17ef00e-7202-49b5-aa8e-a51afc999f41")
+# Attribute metadata is read from a local CSV and converted with set_attributes.
+Attributes_Table <- read_csv("~/ticket #20492 Norman/Attributes_Table.csv")
+attributeList <- set_attributes(attributes=Attributes_Table)
+# Assemble the dataTable entity and store it as the dataset's first dataTable.
+dataTable <- eml$dataTable(entityName = "CEBP_HumanGeneticData_Summary.csv",
+                           entityDescription = "Summary of the genetic data collected from individuals",
+                           physical = physical,
+                           attributeList = attributeList
+)
+doc$dataset$dataTable[[1]] <- dataTable
+
+# Remove the third contact entry.
+doc$dataset$contact[[3]] <- NULL
+# NOTE(review): these longitudes carry no minus sign; the next section of this script re-sets the same two fields with negative values.
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$westBoundingCoordinate <- "156.51550"
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$eastBoundingCoordinate <- "156.64143"
+# Replace the project section with the output of eml_nsf_to_project for "1523059" (presumably an NSF award number; confirm).
+doc$dataset$project <- eml_nsf_to_project("1523059")
+# Validate, write the metadata to a temp file, then publish it; the eml_validate result is not captured or checked.
+eml_validate(doc)
+doc_path <- file.path(tempdir(), 'science_metadata.xml')
+write_eml(doc, doc_path)
+
+update <- publish_update(adc, 
+                         metadata_pid = pkg$metadata,
+                         resource_map_pid = pkg$resource_map,
+                         data_pids = pkg$data,
+                         metadata_path = doc_path, 
+                         public = FALSE)
+#https://arcticdata.io/catalog/view/urn%3Auuid%3Afff9acca-9e4c-481a-8186-678892ca5a6e
+
+pkg <- get_package(adc, 'resource_map_urn:uuid:fff9acca-9e4c-481a-8186-678892ca5a6e', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+emld::eml_version("eml-2.1.1")
+
+# NOTE(review): the sign is now negative, but west (-156.51550) is numerically greater than east (-156.64143); verify the two are not swapped. The following section overwrites both.
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$westBoundingCoordinate <- "-156.51550"
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$eastBoundingCoordinate <- "-156.64143"
+# Validate, write the metadata to a temp file, then publish it with the package's existing data pids.
+eml_validate(doc)
+doc_path <- file.path(tempdir(), 'science_metadata.xml')
+write_eml(doc, doc_path)
+
+update <- publish_update(adc, 
+                         metadata_pid = pkg$metadata,
+                         resource_map_pid = pkg$resource_map,
+                         data_pids = pkg$data,
+                         metadata_path = doc_path, 
+                         public = FALSE)
+#https://arcticdata.io/catalog/view/urn%3Auuid%3A8b86d811-2b0e-48ae-9f48-7a57d67eccfe
+
+pkg <- get_package(adc, 'resource_map_urn:uuid:8b86d811-2b0e-48ae-9f48-7a57d67eccfe', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+emld::eml_version("eml-2.1.1")
+# Set all four bounds of the second geographicCoverage; north equals south and west equals east, so the box is a single point.
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$northBoundingCoordinate <- "69.057876"
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$southBoundingCoordinate <- "69.057876"
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$westBoundingCoordinate <- "-152.862827"
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$eastBoundingCoordinate <- "-152.862827"
+# Validate, write the metadata to a temp file, then publish it with the package's existing data pids.
+eml_validate(doc)
+doc_path <- file.path(tempdir(), 'science_metadata.xml')
+write_eml(doc, doc_path)
+
+update <- publish_update(adc, 
+                         metadata_pid = pkg$metadata,
+                         resource_map_pid = pkg$resource_map,
+                         data_pids = pkg$data,
+                         metadata_path = doc_path, 
+                         public = FALSE)
+#https://arcticdata.io/catalog/view/urn:uuid:5b6546c3-b8a6-420f-b1ba-1422c4479f6b
+
+#https://arcticdata.io/catalog/view/urn%3Auuid%3Adda6d1ed-b817-42ad-9c23-a33b01c59a88
+# NOTE(review): the resource map loaded here (160fbcbf-...) matches none of the URL comments above; how it relates to the earlier versions is not shown in this script. Confirm.
+pkg <- get_package(adc, 'resource_map_urn:uuid:160fbcbf-92af-4d3b-8ebf-6092a4435a27', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+emld::eml_version("eml-2.1.1")
+# Call set_rights_and_access for this ORCID subject on every value in `pkg`, flattened with unlist().
+set_rights_and_access(adc, pids = unlist(pkg), subject = 'http://orcid.org/0000-0002-5718-6032')
+# Overwrite the dataset title.
+doc$dataset$title <- "Cape Espenberg Birnirk Project (CEBP) human mitogenome summary analysis (2016-2019)"
+# Validate, write the metadata to a temp file, then publish it; the eml_validate result is not captured or checked.
+eml_validate(doc)
+doc_path <- file.path(tempdir(), 'science_metadata.xml')
+write_eml(doc, doc_path)
+
+update <- publish_update(adc, 
+                         metadata_pid = pkg$metadata,
+                         resource_map_pid = pkg$resource_map,
+                         data_pids = pkg$data,
+                         metadata_path = doc_path, 
+                         public = FALSE)
+#https://arcticdata.io/catalog/view/urn:uuid:ec4f2c29-bdb2-4927-b364-20f2d6ead811
+
+pkg <- get_package(adc, 'resource_map_urn:uuid:ec4f2c29-bdb2-4927-b364-20f2d6ead811', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+emld::eml_version("eml-2.1.1")
+# Publish with use_doi = TRUE and public = TRUE; `doc` is read above but not modified here, and no metadata_path is passed.
+update <- publish_update(adc, 
+                         metadata_pid = pkg$metadata,
+                         resource_map_pid = pkg$resource_map,
+                         data_pids = pkg$data,
+                         use_doi=TRUE,
+                         public = TRUE)
+#https://arcticdata.io/catalog/view/doi%3A10.18739%2FA2NC5SD2M
+# Categorize two DOIs as "archaeology" and "anthropology"; the second DOI matches the URL comment just above, the first is not referenced elsewhere in this script. The third argument is presumably the curator's name (confirm).
+datamgmt::categorize_dataset("doi:10.18739/A2CZ32589", c("archaeology","anthropology"), "Angel")
+datamgmt::categorize_dataset("doi:10.18739/A2NC5SD2M", c("archaeology","anthropology"), "Angel")
+