From 202cc66abc85257b7c56a7273300011b9a02eb02 Mon Sep 17 00:00:00 2001
From: Angel Chen
Date: Mon, 2 Mar 2020 10:38:33 -0800
Subject: [PATCH 1/2] adding processing scripts for ticket #19667 Russell

---

Notes:
    Review notes (text below the three-dash line is not part of the commit):
    * Adds a one-off script that builds an attribute table (a data.frame
      named `attributes`) for
      Barrow_Spectra_Barrow_20080302_R1_thru20100305.ict.
    * `adc` is used but never defined in the script and there are no
      library() calls; presumably the session already provides both.
      TODO confirm before re-running.
    * The literals 1878 and 1876 are hard-coded. seq(400, 4000, by=1.92)
      yields 1876 values, so the table only lines up if the .ict file has
      2 + 1876 columns; verify against ncol(dat).

 R/Angel Chen/ticket #19667 Russell.R | 56 ++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 R/Angel Chen/ticket #19667 Russell.R

diff --git a/R/Angel Chen/ticket #19667 Russell.R b/R/Angel Chen/ticket #19667 Russell.R
new file mode 100644
index 0000000..dbb4374
--- /dev/null
+++ b/R/Angel Chen/ticket #19667 Russell.R
@@ -0,0 +1,56 @@
+#ticket #19667: https://arcticdata.io/catalog/view/urn:uuid:6a64c7ad-aa08-424a-9cf9-8a5747436813
+#formatted like this https://www-air.larc.nasa.gov/missions/etc/IcarttDataFormat.htm
+#spending time to do https://learning.nceas.ucsb.edu/2020-02-RRCourse/data-cleaning-and-manipulation.html
+
+#making attributes for Barrow_Spectra_Barrow_20080302_R1_thru20100305.ict
+#grab the names of the columns using names() and make that a data frame (using data.frame()
+#split the names into wavelength and frame number columns using separate()
+#create descriptions of the attributes using a combination of mutate() and paste(), descriptions can match a format like "absorbance at x nm, frame y"
+#add units, etc using mutate (unit can be dimensionless
+
+#1878 total variables
+
+
+pkg <- get_package(adc, 'resource_map_urn:uuid:6b36ffd1-98f3-415d-a1b7-822a67745a1b', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+
+dat <- read.csv("~/ticket #19667 Russell/Barrow_Spectra_Barrow_20080302_R1_thru20100305.ict", skip = 1906)
+
+names_df <- data.frame(names(dat))
+names_vector <- as.vector(names_df[[1]])
+
+descriptions1 <- c("number of seconds elapsed since 03/02/2008 in Coordinated Universal Time (UTC), recorded when a day began", "number of seconds elapsed since 03/02/2008 in Coordinated Universal Time (UTC), recorded when a day ended")
+
+wavenumbers <- seq(400, 4000, by=1.92)
+descriptions2 <- paste("absorbance at wavenumber", wavenumbers)
+
+scales1 <- c("ratio", "ratio")
+scales2 <- rep("ratio", 1876)
+
+domains <- rep("numericDomain", 1878)
+
+format_strings <- rep(NA, 1878)
+
+definitions <- rep(NA, 1878)
+
+units1 <- c("second", "second")
+units2 <- rep("dimensionless" ,1876)
+
+number_types <- rep("real", 1878)
+
+missing_codes <- rep(NA, 1878)
+missing_explanations <- rep(NA, 1878)
+
+attributes <- data.frame(
+  attributeName = names_vector,
+  attributeDefinition = c(descriptions1, descriptions2),
+  measurementScale = c(scales1, scales2),
+  domain = domains,
+  formatString = format_strings,
+  definition = definitions,
+  unit = c(units1, units2),
+  numberType = number_types,
+  missingValueCode = missing_codes,
+  missingValueCodeExplanation = missing_explanations,
+
+  stringsAsFactors = FALSE)

From a13e5f04d2a8b974bad33e177e41c1d58793ec36 Mon Sep 17 00:00:00 2001
From: Angel Chen
Date: Wed, 5 Aug 2020 11:29:44 -0700
Subject: [PATCH 2/2] adding another processing script

---

Notes:
    Review notes (text below the three-dash line is not part of the commit):
    * Deletes the ticket #19667 script added in patch 1/2 and adds a script
      for ticket #20492.
    * Norman.R reads as a record of sequential metadata updates: after most
      publish_update() calls a comment gives a catalog URL, and the next
      get_package() call uses the same uuid. Presumably not meant to be
      re-run top to bottom; TODO confirm.
    * geographicCoverage[[2]] longitudes are first written as "156.51550" /
      "156.64143", then rewritten with a leading "-" in the next revision
      (west "-156.51550", east "-156.64143"; west is numerically greater
      than east, which looks swapped; confirm), and finally replaced by a
      single point (69.057876, -152.862827).
    * Only the last publish_update() call sets use_doi=TRUE and
      public = TRUE; every earlier call passes public = FALSE.

 R/Angel Chen/ticket #19667 Russell.R |  56 -----------
 R/Angel Chen/ticket #20492 Norman.R  | 144 +++++++++++++++++++++++++++
 2 files changed, 144 insertions(+), 56 deletions(-)
 delete mode 100644 R/Angel Chen/ticket #19667 Russell.R
 create mode 100644 R/Angel Chen/ticket #20492 Norman.R

diff --git a/R/Angel Chen/ticket #19667 Russell.R b/R/Angel Chen/ticket #19667 Russell.R
deleted file mode 100644
index dbb4374..0000000
--- a/R/Angel Chen/ticket #19667 Russell.R
+++ /dev/null
@@ -1,56 +0,0 @@
-#ticket #19667: https://arcticdata.io/catalog/view/urn:uuid:6a64c7ad-aa08-424a-9cf9-8a5747436813
-#formatted like this https://www-air.larc.nasa.gov/missions/etc/IcarttDataFormat.htm
-#spending time to do https://learning.nceas.ucsb.edu/2020-02-RRCourse/data-cleaning-and-manipulation.html
-
-#making attributes for Barrow_Spectra_Barrow_20080302_R1_thru20100305.ict
-#grab the names of the columns using names() and make that a data frame (using data.frame()
-#split the names into wavelength and frame number columns using separate()
-#create descriptions of the attributes using a combination of mutate() and paste(), descriptions can match a format like "absorbance at x nm, frame y"
-#add units, etc using mutate (unit can be dimensionless
-
-#1878 total variables
-
-
-pkg <- get_package(adc, 'resource_map_urn:uuid:6b36ffd1-98f3-415d-a1b7-822a67745a1b', file_names = TRUE)
-doc <- read_eml(getObject(adc, pkg$metadata))
-
-dat <- read.csv("~/ticket #19667 Russell/Barrow_Spectra_Barrow_20080302_R1_thru20100305.ict", skip = 1906)
-
-names_df <- data.frame(names(dat))
-names_vector <- as.vector(names_df[[1]])
-
-descriptions1 <- c("number of seconds elapsed since 03/02/2008 in Coordinated Universal Time (UTC), recorded when a day began", "number of seconds elapsed since 03/02/2008 in Coordinated Universal Time (UTC), recorded when a day ended")
-
-wavenumbers <- seq(400, 4000, by=1.92)
-descriptions2 <- paste("absorbance at wavenumber", wavenumbers)
-
-scales1 <- c("ratio", "ratio")
-scales2 <- rep("ratio", 1876)
-
-domains <- rep("numericDomain", 1878)
-
-format_strings <- rep(NA, 1878)
-
-definitions <- rep(NA, 1878)
-
-units1 <- c("second", "second")
-units2 <- rep("dimensionless" ,1876)
-
-number_types <- rep("real", 1878)
-
-missing_codes <- rep(NA, 1878)
-missing_explanations <- rep(NA, 1878)
-
-attributes <- data.frame(
-  attributeName = names_vector,
-  attributeDefinition = c(descriptions1, descriptions2),
-  measurementScale = c(scales1, scales2),
-  domain = domains,
-  formatString = format_strings,
-  definition = definitions,
-  unit = c(units1, units2),
-  numberType = number_types,
-  missingValueCode = missing_codes,
-  missingValueCodeExplanation = missing_explanations,
-
-  stringsAsFactors = FALSE)
diff --git a/R/Angel Chen/ticket #20492 Norman.R b/R/Angel Chen/ticket #20492 Norman.R
new file mode 100644
index 0000000..9039c29
--- /dev/null
+++ b/R/Angel Chen/ticket #20492 Norman.R
@@ -0,0 +1,144 @@
+#ticket #20492 Norman
+#Human dataset:
+#https://arcticdata.io/catalog/#view/urn:uuid:c715fe12-6c05-4c27-8d00-b9c0c536c54b
+
+pkg <- get_package(adc, 'resource_map_urn:uuid:2a957ad3-1c0f-44e9-b79d-5ea2a2ed76bf', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+emld::eml_version("eml-2.1.1")
+
+id_new <- publish_object(
+  adc,
+  path = "~/ticket #20492 Norman/CEBP_HumanGeneticData_Summary.csv",
+  format_id = "text/csv",
+  public = FALSE
+)
+
+update <- publish_update(adc,
+  metadata_pid = pkg$metadata,
+  resource_map_pid = pkg$resource_map,
+  data_pids = id_new,
+  public = FALSE)
+#https://arcticdata.io/catalog/view/urn%3Auuid%3Ab7b1a184-4f25-4c7f-b66d-240e2dfcd348
+
+pkg <- get_package(adc, 'resource_map_urn:uuid:b7b1a184-4f25-4c7f-b66d-240e2dfcd348', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+emld::eml_version("eml-2.1.1")
+
+doc <- eml_add_publisher(doc)
+doc <- eml_add_entity_system(doc)
+
+CEBP_HumanGeneticData_Summary <- read_csv("~/ticket #20492 Norman/CEBP_HumanGeneticData_Summary.csv")
+out <- shiny_attributes(CEBP_HumanGeneticData_Summary, NULL)
+
+doc$dataset$otherEntity <- NULL
+
+physical <- pid_to_eml_physical(adc, "urn:uuid:c17ef00e-7202-49b5-aa8e-a51afc999f41")
+
+Attributes_Table <- read_csv("~/ticket #20492 Norman/Attributes_Table.csv")
+attributeList <- set_attributes(attributes=Attributes_Table)
+
+dataTable <- eml$dataTable(entityName = "CEBP_HumanGeneticData_Summary.csv",
+  entityDescription = "Summary of the genetic data collected from individuals",
+  physical = physical,
+  attributeList = attributeList
+)
+doc$dataset$dataTable[[1]] <- dataTable
+
+
+doc$dataset$contact[[3]] <- NULL
+
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$westBoundingCoordinate <- "156.51550"
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$eastBoundingCoordinate <- "156.64143"
+
+doc$dataset$project <- eml_nsf_to_project("1523059")
+
+eml_validate(doc)
+doc_path <- file.path(tempdir(), 'science_metadata.xml')
+write_eml(doc, doc_path)
+
+update <- publish_update(adc,
+  metadata_pid = pkg$metadata,
+  resource_map_pid = pkg$resource_map,
+  data_pids = pkg$data,
+  metadata_path = doc_path,
+  public = FALSE)
+#https://arcticdata.io/catalog/view/urn%3Auuid%3Afff9acca-9e4c-481a-8186-678892ca5a6e
+
+pkg <- get_package(adc, 'resource_map_urn:uuid:fff9acca-9e4c-481a-8186-678892ca5a6e', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+emld::eml_version("eml-2.1.1")
+
+
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$westBoundingCoordinate <- "-156.51550"
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$eastBoundingCoordinate <- "-156.64143"
+
+eml_validate(doc)
+doc_path <- file.path(tempdir(), 'science_metadata.xml')
+write_eml(doc, doc_path)
+
+update <- publish_update(adc,
+  metadata_pid = pkg$metadata,
+  resource_map_pid = pkg$resource_map,
+  data_pids = pkg$data,
+  metadata_path = doc_path,
+  public = FALSE)
+#https://arcticdata.io/catalog/view/urn%3Auuid%3A8b86d811-2b0e-48ae-9f48-7a57d67eccfe
+
+pkg <- get_package(adc, 'resource_map_urn:uuid:8b86d811-2b0e-48ae-9f48-7a57d67eccfe', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+emld::eml_version("eml-2.1.1")
+
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$northBoundingCoordinate <- "69.057876"
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$southBoundingCoordinate <- "69.057876"
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$westBoundingCoordinate <- "-152.862827"
+doc$dataset$coverage$geographicCoverage[[2]]$boundingCoordinates$eastBoundingCoordinate <- "-152.862827"
+
+eml_validate(doc)
+doc_path <- file.path(tempdir(), 'science_metadata.xml')
+write_eml(doc, doc_path)
+
+update <- publish_update(adc,
+  metadata_pid = pkg$metadata,
+  resource_map_pid = pkg$resource_map,
+  data_pids = pkg$data,
+  metadata_path = doc_path,
+  public = FALSE)
+#https://arcticdata.io/catalog/view/urn:uuid:5b6546c3-b8a6-420f-b1ba-1422c4479f6b
+
+#https://arcticdata.io/catalog/view/urn%3Auuid%3Adda6d1ed-b817-42ad-9c23-a33b01c59a88
+
+pkg <- get_package(adc, 'resource_map_urn:uuid:160fbcbf-92af-4d3b-8ebf-6092a4435a27', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+emld::eml_version("eml-2.1.1")
+
+set_rights_and_access(adc, pids = unlist(pkg), subject = 'http://orcid.org/0000-0002-5718-6032')
+
+doc$dataset$title <- "Cape Espenberg Birnirk Project (CEBP) human mitogenome summary analysis (2016-2019)"
+
+eml_validate(doc)
+doc_path <- file.path(tempdir(), 'science_metadata.xml')
+write_eml(doc, doc_path)
+
+update <- publish_update(adc,
+  metadata_pid = pkg$metadata,
+  resource_map_pid = pkg$resource_map,
+  data_pids = pkg$data,
+  metadata_path = doc_path,
+  public = FALSE)
+#https://arcticdata.io/catalog/view/urn:uuid:ec4f2c29-bdb2-4927-b364-20f2d6ead811
+
+pkg <- get_package(adc, 'resource_map_urn:uuid:ec4f2c29-bdb2-4927-b364-20f2d6ead811', file_names = TRUE)
+doc <- read_eml(getObject(adc, pkg$metadata))
+emld::eml_version("eml-2.1.1")
+
+update <- publish_update(adc,
+  metadata_pid = pkg$metadata,
+  resource_map_pid = pkg$resource_map,
+  data_pids = pkg$data,
+  use_doi=TRUE,
+  public = TRUE)
+#https://arcticdata.io/catalog/view/doi%3A10.18739%2FA2NC5SD2M
+
+datamgmt::categorize_dataset("doi:10.18739/A2CZ32589", c("archaeology","anthropology"), "Angel")
+datamgmt::categorize_dataset("doi:10.18739/A2NC5SD2M", c("archaeology","anthropology"), "Angel")
+