From 8e515e829f5d410c771d8cd56510b60a095875a5 Mon Sep 17 00:00:00 2001 From: Thomas Keller Date: Fri, 26 Feb 2021 10:47:02 -0500 Subject: [PATCH] added alignment via decipher --- Dashboard.Rmd | 180 +++++++++++++++++++++++++------------------------- 1 file changed, 91 insertions(+), 89 deletions(-) diff --git a/Dashboard.Rmd b/Dashboard.Rmd index 987716c..ef7c304 100644 --- a/Dashboard.Rmd +++ b/Dashboard.Rmd @@ -1,89 +1,91 @@ ---- -title: "SARS-CoV-2 Non-human Shifts " -output: flexdashboard::flex_dashboard -runtime: shiny ---- - -```{r setup, include=FALSE} -library(shiny) -library(tidyverse) -library(flexdashboard) -library(ape) -library(phangorn) -library(phylocanvas) -``` - -```{r global, include=FALSE} -KnownSeqs <- read.dna("all_aligned.fasta", format = "fasta") -KnownSeqsMeta <- read.table("metadata.csv", header = T, sep = ',') -``` - -Column {.sidebar} ------------------------------------------------------------------------ - -Upload your data here. - -```{r} -fileInput("sample", label = "Sequence data:", buttonLabel = "Browse...", placeholder = "No file selected") - -selectInput("species", label = "Select organism:", - choices = c("All", "canis_lupus", "felis_catus", "human", "mink", - "panthera_leo", "panthera_tigris_jacksoni"), selected = "All") - -selectInput("location", label = "Select location:", - choices = c("All", "Netherlands", "Poland", "sars_cov2_wuhan_hu1_root", "Wisconson-USA", "Unknown"), - selected = "All") - -``` - -Column ------------------------------------------------------------------------ - -### SARS-CoV-2 Tree - -```{r} -# Defining dataset to use -renderTable({ -#renderPlot({ - if(input$species == "All" & input$location == "All"){ - DesiredSeqs <- KnownSeqs - DesiredIds <- names(KnownSeqs) - }else if(input$species == "All" & input$location != "All"){ - DesiredIds <- KnownSeqsMeta %>% filter(location == input$location) %>% select(seqid) - DesiredIndex <- which(names(KnownSeqs) %in% DesiredIds) - DesiredSeqs <- KnownSeqs[DesiredIndex] - }else if(input$species != "All" & input$location == "All") { - DesiredIds <- KnownSeqsMeta %>% filter(species == input$species) %>% select(seqid) - DesiredIndex <- which(names(KnownSeqs) %in% DesiredIds) - DesiredSeqs <- KnownSeqs[DesiredIndex] - }else{ - DesiredIds <- KnownSeqsMeta %>% filter(species == input$species, location == input$species) %>% select(seqid) - DesiredIndex <- which(names(KnownSeqs) %in% DesiredIds) - DesiredSeqs <- KnownSeqs[DesiredIndex] - } - #DesiredIds - -# Adding sample data - file <- input$sample - SampleSeq <- read.dna(file$datapath, format = "fasta") - AllSeqs <- cbind(as.matrix(DesiredSeqs), as.matrix(SampleSeq), fill.with.gaps = T) - names(AllSeqs) -#}) -#tree construction, uncomment when ready -#also, may be slow? not sure at 30 seqs X 30kb -#from the phangorn tutorial -#https://cran.r-project.org/web/packages/phangorn/vignettes/Trees.pdf -#initial tree - #msa <- phyDat(KnownSeqs, type = "DNA") - #dm <- dist.ml(msa) - #treeNJ <- NJ(dm) -#ML tuning -#fit <- pml(treeNJ, data=aln) -#can also try ratchet/stochastic for rearrangement, but slower (better topology) -#fitGTR <- optim.plm(fit, model="GTR",optInv=TRUE,optGamma=TRUE, -# rearrangement="NNI",control=pml.control(trace=0)) -#todo, figure out how to extract the tree - - #phylocanvas(treeNJ, treetype = "rectangular", alignlabels = T) -}) -``` +--- +title: "SARS-CoV-2 Non-human Shifts " +output: flexdashboard::flex_dashboard +runtime: shiny +--- + +```{r setup, include=FALSE} +library(shiny) +library(tidyverse) +library(flexdashboard) +library(ape) +library(phangorn) +library(phylocanvas) +library(DECIPHER) +library(tictoc) +library(dplyr) +``` + +```{r global, include=FALSE} +KnownSeqs <- read.dna("all_aligned.fasta", format = "fasta") +KnownSeqsMeta <- read.table("metadata.csv", header = T, sep = ',') +``` + +Column {.sidebar} +----------------------------------------------------------------------- + +Upload your data here. + +```{r} +fileInput("sample", label = "Sequence data:", buttonLabel = "Browse...", placeholder = "No file selected") +selectInput("species", label = "Select organism:", + choices = c("All", "canis_lupus", "felis_catus", "human", "mink", + "panthera_leo", "panthera_tigris_jacksoni"), selected = "All") +selectInput("location", label = "Select location:", + choices = c("All", "Netherlands", "Poland", "sars_cov2_wuhan_hu1_root", "Wisconson-USA", "Unknown"), + selected = "All") +``` + +Column +----------------------------------------------------------------------- + +### SARS-CoV-2 Tree + +```{r} +# Defining dataset to use +renderTable({ +#renderPlot({ + if(input$species == "All" & input$location == "All"){ + DesiredSeqs <- KnownSeqs + DesiredIds <- names(KnownSeqs) + }else if(input$species == "All" & input$location != "All"){ + DesiredIds <- KnownSeqsMeta %>% filter(location == input$location) %>% select(seqid) + DesiredIndex <- which(names(KnownSeqs) %in% DesiredIds) + DesiredSeqs <- KnownSeqs[DesiredIndex] + }else if(input$species != "All" & input$location == "All") { + DesiredIds <- KnownSeqsMeta %>% filter(species == input$species) %>% select(seqid) + DesiredIndex <- which(names(KnownSeqs) %in% DesiredIds) + DesiredSeqs <- KnownSeqs[DesiredIndex] + }else{ + DesiredIds <- KnownSeqsMeta %>% filter(species == input$species, location == input$species) %>% select(seqid) + DesiredIndex <- which(names(KnownSeqs) %in% DesiredIds) + DesiredSeqs <- KnownSeqs[DesiredIndex] + } + #DesiredIds +# Adding sample data + file <- input$sample + SampleSeq <- readDNAStringSet(file$datapath,format='fasta') + DesiredSeqsConv=DesiredSeqs %>% as.list %>% as.character %>% lapply(.,paste0,collapse="") %>% unlist %>% DNAStringSet + #AllSeqs <- cbind(as.matrix(DesiredSeqs), as.matrix(SampleSeq), fill.with.gaps = T) + aligned=AlignProfiles(DesiredSeqsConv,SampleSeq) + #names(AllSeqs) +#}) +#tree construction, uncomment when ready +#also, may be slow? not sure at 30 seqs X 30kb +#from the phangorn tutorial +#https://cran.r-project.org/web/packages/phangorn/vignettes/Trees.pdf +#initial tree + tic() + msa <- phyDat(KnownSeqs, type = "DNA") + dm <- dist.ml(msa) + treeNJ <- NJ(dm) + ttime=toc() +#ML tuning +#fit <- pml(treeNJ, data=aln) +#can also try ratchet/stochastic for rearrangement, but slower (better topology) +#fitGTR <- optim.plm(fit, model="GTR",optInv=TRUE,optGamma=TRUE, +# rearrangement="NNI",control=pml.control(trace=0)) +#todo, figure out how to extract the tree + #phylocanvas(treeNJ, treetype = "rectangular", alignlabels = T) +}) +``` \ No newline at end of file