Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
Binary file added Figures/c_bot_month_stype_2015.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Figures/c_bot_presence_2015.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Figures/clost_family_presence_2015.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
File renamed without changes.
143 changes: 125 additions & 18 deletions Phyloseq_41816 → Phyloseq_41816.R
Original file line number Diff line number Diff line change
@@ -1,37 +1,42 @@
#atttempting to work with sequencing data in order to do analysis for UROP poster

#reading off of Michelle Berry's Phyloseq Tutorial
#install necessary packages
install.packages("vegan")
source("http://bioconductor.org/biocLite.R")
biocLite("phyloseq")
##########################################################################################
#load necessary packages
library(ggplot2)
library(magrittr)
library(dplyr)
library(scales)
library(grid)
library(reshape2)
library(phyloseq)
library(vegan)
library(tidyverse)
##########################################################################################
#saved 'Final' (raw phyloseq object) for later use
save(Final, file = "Final.RData")

#set-up working directory
setwd("C:/Users/morga/Documents/UROP/Denef/Morgan/")

#import data
sharedfile = "Sed_QM.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.an.unique_list.shared"
taxfile = "Sed_QM.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.an.unique_list.0.03.cons.taxonomy"
sharedfile = "stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.an.shared"
taxfile = "stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.an.0.03.cons.taxonomy"
mapfile = "Metadata_Table.csv"

#import mothur data and metadata
mothur_data <- import_mothur(mothur_shared_file = sharedfile, mothur_constaxonomy_file = taxfile)
metadata <- read.csv(mapfile)
metadata <- read.csv(mapfile) %>%
select(-Read_Number) %>%
mutate(Sediment_Type = ifelse(Sediment_Type == "clay ", "clay",
ifelse(Sediment_Type == "", "Unlabeled", Sediment_Type)))
#Rename data so that mothur data matches metadata
metadata$New_Name <- str_replace_all(string = metadata$New_Name,
pattern = "\\.",
replacement = "_")
metadata$Original_Name <- str_replace_all(string = metadata$Original_Name,
pattern = "\\.",
replacement = "_")
#convert metadata to phyloseq format
metadata <- sample_data(metadata)
rownames(metadata) <- metadata$New_Name
rownames(metadata) <- metadata$Original_Name

##########################################################################################
#alterations to metadata_table
Expand All @@ -52,12 +57,12 @@ metadata$Depth_in_Meters <- as.factor(metadata$Depth_in_Meters)
#merge metadata into mothur file
moth.merge = merge_phyloseq(mothur_data, metadata)
#15 samples lost when metadata and mothur_data were merged together. 11 were blanks and controls. 4 were samples
#missing samples: FM45.Ts, FM45.ZM, MLB.B1.S515.2, MLB.B1.S915
#missing samples: NFW_Blank and MLBB1_S916, one of the samples says CONTAMINATED, so assuming it is this one.

#reformat taxonomy file column names and rename
colnames(tax_table(moth.merge))
colnames(tax_table(moth.merge)) <- c("Kingdom", "Phylum", "Class", "Order",
"Family", "Genus", "Rank7", "Rank8")
colnames(tax_table(moth.merge)) <- c("Domain", "Phylum", "Class", "Order",
"Family", "Genus", "Species")

#filter out samples that I do not want to use, on the tutorial it says to use sample type
#but for my metadata table I made a column named "Use" that has the same affect
Expand All @@ -67,27 +72,31 @@ moth.sub <- prune_taxa(taxa_sums(moth.sub) > 0, moth.sub) #unsure of what this s
#make new data.frame by removing any eukaryotic cells from the (tax_table(moth.sub))
Final <-
moth.sub %>%
subset_taxa(Kingdom == "Bacteria" &
subset_taxa(Domain == "Bacteria" &
Family != "mitochondria" &
Class != "Chloroplast")

#Scale the reads for "Final" by using code originally from Michelle Berry
#functions for "matround" and "scale_reads" found at end of document
scale_Final <-scale_reads(physeq = Final, n = 5000, round = "matround")
scale_Final <-scale_reads(physeq = Final,n=5000, round = "matround")

###########################################################################################################################



############################################################################################################################
########################### Graph Sample Reads #############################################################################
#Begin graphing with set that has removed eukaryotic "Final" (should not be different)
theme_set(theme_bw()) +

ggplot(data.frame(sum = sample_sums(Final)), aes(sum)) +
geom_histogram(color = "black", fill = "indianred", stat = "bin", binwidth = 500) +
ggtitle("Distribution of sample sequencing depth") +
xlab("Read counts") +
ylab("")
# mean, max and min of sample read counts
smin <- min(sample_sums(Final))
smean <- mean(sample_sums(Final))
smax <- max(sample_sums(Final))

#Graph with eukaryotic cells included "moth.sub" (we don't expect this to be different)
ggplot(data.frame(sum = sample_sums(moth.sub)), aes(sum)) +
Expand Down Expand Up @@ -204,6 +213,104 @@ ggplot(All_phylum3, aes(x = Sed_or_Mus, y = Abundance, fill = Phylum)) +
guides(fill = guide_legend(reverse = TRUE, keywidth = 1, keyheight = 1)) +
ylab("Relative Abundance (Phyla > 3%) \n") +
ggtitle("Phylum Composition of Transect Sample \n Bacterial Communities by Sample Type")
####################

#Stacked barplot of Clostridium genera in all samples
clost_1 <- subset_taxa(Final, Family == "Clostridiaceae_1")
c_bot_screen <- clost_1 %>%
tax_glom(taxrank = "Genus") %>%
#transform_sample_counts(function(x) {x/sum(x)} ) %>% We want raw reads, so skipping this step
psmelt() %>%
arrange(Genus) %>%
#Include genera that contain Clostridium botulinum. https://www.arb-silva.de/browser/ssu-123/AF275922/
#These are sensu_stricto 1 and 18, including unclassified as well in case there are strains that
#produce bontE but are not yet described. Number of reads should be >0.
filter(Genus == "Clostridium_sensu_stricto_1" |
Genus == "Clostridium_sensu_stricto_18" |
Genus == "Clostridiaceae_1_unclassified",
Abundance > 0) %>%
select(Sample,New_Name, Abundance, Type, Sediment_Type, Genus, Month_Collected, Lake, Mussel_Density)

c_bot_family<- subset_taxa(Final, Family == "Clostridiaceae_1") %>%
subset_taxa(., str_detect(Genus, "Clost"))


c25 <- c(
"dodgerblue2", "#E31A1C", # red
"green4",
"#6A3D9A", # purple
"#FF7F00", # orange
"black", "gold1",
"skyblue2", "#FB9A99", # lt pink
"palegreen2",
"#CAB2D6", # lt purple
"#FDBF6F", # lt orange
"gray70", "khaki2",
"maroon", "orchid1", "deeppink1", "blue1", "steelblue4",
"darkturquoise", "green1", "yellow4", "yellow3",
"darkorange4", "brown"
)

plot_bar(c_bot_family, x="Type", fill="Genus", facet_grid = ~Sediment_Type) +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
scale_fill_manual(values = c25) +
scale_color_manual(values = c25) +
ylab("Number of Reads") +
xlab("Sample Type") +
theme(legend.position="bottom") +
ggtitle(expression("Presence of Clostridiaceae family sequences in 2015 sediment and mussel samples"))

ggsave("clost_family_presence_2015.png",path = "./Figures", width = 12.5)

c_bot_genus <- subset_taxa(Final, Genus == "Clostridium_sensu_stricto_1" | Genus ==
"Clostridiaceae_1_unclassified")
samples_per_matrix <- c_bot_genus@sam_data %>%
group_by(Type, Sediment_Type) %>%
summarize(count = n())

write.csv(samples_per_matrix, file = "./Sample Matrices by type.csv")
# c_bot_genus <- subset_taxa(scale_Final, Genus == "Clostridium_sensu_stricto_1" | Genus ==
# "Clostridiaceae_1_unclassified")

plot_bar(c_bot_genus, x="Type", fill="Genus", facet_grid = ~Sediment_Type) +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
scale_fill_manual(values = c25) +
scale_color_manual(values = c25) +
ylab("Number of Reads") +
xlab("Sample Type") +
theme(legend.position="bottom") +
ggtitle(expression("Presence of"~italic("Clostridium botulinum")~
"-like 16S sequences in 2015 sediment and mussel samples"))
ggsave("c_bot_presence_2015.png",path = "./Figures", width = 12.5)

plot_bar(c_bot_family, x="Sed_or_Mus", fill="Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack")
plot_bar(c_bot_family, x="Sediment_Type", fill="Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack")

plot_bar(c_bot_genus, x="Month_Collected", fill="Genus", facet_grid = ~Sediment_Type) +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
scale_fill_manual(values = c25) +
scale_color_manual(values = c25) +
ylab("Number of Reads") +
xlab("Sample Type") +
theme(legend.position = "bottom", axis.text.x = element_text(vjust =0.5))
ggsave("c_bot_month_stype_2015.png",path = "./Figures", width = 12.5)

c_bot_genus %>%
plot_bar(., x="New_Name", fill="Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack")

ggplot(c_bot_screen, aes(x = Sample, y = Abundance, fill = Genus)) +
#facet_grid(~.) +
geom_bar(stat = "identity", position = "stack") +
# Remove x axis title
theme(legend.position = "bottom",
axis.text.x = element_text(angle = 90, hjust = 1)) +
xlab("Sample Name") +
guides(fill = guide_legend(reverse = TRUE, keywidth = 1, keyheight = 1)) +
ylab("Number of Reads") +
ggtitle("Clostridiaceae_1 genera in \n Bacterial Communities by Sample Type")
##########################################################################################
#Stacked bar plot by plyha >3% in the mussel samples distinguished by size
MusselSizePhy3 <- scale_Final %>%
Expand All @@ -214,7 +321,7 @@ MusselSizePhy3 <- scale_Final %>%
filter(Abundance > 0.03) %>%
arrange(Phylum)

ggplot(MusselSizePhy3, aes(x = Mussel_Classification, y = Abundance, fill = Phylum)) +
ggplot(MusselSizePhy3, aes(x = Mussel.Classification, y = Abundance, fill = Phylum)) +
# facet_grid(Station~.) +
geom_bar(stat = "identity") +
scale_fill_manual(values = phylum.colors) +
Expand Down
25 changes: 25 additions & 0 deletions Sample Matrices by type.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"","Type","Sediment_Type","count"
"1","Mussel Shell","silt over sandy silt",6
"2","Mussel Shell","silty sand",11
"3","Mussel Shell","soft silt",6
"4","Mussel Tissue","silt over sandy silt",6
"5","Mussel Tissue","silty sand",11
"6","Mussel Tissue","soft silt",6
"7","Sediment","black mud",27
"8","Sediment","clay",14
"9","Sediment","clay and sand",3
"10","Sediment","sand",14
"11","Sediment","sandy silt",9
"12","Sediment","silt",3
"13","Sediment","silt over sandy silt",6
"14","Sediment","silty sand",30
"15","Sediment","soft silt",9
"16","Sediment","Unlabeled",24
"17","Whole Mussel","black mud",6
"18","Whole Mussel","clay",6
"19","Whole Mussel","clay and sand",2
"20","Whole Mussel","sandy silt",6
"21","Whole Mussel","silt",2
"22","Whole Mussel","silt over sandy silt",4
"23","Whole Mussel","silty sand",20
"24","Whole Mussel","soft silt",6
Binary file added Sediment_type vs clostridium.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions Sequencing_Analysis.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
Binary file added month vs clostridium.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
File renamed without changes.
File renamed without changes.
Binary file added sample name vs genus.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added sample_type vs clostridium.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading